<a href="https://colab.research.google.com/github/boxty123/SoThat-NLP/blob/main/Konlpy_stopwords.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell magics: the leading `!` hands each line to the system shell,
# so these only run under IPython/Jupyter/Colab, not as plain Python.
!pip install konlpy
!pip install pandas
!pip install numpy

import re
import pandas as pd
from konlpy.tag import Okt
from collections import Counter

class TextProcessor:
    """Clean, tokenize and count the words of a list of Korean sentences.

    The sentences live in ``self.data``, a DataFrame with a single ``reply``
    column. ``process_all`` rewrites that column in place; afterwards
    ``get_frequent_words`` reports word frequencies over the whole column.
    """

    # Matches every character that is NOT a Hangul jamo, a Hangul syllable or
    # a space. Compiled once here rather than on every extract_word() call.
    _NON_HANGUL = re.compile('[^ㄱ-ㅎㅏ-ㅣ가-힣 ]')

    # Stopword entries may be separated by commas and/or line breaks.
    _STOPWORD_SEPARATORS = re.compile(r'[,\r\n]+')

    def __init__(self, replies, stopwords_file):
        """Set up the data frame, the Okt analyzer and the stopword set.

        :param replies: list of sentences to process
        :param stopwords_file: path to the stopword file
        """
        self.data = pd.DataFrame({'reply': replies})
        self.okt = Okt()
        self.stopwords = self.load_stopwords(stopwords_file)

    def load_stopwords(self, file_path) -> set:
        """Read the stopword file and return its entries as a set.

        Entries are separated by commas and/or line breaks. Surrounding
        whitespace is stripped and empty entries are dropped, so a file such
        as ``"a, b,\\nc\\n"`` yields ``{"a", "b", "c"}``.

        Loading is best-effort: if the file cannot be opened or decoded, the
        error is printed and an empty set is returned.

        :param file_path: path to a UTF-8 (optionally BOM-prefixed) text file
        :return: set of stopword strings
        """
        try:
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                content = f.read()
        except (OSError, ValueError, TypeError) as e:
            # OSError: missing/unreadable file; ValueError: bad encoding
            # (UnicodeDecodeError) or invalid path; TypeError: non-path value.
            print(f"불용어 파일을 불러오는 중 오류 발생: {e}")
            return set()

        # Strip each entry: without this, " word" or "word\n" would never
        # compare equal to a token and the stopword would be silently ignored.
        entries = (entry.strip() for entry in self._STOPWORD_SEPARATORS.split(content))
        return {entry for entry in entries if entry}

    def extract_word(self, text) -> str:
        """Return ``text`` with everything except Hangul and spaces removed.

        Non-string values (for example ``None`` or NaN for a missing reply)
        are treated as empty text instead of raising ``TypeError``.

        :param text: raw sentence
        :return: the sentence reduced to Hangul characters and spaces
        """
        if not isinstance(text, str):
            return ''
        return self._NON_HANGUL.sub('', text)

    def process_text(self, text) -> list:
        """Split ``text`` into stemmed morphemes and drop one-character tokens.

        :param text: sentence to analyze
        :return: list of tokens longer than one character
        """
        words = self.okt.morphs(text, stem=True)  # stem=True: restore base forms
        return [w for w in words if len(w) > 1]

    def remove_stopwords(self, words) -> list:
        """Return ``words`` without the entries found in ``self.stopwords``.

        :param words: list of tokens
        :return: list of tokens, original order preserved
        """
        return [word for word in words if word not in self.stopwords]

    def _clean_reply(self, text) -> str:
        """Run one reply through the full pipeline and re-join its tokens."""
        hangul_only = self.extract_word(text)
        tokens = self.remove_stopwords(self.process_text(hangul_only))
        return " ".join(tokens)

    def process_all(self):
        """Process every reply in place.

        Each reply is reduced to Hangul, tokenized, stripped of stopwords and
        stored back into ``self.data['reply']`` as a space-joined string.
        """
        # One pass over the column instead of four separate apply() calls.
        self.data['reply'] = self.data['reply'].apply(self._clean_reply)

    def get_frequent_words(self, n=None) -> list:
        """Count whitespace-separated words over all replies.

        :param n: if given, return only the ``n`` most frequent words;
            ``None`` (the default) returns every word
        :return: list of ``(word, count)`` tuples, most frequent first
        """
        counts = Counter()
        for reply in self.data['reply']:
            counts.update(reply.split())
        return counts.most_common(n)


Collecting konlpy
  Downloading konlpy-0.6.0-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting JPype1>=0.7.0 (from konlpy)
  Downloading jpype1-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Downloading konlpy-0.6.0-py2.py3-none-any.whl (19.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 19.4/19.4 MB 15.6 MB/s eta 0:00:00
Downloading jpype1-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (494 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 494.1/494.1 kB 8.6 MB/s eta 0:00:00
Installing collected packages: JPype1, konlpy
Successfully installed JPype1-1.5.2 konlpy-0.6.0
