In [None]:
# Mount Google Drive at /content/drive so that files stored on Drive can be
# opened through ordinary file paths under that directory.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# 필요한 패키지 import
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Load the dataset from the mounted Google Drive folder.
import pandas as pd

# Location of the CSV file on Drive; edit this constant to point elsewhere.
TWCS_CSV_PATH = '/content/drive/MyDrive/SW 전문인재양성 2기/개인프로젝트/Open ai/twcs.csv'
data = pd.read_csv(TWCS_CSV_PATH)

In [None]:
# Preview the first five rows (five is the default row count of head()).
data.head()

#### 데이터 전처리

In [None]:
# Download the NLTK resources used by the stop-word removal step:
#   - 'punkt' / 'punkt_tab': tokenizer data loaded by word_tokenize. NLTK 3.9
#     and later look up 'punkt_tab' while older releases look up 'punkt', so
#     both are fetched to keep the notebook working on either version.
#   - 'stopwords': the corpus that provides the English stop-word list.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

In [None]:
_ENGLISH_STOP_WORDS = None  # filled on first use by _english_stop_words()


def _english_stop_words():
    """Return the NLTK English stop words as a frozenset, loading them once."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        # Reading the corpus for every row is wasted work when this function
        # is applied to a whole column, so the set is cached after first use.
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def remove_stopwords(text):
    """Lower-case ``text``, tokenize it and drop English stop words.

    Args:
        text: The string to clean. Non-string values (for example the NaN
            that pandas uses for a missing cell, or None) are treated as
            empty text instead of raising.

    Returns:
        The remaining tokens joined by single spaces, or ``''`` when
        ``text`` is not a string.
    """
    # Missing cells arrive as float NaN / None, which have no .lower().
    if not isinstance(text, str):
        return ''
    stop_words = _english_stop_words()
    return ' '.join(
        token for token in word_tokenize(text.lower()) if token not in stop_words
    )

In [None]:
# Overwrite the 'text' column with its cleaned version (every entry passed
# through remove_stopwords).
# NOTE(review): NLTK's English stop-word list contains negations such as
# "not" and "no"; confirm that stripping them (and lower-casing) before the
# VADER scoring further down is intended.
data['text'] = data['text'].map(remove_stopwords)

#### 감정 분석

In [None]:
# Imports for the sentiment-analysis section. nltk, stopwords and
# word_tokenize are also imported by the notebook's first import cell, so
# they are repeated here.
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Download the 'vader_lexicon' NLTK resource before the VADER analyzer is
# created in a later cell.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
# Create the VADER SentimentIntensityAnalyzer instance; calculate_sentiment_score
# calls its polarity_scores method.
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Sentiment-score helper
def calculate_sentiment_score(text):
    """Return the VADER polarity scores for ``text``.

    The result is whatever ``analyzer.polarity_scores`` returns; later cells
    read its 'pos', 'neg', 'neu' and 'compound' entries.
    """
    return analyzer.polarity_scores(text)


In [None]:
# Score every row: each cell of 'sentiment_scores' holds the value that
# calculate_sentiment_score returns for that row's 'text'.
data['sentiment_scores'] = data['text'].map(calculate_sentiment_score)

In [None]:
# Binary sentiment label: 1 when the compound score is >= 0, otherwise 0.
data['sentiment'] = data['sentiment_scores'].apply(lambda scores: int(scores['compound'] >= 0))

In [None]:
# Split the per-row score dicts into one Series per component
# ('pos', 'neg', 'neu'). Rows whose entry is not a dict yield None.
positive_scores, negative_scores, neutral_scores = (
    data['sentiment_scores'].apply(
        lambda entry, component=component: entry[component] if isinstance(entry, dict) else None
    )
    for component in ('pos', 'neg', 'neu')
)

# Visualization code is added afterwards to inspect or print the sentiment scores.

#### 감정 분석 시각화

In [None]:
# Extract the 'pos', 'neg' and 'neu' entries of each row's score dict into
# separate Series (None for rows whose entry is not a dict). These Series are
# the inputs of the stacked bar chart in the next cell.
# NOTE(review): the sentiment-analysis section earlier in the notebook
# computes the same three Series; one of the two cells looks redundant.
positive_scores = data['sentiment_scores'].apply(lambda x: x['pos'] if isinstance(x, dict) else None)
negative_scores = data['sentiment_scores'].apply(lambda x: x['neg'] if isinstance(x, dict) else None)
neutral_scores = data['sentiment_scores'].apply(lambda x: x['neu'] if isinstance(x, dict) else None)

In [None]:
import matplotlib.pyplot as plt

# Upper bound on the number of rows drawn. The chart creates one bar and one
# tick label per row, so drawing every row of a large frame is impractical;
# frames with at most this many rows are plotted in full. Raise the constant
# to show more rows.
MAX_PLOTTED_ROWS = 50

n_plotted = min(len(data), MAX_PLOTTED_ROWS)
pos_part = positive_scores.iloc[:n_plotted]
neg_part = negative_scores.iloc[:n_plotted]
neu_part = neutral_scores.iloc[:n_plotted]

fig, ax = plt.subplots(figsize=(8, 6))
index = range(n_plotted)
bar_width = 0.3

# Stacked bars: positive at the bottom, negative on top of it, neutral on top
# of both. The stacking offsets are computed with vectorized Series addition.
ax.bar(index, pos_part, bar_width, label='Positive', color='g')
ax.bar(index, neg_part, bar_width, label='Negative', color='r', bottom=pos_part)
ax.bar(index, neu_part, bar_width, label='Neutral', color='b', bottom=pos_part + neg_part)

ax.set_xlabel('문장 인덱스')
ax.set_ylabel('감정 점수')
ax.set_title('감정 분석 결과')
ax.set_xticks(index)
# Label each bar with the corresponding row label of the frame.
ax.set_xticklabels(data.index[:n_plotted])
ax.legend()

plt.tight_layout()
plt.show()


#### 성능 평가

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Evaluation helper for the sentiment labels
def evaluate_sentiment_analysis(y_true, y_pred):
    """Compute classification metrics for predicted labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, passed together with ``y_true`` to each
            metric function.

    Returns:
        A tuple ``(cm, accuracy, precision, recall, f1)`` holding the results
        of ``confusion_matrix``, ``accuracy_score``, ``precision_score``,
        ``recall_score`` and ``f1_score`` respectively.
    """
    return (
        confusion_matrix(y_true, y_pred),
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        f1_score(y_true, y_pred),
    )

In [None]:
import random

# Placeholder y_true and y_pred (0: negative, 1: positive), drawn from a
# seeded generator so that the metrics below are identical on every run.
# NOTE: both lists are random, so the metrics computed from them do not
# measure the VADER labels stored in data['sentiment']. To evaluate the
# analysis itself, replace y_true with real annotations and y_pred with
# data['sentiment'].
LABEL_SEED = 42
_label_rng = random.Random(LABEL_SEED)
y_true = [_label_rng.choice([0, 1]) for _ in range(len(data))]
y_pred = [_label_rng.choice([0, 1]) for _ in range(len(data))]

In [None]:
# Evaluate y_pred against y_true and unpack the confusion matrix together
# with the accuracy, precision, recall and F1 values.
cm, accuracy, precision, recall, f1 = evaluate_sentiment_analysis(y_true, y_pred)

In [None]:
# Report the evaluation results: the confusion matrix first, then one line
# per metric.
print("Confusion Matrix:")
print(cm)
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(label, value)

#### 간단한 테스트

In [None]:
def analyze_sentiment(sentence):
    """Score one sentence with the same steps the notebook applies to the dataset.

    The sentence is cleaned with remove_stopwords, scored with
    calculate_sentiment_score and labelled with the rule used for the
    'sentiment' column (1 when the compound score is >= 0, otherwise 0).

    Args:
        sentence: Raw input text.

    Returns:
        A tuple ``(scores, label)``: the score dict and the 0/1 label.
    """
    scores = calculate_sentiment_score(remove_stopwords(sentence))
    label = 1 if scores['compound'] >= 0 else 0
    return scores, label


test_sentence = "Sorry. We will improve our servce"
sentiment_scores, sentiment = analyze_sentiment(test_sentence)
# Print the result
print("입력 문장:", test_sentence)
print("감정 점수:", sentiment_scores)
print("분류 결과:", sentiment)


In [None]:
# NOTE(review): this trailing cell installs matplotlib and re-imports names
# that earlier cells already import and use (the plotting cell imports
# matplotlib.pyplot, the sentiment-analysis section imports
# SentimentIntensityAnalyzer). It looks like a leftover; consider removing it
# or moving the install to the top of the notebook.
!pip install matplotlib
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer