# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
import numpy as np

# Load the data
data = pd.read_csv('finance_data.csv')

# Preprocessing: sentences as a plain list, string labels as integer class ids
X = data['kor_sentence'].tolist()
label_ids = data['labels'].map({'negative':0, 'neutral':1,'positive':2})

# Series.map leaves NaN for every value that is not a key of the mapping.
# Fail here with a clear message instead of letting NaN labels reach the
# model, where they only surface as an obscure out-of-range target error.
unmapped = label_ids.isna()
if unmapped.any():
    unexpected = sorted(data.loc[unmapped, 'labels'].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'labels' column: {unexpected}; "
        "expected 'negative', 'neutral' or 'positive'"
    )
y = label_ids.values

# Train/test split (80% / 20%, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Load the BERT tokenizer and the 3-class sequence classification model
# from the same pretrained checkpoint.
_checkpoint = 'klue/bert-base'
tokenizer = BertTokenizer.from_pretrained(_checkpoint)
model = BertForSequenceClassification.from_pretrained(_checkpoint, num_labels=3)

# Dataset class definition
class NewsDataset(torch.utils.data.Dataset):
    """Dataset that pairs tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name to a per-example indexable
            sequence (anything exposing ``.items()``).
        labels: Indexable, sized collection with one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples, i.e. the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors.

        The dict holds one tensor per encoding field plus a ``'labels'``
        entry containing the label as a ``torch.long`` tensor.
        """
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# Build the datasets: tokenize both splits with identical settings,
# then wrap each set of encodings together with its labels.
_tokenize_kwargs = dict(truncation=True, padding=True, max_length=512)
train_encodings = tokenizer(X_train, **_tokenize_kwargs)
test_encodings = tokenizer(X_test, **_tokenize_kwargs)

train_dataset, test_dataset = (
    NewsDataset(train_encodings, y_train),
    NewsDataset(test_encodings, y_test),
)

# Training configuration: optimisation hyperparameters are grouped
# separately from the output/log locations.
_hyperparameters = dict(
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
)
training_args = TrainingArguments(
    output_dir='./results',
    logging_dir='./logs',
    **_hyperparameters,
)

# Initialise the trainer and run fine-tuning
trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset, eval_dataset=test_dataset)

trainer.train()

# Evaluate the model on the held-out split
predictions = trainer.predict(test_dataset)
# Index of the highest-scoring class for each example
preds = np.argmax(predictions.predictions, axis=-1)

_report = classification_report(
    y_test,
    preds,
    target_names=['negative', 'neutral', 'positive'],
)
print(_report)

# Sentiment analysis for a new news text
def analyze_sentiment(news_text):
    """Classify the sentiment of a news text with the module-level model.

    Relies on the module-level ``tokenizer`` and ``model``. The model is
    moved to CUDA when available, otherwise to the CPU.

    Args:
        news_text: The news text to classify.

    Returns:
        A ``(sentiment, confidence)`` tuple where ``sentiment`` is one of
        ``'negative'``, ``'neutral'`` or ``'positive'`` and ``confidence``
        is the softmax probability of the predicted class as a float.
    """
    # Check if CUDA is available and move model to GPU if possible
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)  # Move the model to the selected device

    inputs = tokenizer(news_text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Move input tensors to the same device as the model
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference must run in eval mode (dropout off) so that the result is
    # deterministic; the previous mode is restored afterwards so that a
    # caller who is in the middle of training is not affected.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for prediction; skipping the autograd
        # graph saves memory and time.
        with torch.no_grad():
            outputs = model(**inputs)
    finally:
        model.train(was_training)

    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    pred = torch.argmax(probs, dim=-1).item()
    sentiment = ['negative', 'neutral', 'positive'][pred]
    confidence = probs[0][pred].item()
    return sentiment, confidence

# Usage example: classify one headline and print the text, the predicted
# sentiment and the confidence, one per line.
news = "삼성전자의 실적이 예상을 뛰어넘어 주가가 상승했습니다."
sentiment, confidence = analyze_sentiment(news)
print(f"뉴스: {news}", f"감성: {sentiment}", f"신뢰도: {confidence:.2f}", sep="\n")