In [1]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Local CSV locations of the analysed review data, keyed by restaurant.
file_paths = dict(
    bangbang=r"C:\fintech_service\final_project\data\분석\뱅뱅막국수_combined_reviews_분석_재분석.csv",
    pasccucci=r"C:\fintech_service\final_project\data\분석\파스쿠찌_combined_reviews_분석_재분석.csv",
    hangangsu=r"C:\fintech_service\final_project\data\분석\한강수_combined_reviews_분석_재분석.csv",
)

# Read every review file into its own DataFrame.
data_bangbang = pd.read_csv(file_paths["bangbang"])
data_pasccucci = pd.read_csv(file_paths["pasccucci"])
data_hangangsu = pd.read_csv(file_paths["hangangsu"])

# Split each DataFrame into its text column ('Review') and its label
# column ('sentiment').
texts_bangbang, labels_bangbang = data_bangbang['Review'], data_bangbang['sentiment']
texts_pasccucci, labels_pasccucci = data_pasccucci['Review'], data_pasccucci['sentiment']
texts_hangangsu, labels_hangangsu = data_hangangsu['Review'], data_hangangsu['sentiment']

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and a two-label sequence-classification model, then
# move the model onto the selected device.
# NOTE(review): the captured notebook output warns that the classifier
# weights of this checkpoint are newly initialized, so predictions are not
# meaningful until the model has been fine-tuned — confirm before use.
tokenizer = AutoTokenizer.from_pretrained("monologg/kobigbird-bert-base")
model = AutoModelForSequenceClassification.from_pretrained(
    "monologg/kobigbird-bert-base", num_labels=2
).to(device)

# 예측 함수 정의
def predict_sentiment(texts, tokenizer, model, device, batch_size=16):
    """Label each text as "Positive" or "Negative" using a classification model.

    Args:
        texts: Iterable of review texts. ``None`` and NaN entries (for
            example empty CSV cells loaded by pandas) are treated as empty
            strings and any other non-string value is converted with
            ``str``, so the result always lines up one-to-one with the input.
        tokenizer: Callable that accepts a list of strings plus the
            ``return_tensors``/``truncation``/``padding``/``max_length``
            keyword arguments and returns an object with ``.to(device)``
            that can be unpacked as keyword arguments for ``model``.
        model: Object providing ``eval()`` and, when called, returning a
            result whose ``logits`` attribute has one row per input text.
        device: Device the encoded inputs are moved to before inference.
        batch_size: Number of texts sent through the model per forward pass.

    Returns:
        A list with one entry per input text: "Positive" when the highest
        logit is at class index 1, otherwise "Negative".

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    def _as_text(value):
        # NaN is the only float that is not equal to itself; pandas uses it
        # for missing cells, and the tokenizer rejects it as a non-string.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return value if isinstance(value, str) else str(value)

    cleaned = [_as_text(value) for value in texts]

    model.eval()  # disable dropout and other training-only behaviour
    predictions = []

    with torch.no_grad():
        for start in range(0, len(cleaned), batch_size):
            batch = cleaned[start:start + batch_size]

            # Same tokenization settings as before, applied to a whole batch
            # so the model runs one forward pass per batch instead of per text.
            inputs = tokenizer(
                batch,
                return_tensors="pt",
                truncation=True,
                padding="max_length",
                max_length=512,
            ).to(device)

            logits = model(**inputs).logits
            class_ids = torch.argmax(logits, dim=1).tolist()
            predictions.extend(
                "Positive" if class_id == 1 else "Negative" for class_id in class_ids
            )

    return predictions

# Classify every review of each restaurant. The three runs stay as separate
# statements so finished results remain available if a later run fails.
bangbang_predictions = predict_sentiment(
    texts=texts_bangbang.tolist(), tokenizer=tokenizer, model=model, device=device
)
pasccucci_predictions = predict_sentiment(
    texts=texts_pasccucci.tolist(), tokenizer=tokenizer, model=model, device=device
)
hangangsu_predictions = predict_sentiment(
    texts=texts_hangangsu.tolist(), tokenizer=tokenizer, model=model, device=device
)

# Print the first five predictions of each restaurant as a quick preview.
for heading, preview in (
    ("뱅뱅막국수 감성 분석 결과:", bangbang_predictions[:5]),
    ("파스쿠찌 감성 분석 결과:", pasccucci_predictions[:5]),
    ("한강수 감성 분석 결과:", hangangsu_predictions[:5]),
):
    print(heading, preview)


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of BigBirdForSequenceClassification were not initialized from the model checkpoint at monologg/kobigbird-bert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Attention type 'block_sparse' is not possible if sequence_length: 512 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, c

뱅뱅막국수 감성 분석 결과: ['Positive', 'Positive', 'Positive', 'Positive', 'Positive']
파스쿠찌 감성 분석 결과: ['Positive', 'Positive', 'Positive', 'Positive', 'Negative']
한강수 감성 분석 결과: ['Positive', 'Positive', 'Negative', 'Positive', 'Positive']


In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Local CSV locations of the analysed review data, keyed by restaurant.
file_paths = dict(
    bangbang=r"C:\fintech_service\final_project\data\분석\뱅뱅막국수_combined_reviews_분석_재분석.csv",
    pasccucci=r"C:\fintech_service\final_project\data\분석\파스쿠찌_combined_reviews_분석_재분석.csv",
    hangangsu=r"C:\fintech_service\final_project\data\분석\한강수_combined_reviews_분석_재분석.csv",
)

# Read every review file into its own DataFrame.
data_bangbang = pd.read_csv(file_paths["bangbang"])
data_pasccucci = pd.read_csv(file_paths["pasccucci"])
data_hangangsu = pd.read_csv(file_paths["hangangsu"])

# Split each DataFrame into its text column ('Review') and its label
# column ('sentiment').
texts_bangbang, labels_bangbang = data_bangbang['Review'], data_bangbang['sentiment']
texts_pasccucci, labels_pasccucci = data_pasccucci['Review'], data_pasccucci['sentiment']
texts_hangangsu, labels_hangangsu = data_hangangsu['Review'], data_hangangsu['sentiment']

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and a two-label sequence-classification model, then
# move the model onto the selected device.
# NOTE(review): an earlier run of this same load warned that the classifier
# weights of this checkpoint are newly initialized, so predictions are not
# meaningful until the model has been fine-tuned — confirm before use.
tokenizer = AutoTokenizer.from_pretrained("monologg/kobigbird-bert-base")
model = AutoModelForSequenceClassification.from_pretrained(
    "monologg/kobigbird-bert-base", num_labels=2
).to(device)

# 예측 함수 정의
def predict_sentiment(texts, tokenizer, model, device, batch_size=16):
    """Label each text as "Positive" or "Negative" using a classification model.

    Args:
        texts: Iterable of review texts. ``None`` and NaN entries (for
            example empty CSV cells loaded by pandas) are treated as empty
            strings and any other non-string value is converted with
            ``str``, so the result always lines up one-to-one with the input.
        tokenizer: Callable that accepts a list of strings plus the
            ``return_tensors``/``truncation``/``padding``/``max_length``
            keyword arguments and returns an object with ``.to(device)``
            that can be unpacked as keyword arguments for ``model``.
        model: Object providing ``eval()`` and, when called, returning a
            result whose ``logits`` attribute has one row per input text.
        device: Device the encoded inputs are moved to before inference.
        batch_size: Number of texts sent through the model per forward pass.

    Returns:
        A list with one entry per input text: "Positive" when the highest
        logit is at class index 1, otherwise "Negative".

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    def _as_text(value):
        # NaN is the only float that is not equal to itself; pandas uses it
        # for missing cells, and the tokenizer rejects it as a non-string.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return value if isinstance(value, str) else str(value)

    cleaned = [_as_text(value) for value in texts]

    model.eval()  # disable dropout and other training-only behaviour
    predictions = []

    with torch.no_grad():
        for start in range(0, len(cleaned), batch_size):
            batch = cleaned[start:start + batch_size]

            # Same tokenization settings as before, applied to a whole batch
            # so the model runs one forward pass per batch instead of per text.
            inputs = tokenizer(
                batch,
                return_tensors="pt",
                truncation=True,
                padding="max_length",
                max_length=512,
            ).to(device)

            logits = model(**inputs).logits
            class_ids = torch.argmax(logits, dim=1).tolist()
            predictions.extend(
                "Positive" if class_id == 1 else "Negative" for class_id in class_ids
            )

    return predictions

# Classify every review of each restaurant. The three runs stay as separate
# statements so finished results remain available if a later run fails.
bangbang_predictions = predict_sentiment(
    texts=texts_bangbang.tolist(), tokenizer=tokenizer, model=model, device=device
)
pasccucci_predictions = predict_sentiment(
    texts=texts_pasccucci.tolist(), tokenizer=tokenizer, model=model, device=device
)
hangangsu_predictions = predict_sentiment(
    texts=texts_hangangsu.tolist(), tokenizer=tokenizer, model=model, device=device
)

# Attach the predicted label to each DataFrame as a new column.
for frame, predicted in (
    (data_bangbang, bangbang_predictions),
    (data_pasccucci, pasccucci_predictions),
    (data_hangangsu, hangangsu_predictions),
):
    frame['Predicted_Sentiment'] = predicted

# Write each DataFrame, predictions included, to its own CSV file without
# the index column.
for frame, destination in (
    (data_bangbang, r"C:\fintech_service\final_project\data\분석\뱅뱅막국수_감성_분석_결과.csv"),
    (data_pasccucci, r"C:\fintech_service\final_project\data\분석\파스쿠찌_감성_분석_결과.csv"),
    (data_hangangsu, r"C:\fintech_service\final_project\data\분석\한강수_감성_분석_결과.csv"),
):
    frame.to_csv(destination, index=False)

print("감성 분석 결과가 저장되었습니다.")
