In [1]:
import os
from openai import OpenAI
from dotenv import load_dotenv

# Pull settings from a local .env file (if present) into the process environment.
load_dotenv()

# Create the OpenAI client using the API key taken from the environment.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [2]:
# Review whose sentiment we want the model to classify.
review = "이 영화는 정말 재미있고 감동적이에요!"

# Sentiment analysis with the OpenAI chat completions API.
# The conversation is: instruction, two labelled demonstrations, then the review.
system_prompt = "Analyze the sentiment of the following movie review and categorize it strictly as 1 for positive or 0 for negative without providing any explanation or reasoning."
chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "핵노잼"},
    {"role": "assistant", "content": "0"},
    {"role": "user", "content": "개꿀잼"},
    {"role": "assistant", "content": "1"},
    {"role": "user", "content": review},
]
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=chat_messages,
    max_tokens=60,
)

# Print the review together with the predicted label.
first_choice = response.choices[0]
message = first_choice.message.content.strip()
print(f"Review: {review}\nSentiment: {message}")

Review: 이 영화는 정말 재미있고 감동적이에요!
Sentiment: 1


In [3]:
import json
import pandas as pd
import urllib.request

# Download the Naver Sentiment Movie Corpus v1.0 (NSMC) train/test files.
# Files already on disk are reused so that re-running the notebook does not
# fetch them again. Each download goes to a temporary ".part" file first and
# is renamed only after it finishes, so an interrupted download is never
# mistaken for a complete file on the next run.
NSMC_BASE_URL = "https://raw.githubusercontent.com/e9t/nsmc/master/"

for nsmc_file in ("ratings_train.txt", "ratings_test.txt"):
    if os.path.exists(nsmc_file):
        continue
    partial_file = nsmc_file + ".part"
    urllib.request.urlretrieve(NSMC_BASE_URL + nsmc_file, filename=partial_file)
    os.replace(partial_file, nsmc_file)

('ratings_test.txt', <http.client.HTTPMessage at 0x155480ca0>)

In [4]:
# Read the tab-separated train/test files into DataFrames.
train_data = pd.read_csv('ratings_train.txt', sep='\t')
test_data = pd.read_csv('ratings_test.txt', sep='\t')

# Draw five training reviews (fixed seed) to serve as few-shot demonstrations.
few_shot_samples = train_data.sample(n=5, random_state=42)

# Show the sampled rows as a quick sanity check.
few_shot_samples

Unnamed: 0,id,document,label
59770,8932939,수OO만에 다시보네여,1
21362,3681731,일방적인 영화다. 관객 좀 고려해주시길,0
127324,9847174,세상을 초월하는 한 사람의 선한 마음,1
140509,8506899,멍하다.. 여러생각이 겹치는데 오랜만에 영화 보고 이런 느낌 느껴본다,1
144297,9991656,"우와 별 반개도 아까운판에 밑에 CJ 알바생들 쩐다.. 전부 만점이야 ㅎㅎㅎ..,....",0


In [5]:
# Turn each sampled review into a user/assistant message pair:
# the review text is the user turn and its label (as a string) is the reply.
few_shot_examples = []
for document, label in zip(few_shot_samples['document'], few_shot_samples['label']):
    few_shot_examples.append({"role": "user", "content": document})
    few_shot_examples.append({"role": "assistant", "content": str(label)})

In [6]:
# Display the assembled few-shot message list (alternating user/assistant turns).
few_shot_examples

[{'role': 'user', 'content': '수OO만에 다시보네여'},
 {'role': 'assistant', 'content': '1'},
 {'role': 'user', 'content': '일방적인 영화다. 관객 좀 고려해주시길'},
 {'role': 'assistant', 'content': '0'},
 {'role': 'user', 'content': '세상을 초월하는 한 사람의 선한 마음'},
 {'role': 'assistant', 'content': '1'},
 {'role': 'user', 'content': '멍하다.. 여러생각이 겹치는데 오랜만에 영화 보고 이런 느낌 느껴본다'},
 {'role': 'assistant', 'content': '1'},
 {'role': 'user',
  'content': '우와 별 반개도 아까운판에 밑에 CJ 알바생들 쩐다.. 전부 만점이야 ㅎㅎㅎ..,. CJ야 그만해라 저영화는 정말 쓰레기다...원작에서 크게벋어났고 마치 메간폭스를 위해 스폰한 영화. 저걸보느니 투니버스봐라. 돈아깝고 시간아깝다.'},
 {'role': 'assistant', 'content': '0'}]

Sentiment Analysis with the Batch API

In [7]:
# Draw 100 test reviews with a fixed seed. Rows whose review text is missing
# (NaN), if any, are dropped: json.dumps would write such a value as a bare
# NaN token, which is not valid JSON for a request body.
test_data_sample = test_data.sample(100, random_state=42).dropna(subset=['document'])

# The instruction is identical for every request, so build it once.
system_message = {
    "role": "system",
    "content": "Analyze the sentiment of the following movie review and categorize it strictly as 1 for positive or 0 for negative without providing any explanation or reasoning."
}

# One Batch API request per review: instruction + few-shot examples + the review.
tasks = []
for idx, row in test_data_sample.iterrows():
    messages = [
        system_message,
        *few_shot_examples,
        {"role": "user", "content": row['document']},
    ]

    task = {
        # custom_id carries the DataFrame index so each result can be matched
        # back to its source row later.
        "custom_id": f"task-{idx}",
        "method": "POST",  # API call method
        "url": "/v1/chat/completions",  # API endpoint URL
        "body": {
            "model": "gpt-4o-mini",
            "messages": messages,
            "max_tokens": 60
        }
    }
    tasks.append(task)

# Store the requests as JSONL (one JSON object per line). The encoding is set
# explicitly so the file does not depend on the platform default.
file_name = "batch_tasks_naver_reviews.jsonl"
with open(file_name, 'w', encoding='utf-8') as file:
    for obj in tasks:
        file.write(json.dumps(obj) + '\n')

**Be aware that running the Batch API cells below incurs API usage costs.**

In [ ]:
# Upload the JSONL request file. The context manager closes the local file
# handle once the upload call returns instead of leaving it open.
with open(file_name, "rb") as batch_file:
    batch_input_file = client.files.create(
        file=batch_file,
        purpose="batch"
    )

# Create the batch job that runs every request in the uploaded file against
# the chat completions endpoint within a 24-hour completion window.
batch_job = client.batches.create(
    input_file_id=batch_input_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h"
)

In [ ]:
import time

# Poll the batch once a minute until it reaches a terminal state.
# Besides 'completed' and 'failed', a batch can also end as 'expired' or
# 'cancelled'; without those two the loop would never exit for such a batch.
TERMINAL_BATCH_STATES = {'completed', 'failed', 'expired', 'cancelled'}

batch_id = batch_job.id
while True:
    batch_status = client.batches.retrieve(batch_id)
    # Print only the status string; the full object stays in `batch_status`.
    print("Batch 상태:", batch_status.status)
    if batch_status.status in TERMINAL_BATCH_STATES:
        break
    time.sleep(60)  # wait one minute between polls

In [ ]:
# Look up the batch's output file. Fail with a clear message when there is
# none (e.g. the batch did not complete), rather than passing None on to the
# file-download call.
result_file_id = batch_status.output_file_id
if result_file_id is None:
    raise RuntimeError(
        f"Batch {batch_status.id} ended with status '{batch_status.status}' "
        "and has no output file to download."
    )
result_content = client.files.content(result_file_id).content

# Save the raw result bytes to a local JSONL file.
result_file_name = "batch_job_results_naver_reviews.jsonl"
with open(result_file_name, 'wb') as file:
    file.write(result_content)

In [ ]:
# Parse the saved JSONL results: one JSON object per non-empty line.
# The file is read as UTF-8 explicitly rather than with the platform default,
# and blank lines are skipped so a trailing newline cannot break json.loads.
results = []
with open(result_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:
            results.append(json.loads(line))

# Preview only the first few entries instead of dumping the whole list.
results[:3]

In [ ]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Ground-truth label for every request, keyed by the custom_id ("task-<index>")
# that was assigned when the batch tasks were built.
label_by_task = {
    f"task-{idx}": int(label)
    for idx, label in test_data_sample['label'].items()
}

# Pair each prediction with its label through custom_id. Batch output lines
# are not guaranteed to come back in input order, so pairing by position
# could compare a prediction against the wrong review's label.
actuals = []
predictions = []
skipped_tasks = []

for res in results:
    task_id = res['custom_id']
    response = res.get('response')

    # Requests that errored carry no usable completion.
    if res.get('error') or not response or response.get('status_code') != 200:
        skipped_tasks.append(task_id)
        continue

    prediction = (response['body']['choices'][0]['message']['content'] or '').strip()

    # Only a bare "0" or "1" can be scored; anything else is reported, not guessed.
    if prediction not in ('0', '1'):
        skipped_tasks.append(task_id)
        continue

    actuals.append(label_by_task[task_id])
    predictions.append(int(prediction))

if skipped_tasks:
    print(f"Skipped {len(skipped_tasks)} result(s) without a valid 0/1 prediction: {skipped_tasks}")

# evaluation metrics
accuracy = accuracy_score(actuals, predictions)
precision = precision_score(actuals, predictions)
recall = recall_score(actuals, predictions)
f1 = f1_score(actuals, predictions)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")