# Preliminaries and Imports

In [None]:
# Make Google Drive reachable from the Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# system and path set up
import os
import sys

# Project root on the mounted Google Drive. Later cells import the `src`
# package from here and read the CSV splits from its `data` sub-folder.
GOOGLE_DRIVE_PATH = os.path.join(
    '/content', 'drive', 'My Drive', 'ColabNotebooks', 'DL','Deep Learning Project Full',
    'DL Team Project', '20251126_22.47_DL_Team_Project_Code_Files_Structure')

# Listing the folder doubles as a check that the drive is mounted and the
# path exists (os.listdir raises FileNotFoundError otherwise).
print(os.listdir(GOOGLE_DRIVE_PATH))

# Register the project root for imports only once, so re-running this cell
# does not pile up duplicate entries in sys.path.
if GOOGLE_DRIVE_PATH not in sys.path:
    sys.path.append(GOOGLE_DRIVE_PATH)

In [None]:
# install libraries (run once, then comment out again)
# Note: the command is kept on a single line because parentheses and bare
# line breaks are not valid in a shell / IPython magic command.
# %pip install -q transformers datasets huggingface_hub evaluate sentencepiece scikit-learn imbalanced-learn accelerate torch pandas matplotlib

In [None]:
# import standard libraries
import pandas as pd

# import utils
from src import utils

# import model
from src.model_openai_zero_few import OpenAiModelZeroFew

In [None]:
# set seed for reproducibility (passed as random_state when down-sampling val/test)
seed = 42

# Load and prepare data 

In [None]:
# Locations of the pre-processed splits inside the project's data folder
csv_path_train_data = f"{GOOGLE_DRIVE_PATH}/data/data_train.csv"
csv_path_val_data = f"{GOOGLE_DRIVE_PATH}/data/data_val.csv"
csv_path_test_data = f"{GOOGLE_DRIVE_PATH}/data/data_test.csv"

# Read the three CSV files (train, val, test - in that order) into DataFrames
train_bal, val, test = map(
    pd.read_csv,
    (csv_path_train_data, csv_path_val_data, csv_path_test_data),
)

In [None]:
# keep only needed columns (drop lang and label5)
# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
cols_to_drop = ["label5", "lang"]
train_bal = train_bal.drop(columns=cols_to_drop, errors="ignore")
val = val.drop(columns=cols_to_drop, errors="ignore")
test = test.drop(columns=cols_to_drop, errors="ignore")

In [None]:
# downsample data
# notes:
# - costs are incurred per input and output token; thus, for this student project we downsample
# - train: no real train data needed; however, few-shot examples are taken from train data

# Number of reviews sent to the API per split (upper bound).
n_eval_samples = 100

# Cap n at the split size: DataFrame.sample raises ValueError when n exceeds
# the number of rows (sampling is without replacement by default).
val = val.sample(n=min(n_eval_samples, len(val)), random_state=seed)
test = test.sample(n=min(n_eval_samples, len(test)), random_state=seed)

# Run model/ Experiment

Specify model: 
- Developer message (for few-shot: include examples)
- OpenAI model (e.g., gpt-4.1-mini)

Specify data: 
- val data: for tuning
- test data: for final run

In [12]:
# developer message: instruction prompt handed to the model wrapper below
# Note: this is the zero-shot variant, i.e. the prompt has no "# Examples" and
# no "# Context" section (a few-shot prompt would add examples here).
developer_message = """
# Identity:

You are a helpful assistant that labels consumer product reviews 
as negative, neutral, or positive. 

# Instructions:

* Depending on the sentiment in the product review that you are given, 
respond with a single word: "negative", "neutral", "positive" 
* Do not add an additional comment or formatting to your single word response
"""

In [None]:
# Build the model wrapper from the chosen OpenAI model and the developer prompt
openai_model = OpenAiModelZeroFew(model="gpt-4.1-mini", developer_message=developer_message)

# Classify the validation reviews; progress is reported every 10 reviews
review_texts = val["text"]
pred_txt = openai_model.predict(review_texts, report_interval=10)

# Convert the textual predictions to numbers for comparison with val["label3"]
# (presumably the same 3-class encoding as label3 - confirm in src.model_openai_zero_few)
pred_num = openai_model.label3_text_to_num(pred_txt)

Reviews processed: 10
Reviews processed: 20
Reviews processed: 30
Reviews processed: 40
Reviews processed: 50
Reviews processed: 60
Reviews processed: 70
Reviews processed: 80
Reviews processed: 90
Reviews processed: 100


In [None]:
# sanity check: share of validation reviews whose predicted label matches label3
is_correct = val["label3"] == pred_num
acc = is_correct.sum() / len(pred_num)
print(f"acc cntrl: {acc}")

acc cntrl: 0.76


In [16]:
# assess cost (i.e. tokens used) as tracked on the model wrapper
input_tokens_used = openai_model.input_tokens
output_tokens_used = openai_model.output_tokens
print("Input tokens used:", input_tokens_used)
print("Output tokens used:", output_tokens_used)

Input tokens used: 43800
Output tokens used: 600


# Evaluate Results/ Experiment

In [10]:
# TODO: Full evaluation using KP's function