In [None]:
import openai
import pandas as pd
from sklearn.metrics import classification_report
from tqdm import tqdm

import os
from dotenv import load_dotenv
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if OPENAI_API_KEY is None:
    print("Error: OPENAI_API_KEY is not set in the .env file.")
else:
    openai.api_key = OPENAI_API_KEY
    print("OpenAI API Key loaded successfully!")

# Load dataset
file_path = 'train.csv'
data = pd.read_csv(file_path)

# Preprocessing
X = data['sentence'].tolist()
y = data['label'].tolist()
label_to_index = {label: i for i, label in enumerate(set(y))}
index_to_label = {i: label for label, i in label_to_index.items()}

# Splitting the dataset
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Helper Function to Classify Using OpenAI
def classify_with_openai(prompt, candidate_labels, model="gpt-4"):
    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant for text classification. Just give the output as correct label"},
                {"role": "user", "content": prompt}
            ],
            temperature=0  # Ensures deterministic outputs
        )
        result = response['choices'][0]['message']['content'].strip()
        return result
    except Exception as e:
        print(f"Error: {e}")
        return None

# Create prompts and classify
def create_prompt(sentence, labels):
    label_list = ", ".join(labels)
    return f"The following text: \"{sentence}\" belongs to one of these categories: {label_list}. Identify the most suitable category."

# Evaluate the model
def evaluate_openai(X_data, y_data, labels, model="gpt-4"):
    predictions = []
    true_labels = []

    for sentence, true_label in tqdm(zip(X_data, y_data), total=len(y_data), desc="Evaluating"):
        prompt = create_prompt(sentence, labels)
        predicted_label = classify_with_openai(prompt, labels, model=model)
        predictions.append(predicted_label)
        true_labels.append(true_label)

    return predictions, true_labels

# Run Evaluation
print("Evaluating OpenAI model...")
candidate_labels = list(label_to_index.keys())
predictions, true_labels = evaluate_openai(X_val, y_val, candidate_labels, model="gpt-4")




Evaluating OpenAI model...


Evaluating: 100%|██████████| 66/66 [00:51<00:00,  1.29it/s]


In [9]:
# Classification Report
print("\nClassification Report:")
print(classification_report(true_labels, predictions, target_names=candidate_labels))



Classification Report:
                       precision    recall  f1-score   support

   SIZE_CUSTOMIZATION       1.00      0.50      0.67         4
             WARRANTY       0.67      0.67      0.67         3
         DISTRIBUTORS       1.00      1.00      1.00         2
        MATTRESS_COST       1.00      1.00      1.00         1
100_NIGHT_TRIAL_OFFER       1.00      1.00      1.00         2
             LEAD_GEN       0.50      1.00      0.67         1
   ABOUT_SOF_MATTRESS       1.00      0.50      0.67         2
               OFFERS       1.00      1.00      1.00         8
       ORTHO_FEATURES       1.00      0.80      0.89         5
    DELAY_IN_DELIVERY       1.00      0.75      0.86         4
                  COD       0.75      0.75      0.75         4
        ERGO_FEATURES       1.00      1.00      1.00         3
              PILLOWS       0.75      1.00      0.86         3
      RETURN_EXCHANGE       0.33      1.00      0.50         1
         ORDER_STATUS       1.