In [None]:
import pandas as pd
import ast
import openai
import time
import os
import openai
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load the dataset
df = pd.read_csv('./IMDB Dataset.csv')


# Initialize a column for predicted sentiments
df['predicted_sentiment'] = None

# Process in batches of 20 reviews
batch_size = 20
total_reviews = len(df)

# Only classify batches that end within the first 1000 reviews, since it is
# costly to run all 50,000 records.
max_reviews = 1000

for start in range(0, total_reviews, batch_size):
    end = min(start + batch_size, total_reviews)

    # `end` only grows from batch to batch, so once one batch exceeds the cap
    # every later batch does too: stop the whole run instead of iterating over
    # the rest of the dataset doing nothing.
    if end > max_reviews:
        break

    reviews = df['review'][start:end].tolist()

    # Numbering each review in the batch
    numbered_reviews = [f"{i+1}. {review}" for i, review in enumerate(reviews)]

    # Formatting the reviews into a string for the prompt. The prompt is the
    # same on every retry, so it is built once per batch.
    reviews_content = "\n".join(numbered_reviews)

    while True:
        try:
            # Send the request to OpenAI
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-1106",
                messages=[
                    {"role": "system", "content": "Classify each of the following reviews as either 'positive' or 'negative'. Please return your classifications in the form of a list. For example, ['positive', 'negative', ...]"},
                    {"role": "user", "content": reviews_content},
                    {"role": "assistant", "content": ""},
                ],
                # Lowest-randomness sampling so repeated runs give comparable labels.
                temperature=0,
            )
        except openai.error.RateLimitError:
            print("Rate limit reached. Waiting for 45 seconds before retrying...")
            time.sleep(45)
            continue

        # Convert the response to a list. The model may answer with something
        # that is not a Python literal (prose, code fences, empty content); in
        # that case report the batch and move on instead of aborting the run.
        try:
            predicted_sentiments = ast.literal_eval(response.choices[0].message.content)
        except (ValueError, SyntaxError, TypeError):
            print(f"Error: Could not parse the model response as a list for batch starting at index {start}.")
            break

        # Check that we got a sequence with exactly one prediction per review
        if isinstance(predicted_sentiments, (list, tuple)) and len(predicted_sentiments) == len(reviews):
            # Normalise case/whitespace so labels compare equal to the
            # lowercase 'positive'/'negative' values requested in the prompt.
            df.loc[start:end-1, 'predicted_sentiment'] = [
                str(label).strip().lower() for label in predicted_sentiments
            ]
            # Checkpoint after every successful batch
            df.to_csv('./IMDB Dataset_with_Predictions.csv', index=False)
        else:
            print(f"Error: The number of predictions does not match the number of reviews for batch starting at index {start}.")
        break

In [8]:
import pandas as pd
from sklearn.metrics import precision_score, confusion_matrix

# Filter out rows where 'predicted_sentiment' is null or missing in case we decide to rerun code for small sample
filtered_df = df.dropna(subset=['predicted_sentiment'])

# Count the number of matches between predicted and actual sentiments in the filtered DataFrame
correct_predictions = (filtered_df['predicted_sentiment'] == filtered_df['sentiment']).sum()
total_predictions = len(filtered_df)

# Calculate accuracy (reported as 0 when there are no predictions to score)
accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0

# Calculate precision
# Note: with average='weighted', precision is calculated for each class
# individually and then averaged, weighting each class by its support
# (number of true instances).
precision = precision_score(filtered_df['sentiment'], filtered_df['predicted_sentiment'], average='weighted')

# Create a confusion matrix (rows are actual labels, columns are predicted labels)
conf_matrix = confusion_matrix(filtered_df['sentiment'], filtered_df['predicted_sentiment'])

# Print results
print(f"Accuracy: {accuracy:.2%}")
# Precision was previously computed but never reported.
print(f"Precision: {precision:.2%}")
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 82.00%
Precision: 82.00%
Confusion Matrix:
[[20506  4494]
 [ 4506 20494]]
