## Basic classification with proprietary models

In [53]:
import datasets
import pandas as pd
from tqdm import tqdm
import os
from dotenv import load_dotenv
import time
from abc import ABC, abstractmethod
import openai

# Load environment variables
load_dotenv()

class SentimentAnalyzer(ABC):
    def __init__(self,model: str, prompt: str):
        self._setup(model, prompt)
        
    @abstractmethod
    def _setup(self):
        """Setup API credentials and model configuration"""
        pass
        
    @abstractmethod
    def _get_raw_sentiment(self, text: str) -> str:
        """Get raw sentiment from the API"""
        pass

    def analyze_sentiment(self, text: str) -> str:
        """Analyze sentiment of the given text. Returns 'positive', 'negative', or 'neutral'."""
        try:
            sentiment = self._get_raw_sentiment(text).strip().lower()
            return self._validate_sentiment(sentiment)
        except Exception as e:
            print(f"{self.__class__.__name__} Error: {e}")
            return 'neutral'

    def _validate_sentiment(self, sentiment: str) -> str:
        """Validate and normalize sentiment response"""
        return sentiment if sentiment in ['positive', 'negative', 'neutral'] else 'neutral'

class OpenAIAnalyzer(SentimentAnalyzer):
    def _setup(self,model: str, prompt: str):
        self.model = model 
        self.prompt = prompt
    def _get_raw_sentiment(self, text: str) -> str:
        client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
        response = client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.prompt},
                {"role": "user", "content": text}
            ],
            temperature=0,
            max_tokens=10
        )

        return response.choices[0].message.content


In [54]:
dataset = datasets.load_dataset("tweet_eval", "sentiment")

# Convert to pandas DataFrame for easier handling
train_df = pd.DataFrame(dataset['train'])
test_df = pd.DataFrame(dataset['test'])

# Take a subset for testing (to manage API costs)
sample_size = 100
train_sample = train_df.sample(n=sample_size, random_state=42)

In [55]:

# Prompt variants for sentiment classification
prompts = [
    "You are a sentiment analysis assistant. Classify the sentiment of the given tweet as 'positive', 'neutral', or 'negative'. Reply with only one word.",
    "Analyze the emotional tone of this tweet and categorize it as 'positive', 'neutral', or 'negative'. Provide only a one-word response.",
    "Determine whether the following tweet expresses a 'positive', 'neutral', or 'negative' sentiment. Respond with a single word only.",
    "As a sentiment classifier, evaluate this tweet and label it as 'positive', 'neutral', or 'negative'. Your response should be exactly one word.",
    "Read this tweet and identify its sentiment. Is it 'positive', 'neutral', or 'negative'? Answer with just one word."
]

In [59]:
# Load TweetEval dataset
model = "gpt-3.5-turbo" #"gpt-4o-mini" #
prompt_ix = 2

analyzer = OpenAIAnalyzer(model=model, prompt=prompts[prompt_ix])
print("Loading TweetEval dataset...")


print(f"Processing {sample_size} tweets...")

# Add sentiment predictions
sentiments = []
for tweet in tqdm(train_sample['text']):
    sentiment = analyzer.analyze_sentiment(tweet)
    sentiments.append(sentiment)
    # Add a small delay to avoid rate limits
    time.sleep(0.1)

column_name = f'predicted_sentiment_{model}_promptix_{prompt_ix}'
train_sample[column_name] = sentiments

# Save results
output_file = f'tweet_sentiment_results_{model}_promptix_{prompt_ix}.csv'
train_sample.to_csv(output_file, index=False)
print(f"Results saved to {output_file}")

# Print some statistics
print("\nSentiment Distribution:")
print(train_sample[column_name].value_counts())



Loading TweetEval dataset...
Processing 100 tweets...


100%|██████████| 100/100 [01:04<00:00,  1.55it/s]

Results saved to tweet_sentiment_results_gpt-3.5-turbo_promptix_2.csv

Sentiment Distribution:
predicted_sentiment_gpt-3.5-turbo_promptix_2
positive    54
negative    25
neutral     21
Name: count, dtype: int64





In [60]:
train_sample.head()

Unnamed: 0,text,label,predicted_sentiment_gpt-4o-mini_promptix_2,predicted_sentiment_gpt-3.5-turbo_promptix_2
11449,I forgot all about Ice Cube being in the movie...,0,neutral,neutral
26433,playoffs are finally set. Chardon plays warren...,1,neutral,positive
33669,Are we just going to ignore the fact that Ice ...,1,negative,negative
33013,If you live in the South Orlando area\u002c be...,1,positive,positive
13399,First record of Colin Baker at the BBC: BBC2 s...,1,neutral,positive


In [61]:
# Calculate and print accuracy
def calculate_accuracy(train_sample, column_name):
    id2label = {0: 'negative', 1: 'neutral', 2: 'positive'}
    label2id = {v: k for k, v in id2label.items()}
    correct_predictions = [label2id[train_sample[column_name].iloc[i]] == train_sample['label'].iloc[i] for i in range(sample_size)]
    accuracy = sum(correct_predictions) / sample_size
    print(f"\nAccuracy: {accuracy:.2%}")
calculate_accuracy(train_sample, column_name)


Accuracy: 64.00%


## Batch Processing

In [81]:
import json
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Prepare batch processing data
print("Preparing batch processing data...")


def get_batch_requests(model: str, train_sample, prompt: str):
    # Create a list to store request objects
    batch_requests = []

    # Loop through the sample tweets to create batch requests
    for idx, tweet in enumerate(train_sample['text']):
        request = {
            "custom_id": f"tweet-{idx}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model,
                "messages": [
                    {"role": "system", "content":  prompt},
                    {"role": "user", "content": tweet}
                ],
                "max_tokens": 10
            }
        }
        batch_requests.append(request)
    return batch_requests



batch_requests = get_batch_requests(model, train_sample, prompts[prompt_ix])
# Write requests to a JSONL file
output_jsonl = 'batch_sentiment_requests2.jsonl'
with open(output_jsonl, 'w') as f:
    for request in batch_requests:
        f.write(json.dumps(request) + '\n')

print(f"Batch requests saved to {output_jsonl}")
print(f"Total requests prepared: {len(batch_requests)}")



Preparing batch processing data...
Batch requests saved to batch_sentiment_requests2.jsonl
Total requests prepared: 100


In [82]:
from openai import OpenAI
client = OpenAI()

batch_input_file = client.files.create(
    file=open("batch_sentiment_requests2.jsonl", "rb"),
    purpose="batch"
)

print(batch_input_file)

FileObject(id='file-T2gV1JgMG2urmv8vr6E3fV', bytes=45136, created_at=1744218995, filename='batch_sentiment_requests2.jsonl', object='file', purpose='batch', status='processed', expires_at=None, status_details=None)


In [83]:
from openai import OpenAI
client = OpenAI()

batch_input_file_id = batch_input_file.id
client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={
        "description": "Sentiment analysis of tweets"
    }
)

Batch(id='batch_67f6ab9098908190968c0f4379fbf66f', completion_window='24h', created_at=1744219024, endpoint='/v1/chat/completions', input_file_id='file-T2gV1JgMG2urmv8vr6E3fV', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1744305424, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'Sentiment analysis of tweets'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

In [94]:
batch = client.batches.retrieve('batch_67f6ab9098908190968c0f4379fbf66f')
batch.status

'completed'

In [98]:
client.batches.list(limit=10).data

[Batch(id='batch_67f6ab9098908190968c0f4379fbf66f', completion_window='24h', created_at=1744219024, endpoint='/v1/chat/completions', input_file_id='file-T2gV1JgMG2urmv8vr6E3fV', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1744219054, error_file_id=None, errors=None, expired_at=None, expires_at=1744305424, failed_at=None, finalizing_at=1744219048, in_progress_at=1744219025, metadata={'description': 'Sentiment analysis of tweets'}, output_file_id='file-ApLMS99TktKPGedJpG71YC', request_counts=BatchRequestCounts(completed=100, failed=0, total=100)),
 Batch(id='batch_67f6a87784ec8190bc1b49a896919589', completion_window='24h', created_at=1744218231, endpoint='/v1/chat/completions', input_file_id='file-XRBDjmKoo2gy41pDrquwmm', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1744218325, error_file_id=None, errors=None, expired_at=None, expires_at=1744304631, failed_at=None, finalizing_at=1744218317, in_progress

In [99]:
import json
file_response = client.files.content("file-GW9WhvpmqkC6aecCEGJfUn")
# Get the outputs from the file response
file_content = file_response.text

# Parse the JSON content
try:
    # If the content is a single JSON object
    parsed_data = json.loads(file_content)
    print("Successfully parsed JSON data")
    print(f"Number of records: 1")
    
except json.JSONDecodeError:
    # If the content is JSONL (multiple JSON objects, one per line)
    parsed_data = []
    for line in file_content.strip().split('\n'):
        try:
            parsed_data.append(json.loads(line))
        except json.JSONDecodeError as e:
            print(f"Error parsing line: {e}")
    
    print("Successfully parsed JSONL data")
    print(f"Number of records: {len(parsed_data)}")

# Display a sample of the parsed data
if parsed_data:
    if isinstance(parsed_data, list):
        print("\nSample record:")
        print(json.dumps(parsed_data[0], indent=2))
    else:
        print("\nParsed data:")
        print(json.dumps(parsed_data, indent=2))


Successfully parsed JSONL data
Number of records: 100

Sample record:
{
  "id": "batch_req_67f6a8cd91348190adc964ca1b7e4a2d",
  "custom_id": "tweet-0",
  "response": {
    "status_code": 200,
    "request_id": "42bf69f07acef7354529b5a5bea04cab",
    "body": {
      "id": "chatcmpl-BKT9DGbsiBCwYxMNFjezcGzqYUNqm",
      "object": "chat.completion",
      "created": 1744218303,
      "model": "gpt-3.5-turbo-0125",
      "choices": [
        {
          "index": 0,
          "message": {
            "role": "assistant",
            "content": "neutral",
            "refusal": null,
            "annotations": []
          },
          "logprobs": null,
          "finish_reason": "stop"
        }
      ],
      "usage": {
        "prompt_tokens": 61,
        "completion_tokens": 2,
        "total_tokens": 63,
        "prompt_tokens_details": {
          "cached_tokens": 0,
          "audio_tokens": 0
        },
        "completion_tokens_details": {
          "reasoning_tokens": 0,
         

In [111]:
# Extract predictions from the batch output
predictions = []
for item in parsed_data:
    # Extract the sentiment prediction from the model's response
    response_content = item['response']['body']['choices'][0]['message']['content']
    
    # Determine the sentiment from the response
    sentiment = None
    response_lower = response_content.lower()
    if 'positive' in response_lower:
        sentiment = 'positive'
    elif 'negative' in response_lower:
        sentiment = 'negative'
    elif 'neutral' in response_lower:
        sentiment = 'neutral'
    else:
        sentiment = 'unknown'
    
    predictions.append({
        'id': item['custom_id'].split('-')[1], # !!! the response can have different order as the request
        'predicted_sentiment': sentiment
    })

predictions = [pred['predicted_sentiment'] for pred in sorted(predictions, key=lambda x: int(x['id']))]
predictions



['neutral',
 'positive',
 'negative',
 'positive',
 'neutral',
 'positive',
 'positive',
 'neutral',
 'positive',
 'neutral',
 'negative',
 'positive',
 'negative',
 'negative',
 'negative',
 'positive',
 'negative',
 'positive',
 'positive',
 'positive',
 'positive',
 'negative',
 'neutral',
 'positive',
 'negative',
 'positive',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'positive',
 'positive',
 'neutral',
 'negative',
 'positive',
 'positive',
 'positive',
 'neutral',
 'negative',
 'positive',
 'neutral',
 'positive',
 'neutral',
 'positive',
 'neutral',
 'negative',
 'neutral',
 'neutral',
 'positive',
 'positive',
 'negative',
 'positive',
 'neutral',
 'negative',
 'positive',
 'positive',
 'negative',
 'negative',
 'positive',
 'positive',
 'positive',
 'negative',
 'positive',
 'positive',
 'positive',
 'positive',
 'neutral',
 'negative',
 'positive',
 'neutral',
 'positive',
 'negative',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'negative',
 'p

In [113]:
column_name = f'predicted_sentiment_{model}_promptix_{prompt_ix}_batch'
train_sample[column_name] = sentiments
calculate_accuracy(train_sample, column_name)


Accuracy: 64.00%
