In [None]:
import google.generativeai as genai
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
from time import sleep

# Step 2: Set Gemini API key
genai.configure(api_key="YOUR_API_KEY")

# Step 3: Load Gemini model
model = genai.GenerativeModel("gemini-2.0-flash")

# Step 4: Define batched classification prompt
def classify_batch(text_batch):
    batched_prompt = """
You are a mental health sentiment classifier. Classify each of the following user texts into one of these 4 labels:

0 → neutral  
1 → low mood / depressed  
2 → anxious / worried  
3 → stressed / overwhelmed

For each input, return only a list of integers (one per text) in the same order. No explanation. Example: [1, 0, 2]

Texts:
""" + "\n\n".join([f"{i+1}. {text}" for i, text in enumerate(text_batch)]) + "\n\nAnswer:"

    try:
        response = model.generate_content(batched_prompt)
        preds_text = response.text.strip()
        preds = eval(preds_text.split("\n")[0])
        return preds
    except Exception as e:
        print("Error:", e)
        return [None] * len(text_batch)

In [2]:
df = pd.read_csv('Datasets/journal_reddit_posts.csv')

texts = df['text'].tolist()
true_labels = df['label'].tolist()
ids = df['id'].tolist()

In [11]:
import time

# Step 6: Predict in batches with rate limit handling
predicted_labels = []
batch_size = 3

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]

    try:
        batch_preds = classify_batch(batch_texts)
        
        # Check output shape (sanity)
        if len(batch_preds) != batch_size:
            print(f"⚠️ Unexpected prediction count at batch {i//batch_size + 1}: {batch_preds}")
        
        predicted_labels.extend(batch_preds)
        print(f"✅ Batch {i//batch_size + 1}: {batch_preds}")
        time.sleep(5)  # Delay between requests to avoid triggering RPD
    except Exception as e:
        print(f"❌ Error on batch {i//batch_size + 1}: {e}")
        if "quota" in str(e).lower() or "rate" in str(e).lower() or "429" in str(e):
            print("🛑 Rate limit or quota exceeded. Stopping.")
            break

✅ Batch 1: [0, 0, 0]
✅ Batch 2: [2, 1, 0]
✅ Batch 3: [0, 2, 0]
✅ Batch 4: [0, 0, 0]
✅ Batch 5: [2, 1, 0]
✅ Batch 6: [1, 0, 0]
✅ Batch 7: [3, 1, 0]
✅ Batch 8: [0, 0, 0]
✅ Batch 9: [0, 0, 0]
✅ Batch 10: [0, 3, 2]
✅ Batch 11: [3, 0, 2]
✅ Batch 12: [0, 0, 3]
✅ Batch 13: [0, 0, 0]
✅ Batch 14: [0, 3, 1]
✅ Batch 15: [1, 0, 3]
✅ Batch 16: [0, 0, 1]
✅ Batch 17: [2, 0, 0]
✅ Batch 18: [1, 0, 0]
✅ Batch 19: [0, 0, 3]
✅ Batch 20: [2, 2, 0]
✅ Batch 21: [0, 0, 3]
✅ Batch 22: [1, 0, 0]
✅ Batch 23: [0, 0, 0]
✅ Batch 24: [0, 0, 0]
✅ Batch 25: [0, 0, 2]
✅ Batch 26: [1, 0, 0]
✅ Batch 27: [1, 0, 0]
✅ Batch 28: [2, 0, 0]
✅ Batch 29: [0, 0, 0]
✅ Batch 30: [0, 0, 2]
✅ Batch 31: [0, 0, 0]
✅ Batch 32: [0, 2, 0]
✅ Batch 33: [0, 1, 0]
✅ Batch 34: [0, 0, 0]
✅ Batch 35: [0, 0, 3]
✅ Batch 36: [0, 0, 0]
✅ Batch 37: [0, 0, 2]
✅ Batch 38: [0, 0, 0]
✅ Batch 39: [0, 2, 3]
✅ Batch 40: [0, 2, 0]
✅ Batch 41: [0, 0, 0]
✅ Batch 42: [0, 0, 0]
✅ Batch 43: [0, 0, 0]
✅ Batch 44: [2, 1, 0]
✅ Batch 45: [0, 0, 0]
✅ Batch 46: [2, 0, 

KeyboardInterrupt: 

In [14]:
# Step 7: Evaluation metrics
# Replace None with 0 in predictions
cleaned_preds = [0 if p is None else int(p) for p in predicted_labels]

# Truncate true labels to match
true_labels_eval = true_labels[:len(cleaned_preds)]

print("\nEvaluation Results:")
print("Accuracy:", accuracy_score(true_labels_eval, predicted_labels))
print("Macro F1 Score:", f1_score(true_labels_eval, predicted_labels, average='macro'))
print("Weighted F1 Score:", f1_score(true_labels_eval, predicted_labels, average='weighted'))


Evaluation Results:


ValueError: Classification metrics can't handle a mix of multiclass and unknown targets