# In [None]:
import pandas as pd
from ollama import chat
from pydantic import BaseModel, Field
from typing import Literal

# -----------------------------
# Sentiment Schema & Function

# Structured-output schema for the sentiment call: a single required float.
# Its JSON schema (model_json_schema()) is passed as `format=` to `chat`, and
# the model's reply is validated against it in analyze_sentiment_continuous.
# NOTE: documented with `#` comments on purpose -- pydantic copies a model's
# class docstring into the generated JSON schema, which would change the
# payload sent to the LLM.
class SentimentScoreResult(BaseModel):
    # Required (`...` = no default). Validation rejects values outside
    # [-1.0, 1.0] via the ge/le bounds; the description is part of the schema.
    sentiment_score: float = Field(
        ..., ge=-1.0, le=1.0,
        description="Sentiment score between -1 and 1"
    )

def analyze_sentiment_continuous(text: str, model: str = "llama3.2:latest") -> float:
    """
    Return a continuous sentiment score for ``text`` in the range [-1, 1].

    The text is sent to the LLM via ``chat`` with the JSON schema of
    ``SentimentScoreResult`` as the requested output format.

    Args:
        text: The text to score.
        model: Name of the model passed to ``chat``.

    Returns:
        The score clamped to [-1.0, 1.0]. If the reply cannot be interpreted
        as a number (empty, non-numeric, or NaN), 0.0 (neutral) is returned.
    """
    import json
    import math

    response = chat(
        messages=[{
            "role": "user",
            "content": (
                "You are a sentiment analysis expert.\n"
                "Evaluate the overall sentiment of the following text.\n"
                "Return only a single numeric value between -1 and 1, where:\n"
                "-1 = very negative, 0 = neutral, 1 = very positive.\n\n"
                f"Text:\n{text}"
            )
        }],
        model=model,
        format=SentimentScoreResult.model_json_schema(),
        options={"temperature": 0.1, "seed": 42},
    )

    # The reply content may be missing; treat that as an empty reply instead
    # of letting `.strip()` raise AttributeError.
    raw = (response.message.content or "").strip()

    try:
        # Strict path: reply matches the schema, including the [-1, 1] bounds.
        # pydantic's ValidationError is a ValueError subclass.
        score = SentimentScoreResult.model_validate_json(raw).sentiment_score
    except ValueError:
        score = None

    if score is None:
        # Lenient path: accept well-formed JSON whose value is merely out of
        # range (it is clamped below rather than discarded), or a bare number.
        try:
            payload = json.loads(raw)
        except ValueError:
            payload = raw
        if isinstance(payload, dict):
            payload = payload.get("sentiment_score")
        try:
            score = float(payload)
        except (TypeError, ValueError, OverflowError):
            score = 0.0

    # NaN would slip through max/min as +1.0, so map it to neutral explicitly.
    if math.isnan(score):
        return 0.0

    return max(-1.0, min(1.0, score))



# -----------------------------
# Flair Schema & Function

# Structured-output schema for the flair classification call: one required
# field restricted to a closed set of category names. Its JSON schema is
# passed as `format=` to `chat` in classify_flair_group_structured, which also
# reads the Literal's members back from `model_fields` for its fallback match.
# NOTE: documented with `#` comments on purpose -- pydantic copies a model's
# class docstring into the generated JSON schema, which would change the
# payload sent to the LLM.
class FlairGroupResult(BaseModel):
    # The order of the Literal members matters: the fallback matcher in
    # classify_flair_group_structured returns the first member found in the
    # raw reply, scanning in this order.
    flair_group: Literal[
        "Politics",
        "Academics",
        "Finance",
        "Social",
        "Career",
        "Housing",
        "Athletics",
        "Mental Health",
        "Other"
    ] = Field(..., description="High-level category assigned to each Reddit post.")

def classify_flair_group_structured(text: str, model: str = "llama3.2:latest") -> str:
    """
    Classify a Reddit post into one high-level flair group using the LLM.

    The prompt below is sent together with ``text`` via ``chat``, requesting
    output in the JSON schema of ``FlairGroupResult``.

    Args:
        text: The post text to classify.
        model: Name of the model passed to ``chat``.

    Returns:
        One of the category names allowed by ``FlairGroupResult.flair_group``.
        If the reply does not validate against the schema, the first allowed
        category (in declaration order) whose name occurs case-insensitively
        in the raw reply is returned, or "Other" if none does.
    """
    from typing import get_args

    # NOTE(review): some labels overlap between categories in this prompt --
    # "Athletics/Esports" and the gym/fitness examples are listed under
    # (Housing) although a separate (Athletics) category exists, and
    # "Resource"/"Resources" appear under both (Academics) and (Mental Health).
    # The text is left unchanged because editing it changes classifications;
    # confirm whether the overlap is intended.
    category_prompt = """
You are classifying Reddit posts from a university subreddit into ONE of the following flair groups.
Use the examples below to guide your decision.

Return ONLY a JSON object like this:
{"flair_group": "Politics"}

### CATEGORIES AND EXAMPLES ###

(Politics)
Includes: "Politics", "Political", "Campus Politics", "petition", "Meta", "News"
Examples: campus petitions, administrative decisions, protests, policy debates.

(Academics)
Includes: "University", "Academic Life", "Course Questions", "General Question", "Admissions", 
"Prospective/Incoming Students", "For ALL Frosh too", "Question", "Discussion", 
"Advice/help", "CS/EECS", "Important", "Resource"
Examples: course advice, admissions help, professor discussions, class experiences, academic guidance.

(Finance)
Includes: "Financial Aid", "Finance", "Scholarship", "Money", "Budgeting", "Tuition", "Expenses"
Examples: financial aid questions, tuition cost concerns, scholarship applications, budgeting, 
money management, financial stress.

(Social)
Includes: "Events/Organizations", "Events/Meetups/Social", "Event", "Meetup", "Social Life", 
"Dating/Relationships", "General", "For Sale/Giveaway", "Buy/Sell", "Lost and Found", "Transportation"
Examples: social events, dating, clubs, rants about social life, casual or personal posts.

(Career)
Includes: "Jobs/Employment", "Internships", "Career Fairs"
Examples: job applications, internship experiences, career planning, resume discussions.

(Housing)
Includes: "Housing", "Food", "Gym/Exercise", "Athletics/Esports", "Local", "City/Local", 
"IV/Goleta/SB", "Parking"
Examples: housing searches, roommate requests, local living issues, dining halls, parking permits, 
gym access, fitness discussions.

(Athletics)
Includes: "Sports", "Athletics", "Esports", "Game", "Team", "Football", "Basketball", 
"Soccer", "Volleyball", "Baseball", "Tennis", "Track", "Swim", "Competition"
Examples: posts about school sports teams, game results, tailgates, 
intramural sports, athletic achievements, esports tournaments, 
team spirit, or attending a game.

(Mental Health)
Includes: "Rant", "Rant/Complaint", "Stress", "Anxiety", "Depression", "Relieved", 
"Mental Health", "Wellness", "Counseling", "Resources"
Examples: feeling stressed about exams, anxiety about grades, mental health resources, burnout, 
relief after finals, complaints about stress.

(Other)
For anything that does not clearly fit in any of the above categories.

Now classify the following Reddit post:
"""

    response = chat(
        messages=[{
            "role": "user",
            "content": f"{category_prompt}\n\n{text}"
        }],
        model=model,
        format=FlairGroupResult.model_json_schema(),
        options={"temperature": 0.2, "seed": 42},
    )

    # The reply content may be missing; treat that as an empty reply so the
    # fallback below cannot raise AttributeError on `.strip()`.
    raw = (response.message.content or "").strip()

    try:
        # pydantic's ValidationError is a ValueError subclass.
        parsed = FlairGroupResult.model_validate_json(raw)
    except ValueError:
        # Fallback: scan the raw reply for any allowed category name,
        # in the order the Literal declares them.
        allowed = get_args(FlairGroupResult.model_fields["flair_group"].annotation)
        lowered = raw.lower()
        return next((c for c in allowed if c.lower() in lowered), "Other")

    return parsed.flair_group



# -----------------------------
# Combined Analysis Function

def analyze_posts_with_sentiment_and_flair(
    posts_csv: str,
    output_csv: str = "UC_post_sentiments_flair.csv",
    model: str = "llama3.2:latest",
) -> None:
    """
    Score and classify every post in a CSV and write the results to a new CSV.

    Reads the columns ``post_id``, ``subreddit``, ``title`` and ``selftext``
    from ``posts_csv``; for each row calls ``analyze_sentiment_continuous``
    and ``classify_flair_group_structured`` on a "Title/Body" text block; and
    writes those four columns plus ``post_sentiment`` and ``flair_group`` to
    ``output_csv``.

    Args:
        posts_csv: Path of the input CSV.
        output_csv: Path of the CSV to write.
        model: Model name forwarded to both LLM helper functions.

    Raises:
        KeyError: If any of the four required columns is missing.
    """
    input_columns = ["post_id", "subreddit", "title", "selftext"]
    output_columns = input_columns + ["post_sentiment", "flair_group"]

    posts_df = pd.read_csv(posts_csv)
    posts_df = posts_df[input_columns]

    results = []
    for post in posts_df.to_dict("records"):
        # Empty CSV cells are read as NaN; without this they would be sent to
        # the LLM as the literal text "nan".
        title = "" if pd.isna(post["title"]) else post["title"]
        body = "" if pd.isna(post["selftext"]) else post["selftext"]
        text_block = f"Title: {title}\nBody: {body}"

        # LLM calls
        sentiment_score = analyze_sentiment_continuous(text_block, model=model)
        flair_group = classify_flair_group_structured(text_block, model=model)

        results.append({
            "post_id": post["post_id"],
            "subreddit": post["subreddit"],
            "title": post["title"],
            "selftext": post["selftext"],
            "post_sentiment": sentiment_score,
            "flair_group": flair_group,
        })

    # Explicit columns keep the header row even when there are no posts.
    output_df = pd.DataFrame(results, columns=output_columns)
    output_df.to_csv(output_csv, index=False)
    print(f"Saved {output_csv} with sentiment and flair_group columns")



# -----------------------------
# Run the Combined Script


# Guarded so that importing this module does not start the full LLM run.
# The call still executes when run as a script or as a notebook cell, where
# __name__ is "__main__" as well.
if __name__ == "__main__":
    analyze_posts_with_sentiment_and_flair("UC_posts.csv")
