In [1]:
# === FanPulse EPL ‚Äî Final Interview-Ready (Rechecked) ===
# Columns: Club | Recent result | Positive% | Why positive | Negative% | Why negative | Top 3 players | Quotes (1/2/3) | Outlook

!pip -q install praw requests pandas

import os, re, json, logging, requests
from datetime import datetime, timezone, timedelta
import pandas as pd
from getpass import getpass
from IPython.display import display, HTML
import praw

# ‚îÄ‚îÄ Quiet logs
logging.getLogger("praw").setLevel(logging.ERROR)
logging.getLogger("prawcore").setLevel(logging.ERROR)

# ‚îÄ‚îÄ Keys
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Paste your OpenAI API key (starts with 'sk-'): ")
if not os.getenv("REDDIT_CLIENT_ID"):
    os.environ["REDDIT_CLIENT_ID"] = getpass("Reddit client_id: ")
if not os.getenv("REDDIT_CLIENT_SECRET"):
    os.environ["REDDIT_CLIENT_SECRET"] = getpass("Reddit client_secret: ")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
RID  = os.getenv("REDDIT_CLIENT_ID")
RSEC = os.getenv("REDDIT_CLIENT_SECRET")

# ‚îÄ‚îÄ Settings
HOURS_LOOKBACK = 72
POSTS_PER_SUB  = 8
MAX_COMMENTS_PER_POST = 150
MODEL = "gpt-4o-mini"
TEMPERATURE = 0.2

# ‚îÄ‚îÄ Club ‚Üí subs
CLUB_SUBS = {
    "Arsenal": ["ArsenalFC"],
    "Aston Villa": ["AstonVilla"],
    "Bournemouth": ["afcbournemouth"],
    "Brentford": ["BrentfordFC"],
    "Brighton & Hove Albion": ["BHAAlbion"],
    "Burnley": ["BurnleyFC"],
    "Chelsea": ["chelseafc"],
    "Crystal Palace": ["CPFC"],
    "Everton": ["Everton"],
    "Fulham": ["FulhamFC"],
    "Leeds United": ["LeedsUnited"],
    "Liverpool": ["LiverpoolFC"],
    "Manchester City": ["MCFC"],
    "Manchester United": ["RedDevils"],
    "Newcastle United": ["NUFC"],
    "Nottingham Forest": ["NottinghamForest"],
    "Sunderland": ["sunderlandafc"],
    "Tottenham Hotspur": ["Tottenham","coys"],
    "West Ham United": ["Hammers"],
    "Wolverhampton Wanderers": ["Wolves"]
}
FALLBACK_SUB = "PremierLeague"

# ‚îÄ‚îÄ Sentiment lexicons
POS_WORDS = set("""
amazing great good awesome love loved loving wow brilliant superb class clutch solid upgrade fantastic excellent outstanding confident creative clinical impressive tidy reliable consistent sharp quality
win winning clean improved comeback dominated controlled deserved masterclass resilient momentum press energy creative sharp progress form bounce fight determined intense aggressive balanced structured fluid cohesive
proud excited thrilled joy satisfied motivated focused assist tackle save pressing control dominate clean sheet cleansheet worldie brace hat-trick "hat trick"
""".split())
NEG_WORDS = set("""
bad awful terrible poor hate hated trash horrible fraud bottling weak useless worst decline sack shambles pathetic shocking error mistake slow sloppy
lose lost defeat injury bottle embarrassing hopeless lazy horrendous tactless problem collapse disaster painful frustrating miss waste criticized weak flat negative downgrade dull boring inconsistent
angry furious worried concerned disappointed errors penalty "red card" offside concede conceded conceding own-goal "own goal"
""".split())

def simple_sentiment(text: str) -> str:
    t = text.lower()
    words = re.findall(r"[a-z']+", t)
    pos_hits = sum(w in POS_WORDS for w in words)
    neg_hits = sum(w in NEG_WORDS for w in words)
    # emotional punctuation
    if "!" in t: pos_hits += 1
    if "?" in t and neg_hits <= pos_hits: neg_hits += 0.5
    # football cues (Unicode dashes safe)
    if any(x in t for x in ["goal","win","scored","dominate","clean sheet","assist","tackle","save","pressing","worldie","brace","hat-trick","hat trick"]):
        pos_hits += 1
    if any(x in t for x in ["lose","loss","injury","mistake","error","bad","missed","penalty","red card","offside","own goal","own-goal"]):
        neg_hits += 1
    # prefer emotion over neutral
    if pos_hits >= neg_hits + 0.5 and pos_hits > 0: return "positive"
    if neg_hits >= pos_hits + 0.5 and neg_hits > 0: return "negative"
    return "neutral"

# ‚îÄ‚îÄ OpenAI helpers
def openai_chat(messages, model=MODEL, temperature=TEMPERATURE):
    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
    payload = {"model": model, "messages": messages, "temperature": temperature}
    r = requests.post(url, headers=headers, json=payload, timeout=90)
    r.raise_for_status()
    return r.json()["choices"][0]["message"]["content"]

def gpt_players_only(club: str, candidates: list, examples_text: str) -> list:
    prompt = f"""
Return ONLY a JSON array of up to 3 football PLAYER names mentioned with {club}.
Rules:
- Players only (no teams, countries, coaches, referees, generic words).
- Prefer the most-discussed players across the text.
- Use full canonical names (e.g., "Erling Haaland").
Candidates: {json.dumps(candidates[:40], ensure_ascii=False)}
Text sample: {examples_text[:1800]}
"""
    msg = [{"role":"system","content":"Return only a JSON array. No prose."},
           {"role":"user","content":prompt}]
    raw = openai_chat(msg)
    try:
        arr = json.loads(raw)
        return [str(x) for x in arr][:3]
    except Exception:
        return []

def gpt_reasons_and_outlook(club: str, quotes: list, counts: dict) -> dict:
    p, n, ne = counts.get("positive",0), counts.get("negative",0), counts.get("neutral",0)
    tot = max(p+n+ne,1)
    pp, nn = round(100*p/tot), round(100*n/tot)
    prompt = f"""
Return STRICT JSON: {{ "positive_why": "...", "negative_why": "...", "outlook": "..." }}
- Be specific (players/tactics/injuries/refs) based on quotes.
- One short phrase each. Avoid clich√©s ("mixed feelings", "awaits next match").
Counts: positive={p} ({pp}%), negative={n} ({nn}%)
Quotes: {json.dumps([q[:220] for q in quotes], ensure_ascii=False)}
"""
    msg = [{"role":"system","content":"Return strict JSON only."},
           {"role":"user","content":prompt}]
    raw = openai_chat(msg)
    try:
        obj = json.loads(raw)
    except Exception:
        obj = {"positive_why":"", "negative_why":"", "outlook":"Fans show cautious belief with pressure rising."}
    obj["positive_why"] = obj.get("positive_why","")[:140]
    obj["negative_why"] = obj.get("negative_why","")[:140]
    obj["outlook"] = obj.get("outlook","")[:140]
    return obj

# ‚îÄ‚îÄ Reddit client
reddit = praw.Reddit(
    client_id=RID,
    client_secret=RSEC,
    user_agent="FanPulseEPL/InterviewReady/1.1 by u/your_username",
    check_for_async=False
)

# ‚îÄ‚îÄ Utils
def clean_text(t: str) -> str:
    t = re.sub(r"http\S+","", t or "")
    t = re.sub(r"\s+"," ", t).strip()
    return t

def fetch_comments_for_sub(subname: str, earliest):
    comments = []
    try:
        sub = reddit.subreddit(subname)
        # new posts
        for p in sub.new(limit=POSTS_PER_SUB):
            created = datetime.fromtimestamp(p.created_utc, tz=timezone.utc)
            if created < earliest:
                continue
            p.comments.replace_more(limit=0)
            for c in p.comments.list()[:MAX_COMMENTS_PER_POST]:
                body = clean_text(getattr(c,"body",""))
                if body:
                    comments.append({"text": body, "score": getattr(c,"score",0),
                                     "permalink": f"https://www.reddit.com{getattr(c,'permalink','')}",
                                     "sub": subname})
    except Exception:
        pass
    return comments

# Catch: Post/Full Time + Unicode dashes
MATCH_PAT = re.compile(
    r"(post\s*match|full\s*[- ]?time|^ft[:\]]?)"
    r".{0,80}?([A-Za-z &.'-]+?)\s+(\d+)\s*[‚Äì‚Äî-]\s*(\d+)\s+([A-Za-z &.'-]+)",
    re.IGNORECASE
)

def detect_recent_result(club: str, earliest):
    """Return 'Win 2-1 vs Tottenham' / 'Draw 1-1 vs Chelsea' / ''."""
    def scan_sub(subname):
        try:
            sub = reddit.subreddit(subname)
            streams = [
                sub.search(club, sort="new", time_filter="week", limit=25),
                sub.new(limit=25)
            ]
            for stream in streams:
                for p in stream:
                    created = datetime.fromtimestamp(p.created_utc, tz=timezone.utc)
                    if created < earliest:
                        continue
                    title = f"{p.title or ''} {p.selftext or ''}"
                    m = MATCH_PAT.search(title)
                    if not m:
                        continue
                    a, ga, gb, b = m.group(2).strip(), int(m.group(3)), int(m.group(4)), m.group(5).strip()
                    if club.lower() not in a.lower() and club.lower() not in b.lower():
                        continue
                    # Determine side
                    is_home = club.lower() in a.lower()
                    my_g, op_g = (ga, gb) if is_home else (gb, ga)
                    opp = b if is_home else a
                    if my_g > op_g: return f"Win {my_g}-{op_g} vs {opp}"
                    if my_g < op_g: return f"Loss {my_g}-{op_g} vs {opp}"
                    return f"Draw {my_g}-{op_g} vs {opp}"
        except Exception:
            return ""
        return ""
    # try club subs first
    for s in CLUB_SUBS.get(club, []):
        res = scan_sub(s)
        if res: return res
    # then general subs
    for s in ["soccer", FALLBACK_SUB]:
        res = scan_sub(s)
        if res: return res
    return ""

def fetch_all_comments_for_club(club: str):
    comments = []
    now = datetime.now(timezone.utc)
    earliest = now - timedelta(hours=HOURS_LOOKBACK)
    # club subs
    for s in CLUB_SUBS.get(club, []):
        comments.extend(fetch_comments_for_sub(s, earliest))
    # fallback to r/PremierLeague search if empty
    if not comments:
        try:
            sub = reddit.subreddit(FALLBACK_SUB)
            for p in sub.search(club, sort="new", time_filter="week", limit=8):
                created = datetime.fromtimestamp(p.created_utc, tz=timezone.utc)
                if created < earliest:
                    continue
                p.comments.replace_more(limit=0)
                for c in p.comments.list()[:100]:
                    body = clean_text(getattr(c,"body",""))
                    if body:
                        comments.append({"text": body, "score": getattr(c,"score",0),
                                         "permalink": f"https://www.reddit.com{getattr(c,'permalink','')}",
                                         "sub": FALLBACK_SUB})
        except Exception:
            pass
    return comments, earliest

# ‚îÄ‚îÄ Main
rows = []
print(f"üîé Collecting fan comments & recent results (last {HOURS_LOOKBACK}h)...\n")

for club in CLUB_SUBS.keys():
    print(f"‚Ä¢ {club}: scanning‚Ä¶", end="")
    comments, earliest = fetch_all_comments_for_club(club)
    recent_result = detect_recent_result(club, earliest) or "No match in last 72h"

    if not comments:
        print(" none.")
        rows.append({
            "Club": club,
            "Recent result": recent_result,
            "Positive%": 0,
            "Why positive": "",
            "Negative%": 0,
            "Why negative": "",
            "Top 3 discussed players": "",
            "Quotes (1/2/3)": "",
            "Outlook": ""
        })
        continue

    # sentiment
    for c in comments:
        c["sentiment"] = simple_sentiment(c["text"])
    df = pd.DataFrame(comments)
    counts = df["sentiment"].value_counts().to_dict()
    counts = {k: counts.get(k,0) for k in ["positive","negative","neutral"]}
    total = max(sum(counts.values()), 1)
    pct_pos = round(100*counts["positive"]/total)
    pct_neg = round(100*counts["negative"]/total)

    # quotes 1/2/3
    top3 = df.sort_values("score", ascending=False).head(3)
    quotes = top3["text"].tolist()
    quotes_ranked = [f"{i+1}. {q[:220]}" for i, q in enumerate(quotes)]

    # candidate player names (capitalized bigrams then unigrams)
    text_all = " ".join(df["text"].tolist())
    bigrams = re.findall(r"\b([A-Z][a-z]{2,}\s+[A-Z][a-z]{2,})\b", text_all)
    unis   = re.findall(r"\b([A-Z][a-z]{2,})\b", text_all)
    # filter: remove common words & club parts
    stop_like = set("""
The This That They Fans Team Club Match Game Coach Manager VAR Ref Referee Europe Reddit Today Yesterday Tomorrow He She They We You Why
""".split())
    club_parts = set(re.findall(r"[A-Za-z]+", club))
    candidates = []
    for s in bigrams + unis:
        base = s.strip()
        if len(base) <= 2 or base in stop_like or base in club_parts:
            continue
        candidates.append(base)
    cand_sorted = pd.Series(candidates).value_counts().index.tolist() if candidates else []
    players3 = gpt_players_only(club, cand_sorted[:40], " ".join(quotes + df["text"].head(60).tolist()))

    # reasons + outlook (no clich√©s)
    reasons = gpt_reasons_and_outlook(club, quotes, counts)

    rows.append({
        "Club": club,
        "Recent result": recent_result,
        "Positive%": pct_pos,
        "Why positive": reasons.get("positive_why",""),
        "Negative%": pct_neg,
        "Why negative": reasons.get("negative_why",""),
        "Top 3 discussed players": ", ".join(players3),
        "Quotes (1/2/3)": " | ".join(quotes_ranked),
        "Outlook": reasons.get("outlook","")
    })
    print(" done.")

# ‚îÄ‚îÄ Output
result_df = pd.DataFrame(rows).sort_values(["Positive%","Negative%"], ascending=[False,True]).reset_index(drop=True)
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
display(HTML(f"<h3>FanPulse EPL ‚Äî Final Interview-Ready (Rechecked)</h3><p>Generated {ts}</p>"))
display(result_df)

csv_path = "/content/fanpulse_epl_final.csv"
result_df.to_csv(csv_path, index=False)

print("\n‚úÖ Done! Download from the left Files panel:")
print("‚Ä¢ fanpulse_epl_final.csv")


[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/189.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m189.3/189.3 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hPaste your OpenAI API key (starts with 'sk-'): ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑
Reddit client_id: ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑
Reddit client_secret: ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑
üîé Collecting fan comments & recent results (last 72h)...

‚Ä¢ Arsenal: scanning‚Ä¶ done.
‚Ä¢ Aston Villa: scanning‚Ä¶ done.
‚Ä¢ Bournemouth: scanning‚Ä¶ done.
‚Ä¢ Brentford: scanning‚Ä¶ done.
‚Ä¢ Brighton & Hove Albion: scanning‚Ä¶ done.
‚Ä¢ Burnley: scanning‚Ä¶ done.
‚Ä¢ Chelsea: scanning‚Ä¶ done.
‚Ä¢ Crystal Palace: scanning‚Ä¶ done.
‚Ä¢ Everton: scanning‚Ä¶ done.
‚Ä¢ Fulham: scanning‚Ä¶ done.
‚Ä¢ Leeds United: scanning‚Ä¶ done.
‚Ä¢ Liverpool

Unnamed: 0,Club,Recent result,Positive%,Why positive,Negative%,Why negative,Top 3 discussed players,Quotes (1/2/3),Outlook
0,West Ham United,Win 3-2 vs Burnley,37,Tommy's bravery in addressing issues unites th...,9,Central's performance is seen as an embarrassm...,,1. didn‚Äôt think i could love the man any more....,Team spirit is strong despite individual criti...
1,Manchester City,Win 3-0 vs Liverpool,35,"Best sequence ever seen, team in a good spot.",16,Upcoming match against Arsenal will be tougher.,,1. That was one of the best sequences I‚Äôve eve...,Concerns over Liverpool's pressing tactics.
2,Aston Villa,Win 4-0 vs Bournemouth,33,"Supporting Villa isn‚Äôt a choice, it‚Äôs a calling.",6,AI posts infesting football subs mislead fans.,,1. How do people not realise these posts are A...,Relatable global fan experience highlights ded...
3,Liverpool,Loss 0-3 vs Man City,32,Winning brings joy and celebration.,21,Inability to defend titles leads to disappoint...,,1. All this tells me is we party the most when...,Uncertain future with inconsistent performance.
4,Bournemouth,Loss 0-4 vs Aston Villa,32,Jimenez showed great attacking potential.,22,Small individual errors led to capitulation.,,"1. I love this club, but today's result is an ...",International break needed for recovery.
5,Newcastle United,Loss 1-3 vs Brentford,32,Wolte's strengths can be better utilized with ...,27,Injuries to fullbacks and underperforming wing...,,1. Few disjointed thoughts: - We haven't figur...,Need to adapt tactics to cope without Isak.
6,Chelsea,Win 3-0 vs Wolves,31,Engagement in merchandise sales shows fan inte...,22,Lack of knowledge about player backgrounds ind...,,1. I‚Äôm sure someone would love to buy your fak...,Potential for improved fan education and merch...
7,Everton,Win 2-0 vs Fulham,29,"Delighted we've got him, the biggest legend si...",18,Suspected referee bias due to Remembrance Sund...,,1. Not sure it's a claim... It's an absolute c...,Confidence in the team's legacy and leadership.
8,Arsenal,Draw 2-2 vs Sunderland,29,Havertz's quick recovery from injury boosts sq...,20,Shithousery tactics undermine sportsmanship an...,,"1. Oh for Christ sake, it's just a bit of shit...",Focus on self-improvement is crucial for futur...
9,Manchester United,Draw 2-2 vs Tottenham Hotspur,29,Players are showing great form and confidence.,21,Concerns over potential injuries affecting key...,,1. ![gif](giphy|CAukyJ5esw36CyRllk|downsized) ...,Team needs to maintain momentum despite challe...



‚úÖ Done! Download from the left Files panel:
‚Ä¢ fanpulse_epl_final.csv
