In [3]:
import pandas as pd
from rapidfuzz import fuzz, process

# === Load files ===
# Both CSVs are read relative to the current working directory.
sentiment_df = pd.read_csv("sentiment_results.csv")
players_df = pd.read_csv("player_data.csv")

# === Clean player list ===
# Names are lower-cased and stripped so they compare against the lower-cased
# text below; missing names are dropped and duplicates collapsed.
player_list = players_df['player'].dropna().str.lower().str.strip().unique()

# === Preprocess cleaned text ===
# Missing text must become "" *before* the str cast: astype(str) alone turns
# NaN into the literal "nan", which would then partial-match any player name
# containing "nan".
sentiment_df['cleaned_text'] = (
    sentiment_df['cleaned_text'].fillna("").astype(str).str.lower()
)

# === Fuzzy matching function ===
def fuzzy_match_players(text, player_list, threshold=90):
    """Return the player names that fuzzily match *text*.

    Each name in ``player_list`` is scored against ``text`` with
    ``fuzz.partial_ratio``; at most the 10 best-scoring names are
    considered, and only those scoring at least ``threshold`` are kept.

    Args:
        text: The text to search. Empty, whitespace-only, or non-string
            input yields ``None``.
        player_list: Candidate player names.
        threshold: Minimum score (inclusive) a name needs to be reported.

    Returns:
        A list of distinct matched names ordered as ``process.extract``
        returned them (best score first), or ``None`` when nothing matched.

    NOTE(review): ``partial_ratio`` appears to score a name highly whenever
    it lines up with any substring of ``text``, so very short names (e.g.
    "om") can match inside unrelated words. Consider a minimum name length
    or a word-boundary check -- confirm against real data before changing.
    """
    # Nothing to search: avoid scoring every player against empty input.
    if not isinstance(text, str) or not text.strip():
        return None

    hits = process.extract(
        text,
        player_list,
        scorer=fuzz.partial_ratio,
        score_cutoff=threshold,  # same filter as `score >= threshold`
        limit=10,
    )

    # dict.fromkeys de-duplicates while keeping a stable order; a set would
    # order the names by string hash, which varies between interpreter runs.
    names = list(dict.fromkeys(name for name, _score, _index in hits))
    return names or None

# === Tag each row with the players its text mentions ===
# player_list is forwarded as the matcher's second positional argument.
sentiment_df['mentioned_players'] = sentiment_df['cleaned_text'].apply(
    fuzzy_match_players, args=(player_list,)
)

# === Write the enriched table to disk (row index omitted) ===
sentiment_df.to_csv("sentiment_with_players_fuzzy.csv", index=False)
print("✅ Fuzzy player matching done! Saved to 'sentiment_with_players_fuzzy.csv'")

# === Show the first ten rows of the columns of interest ===
print(sentiment_df.head(10)[['cleaned_text', 'final_sentiment', 'mentioned_players']])


✅ Fuzzy player matching done! Saved to 'sentiment_with_players_fuzzy.csv'
                                        cleaned_text final_sentiment  \
0  in an lbw it is either hitting the stumps or n...         neutral   
1  like rinku ashutosh shashank sharukh khan nama...         neutral   
2  kkr released gill for venky and gill scored 90...        positive   
3  i have not seen his performance in half of the...        negative   
4  what happened today tewatia duck so he is proa...         neutral   
5  why did kkr management let go of indias ct2025...        positive   
6  kishan v iyer pant maxwell livingstone and jfm...         neutral   
7  vs kkrcomparing with other teams looks bit shy...         neutral   
8  this post contains content not supported on ol...         neutral   
9  from the article the season for chennai super ...         neutral   

                                   mentioned_players  
0                                               [om]  
1              [shahruk