In [None]:
import pandas as pd
from rapidfuzz import fuzz, process

# Load data
sentiment_df = pd.read_csv("sentiment_results.csv")
players_df = pd.read_csv("player_data.csv")

# Prepare player list: lower-cased, whitespace-trimmed names that serve as the
# fuzzy-match choices. Rows without a player name are excluded from the list.
players_df['player_cleaned'] = players_df['player'].str.lower().str.strip()
player_list = players_df['player_cleaned'].dropna().unique()

# Normalise the text to lower-case strings. Missing values are replaced with ""
# before the cast: astype(str) can otherwise turn a missing value into the
# literal text "nan", which would then be matched against player names as if
# it were real text.
sentiment_df['cleaned_text'] = (
    sentiment_df['cleaned_text'].fillna("").astype(str).str.lower()
)

# Fuzzy matching function
def fuzzy_match_players(text, player_list, threshold=93, limit=10):
    """Return the player names that fuzzily match somewhere inside *text*.

    Each candidate in *player_list* is scored against *text* with
    ``fuzz.partial_ratio``; candidates scoring at least *threshold* are kept.

    Args:
        text: The text to search for player mentions.
        player_list: Candidate player names to compare against.
        threshold: Minimum score a candidate needs to count as a match.
        limit: Maximum number of candidates to consider, best scores first.

    Returns:
        A list of distinct matched names ordered from best to worst score,
        or ``None`` when nothing reaches the threshold (or *text* is blank).

    NOTE(review): partial_ratio scores the best-aligned substring, so very
    short names may score highly inside unrelated words - consider filtering
    such names out of *player_list* if that produces false positives.
    """
    # A blank query carries no information but can still align with the
    # spaces inside multi-word names, so treat it as "no mention".
    if isinstance(text, str) and not text.strip():
        return None

    # score_cutoff lets rapidfuzz discard weak candidates itself instead of
    # returning them only to be filtered out here.
    hits = process.extract(
        text,
        player_list,
        scorer=fuzz.partial_ratio,
        score_cutoff=threshold,
        limit=limit,
    )

    # dict.fromkeys de-duplicates while keeping the best-first order, which
    # makes the result reproducible between runs (a set would not).
    matched = list(dict.fromkeys(name for name, _score, _index in hits))
    return matched or None

# Tag every text with the players it mentions (None when nothing matches)
sentiment_df['mentioned_players'] = sentiment_df['cleaned_text'].apply(
    fuzzy_match_players, args=(player_list,)
)

# One row per (text, player) pair; texts without any match are dropped
exploded_df = sentiment_df.explode('mentioned_players').dropna(
    subset=['mentioned_players']
)

# Look up competition and team for each matched player. A left join keeps
# every matched row even if the lookup finds nothing.
merged_df = pd.merge(
    exploded_df,
    players_df[['player_cleaned', 'comp', 'team']],
    how='left',
    left_on='mentioned_players',
    right_on='player_cleaned',
)
merged_df = merged_df.rename(columns={'comp': 'Competition', 'team': 'Team'})

# Persist the enriched table and show a short preview
merged_df.to_csv("sentiment_with_players_fuzzy.csv", index=False)
print(
    merged_df[
        ['cleaned_text', 'final_sentiment', 'mentioned_players', 'Competition']
    ].head(10)
)


✅ Fuzzy matching + competition info saved to 'sentiment_with_players_fuzzy.csv'
                                        cleaned_text final_sentiment  \
0  in an lbw it is either hitting the stumps or n...         neutral   
1  like rinku ashutosh shashank sharukh khan nama...         neutral   
2  like rinku ashutosh shashank sharukh khan nama...         neutral   
3  like rinku ashutosh shashank sharukh khan nama...         neutral   
4  i have not seen his performance in half of the...        negative   
5  i have not seen his performance in half of the...        negative   
6  i have not seen his performance in half of the...        negative   
7  what happened today tewatia duck so he is proa...         neutral   
8  why did kkr management let go of indias ct2025...        positive   
9  why did kkr management let go of indias ct2025...        positive   

  mentioned_players              Competition  
0                om           col_c_k_nayudu  
1          shashank           col