#### Merge each player's sentiment analysis result into the final data after prediction

In [8]:
import pandas as pd
import numpy as np
import plotly.express as px


# Load the raw sentiment rows and reduce them to one mean score per player.
#   1. lower-case the player names and parse `created_date` as datetimes,
#   2. keep only rows whose `created_date` year is 2024 or later,
#   3. average `sentiment_result` per player into a `SENTIMENT` column.
sentiment_df = (
    pd.read_csv('sentiment_result_raw.csv')
    .assign(
        mentioned_players=lambda raw: raw['mentioned_players'].str.lower(),
        created_date=lambda raw: pd.to_datetime(raw['created_date']),
    )
    .loc[lambda raw: raw['created_date'].dt.year >= 2024]
    .groupby('mentioned_players')['sentiment_result']
    .mean()
    .rename('SENTIMENT')
    .reset_index()
)

sentiment_df

Unnamed: 0,mentioned_players,SENTIMENT
0,aaron ekblad,-0.600000
1,adam larsson,0.000000
2,adam ruzicka,-1.000000
3,andrew peeke,0.000000
4,artemi panarin,0.000000
...,...,...
79,william eklund,1.000000
80,yakov trenin,0.000000
81,yanni gourde,0.000000
82,zach benson,-0.200000


In [3]:
# Load the final data set that the sentiment scores are merged into later on.
final_df = pd.read_csv(filepath_or_buffer='front_end_data.csv')

# Preview the first five rows.
final_df.head(n=5)

Unnamed: 0,TEAM,PLAYER,POSITION,GP,TOI/GP,TOI,IPP,GOALS,TOTAL ASSISTS,FIRST ASSISTS,...,BASE SALARY,S.BONUS,P.BONUS,SEASON,SALARY CAP,SALARY CAP PERCENTAGE,DECEASED,PRED_SALARY_PERCENTAGE,PRED/ACTUAL,TEAM_FULL_NAMES
0,chi,adam burish,Right Wing,81,11.748354,951.616667,40.0,4,4,3,...,500000,75000,0,2007-08,50300000,0.011431,0,0.01401,1.225597,Chicago Blackhawks
1,pit,adam hall,Right Wing,46,11.872826,546.15,75.0,2,4,1,...,525000,0,0,2007-08,50300000,0.010437,0,0.015781,1.512001,Pittsburgh Penguins
2,edm,ales hemsky,Right Wing,74,18.574099,1374.483333,81.61,20,51,36,...,3600000,0,0,2007-08,50300000,0.081511,0,0.073371,0.900138,Edmonton Oilers
3,buf,ales kotalik,Right Wing,79,15.342405,1212.05,67.19,23,20,7,...,2500000,0,0,2007-08,50300000,0.046388,0,0.04505,0.971143,Buffalo Sabres
4,pit,alex goligoski,Defence,3,13.938889,41.816667,100.0,0,2,1,...,500000,295300,188900,2007-08,50300000,0.016805,0,0.034137,2.031335,Pittsburgh Penguins


In [4]:
# Attach each player's SENTIMENT score to the final data and export the result.
# Built as a single non-mutating chain so re-running the cell always starts
# from `final_df` and `sentiment_df` and yields the same frame.
merged_df = (
    final_df
    .merge(
        sentiment_df,
        left_on='PLAYER',
        right_on='mentioned_players',
        how='left',  # keep every row of final_df, with or without a score
        # Fail loudly if a player appears more than once in sentiment_df;
        # otherwise the left join would silently duplicate final_df rows.
        validate='many_to_one',
    )
    # The join key from sentiment_df duplicates PLAYER, so it is dropped.
    .drop(columns=['mentioned_players'])
    # Keep the score only on 2023-24 rows; all other seasons become NaN.
    .assign(
        SENTIMENT=lambda joined: joined['SENTIMENT'].where(
            joined['SEASON'] == '2023-24'
        )
    )
)

merged_df.to_csv('front_end_data_with_sentiment.csv', index=False)

In [5]:
# 2023-24 rows that have a sentiment score, highest score first,
# restricted to the columns needed for the comparison below.
filtered_df = (
    merged_df
    .loc[
        lambda rows: rows['SEASON'] == "2023-24",
        ['PLAYER', 'SEASON', 'AAV', 'PRED/ACTUAL', 'SENTIMENT'],
    ]
    .sort_values('SENTIMENT', ascending=False)
    .dropna(subset=['SENTIMENT'])
)

filtered_df

Unnamed: 0,PLAYER,SEASON,AAV,PRED/ACTUAL,SENTIMENT
11763,jordan martinook,2023-24,1800000,1.728688,1.000
11570,connor mcdavid,2023-24,12500000,0.801139,1.000
11672,jack hughes,2023-24,8000000,1.336866,1.000
11521,brock faber,2023-24,1008333,2.663592,1.000
11507,brendan lemieux,2023-24,800000,1.642169,1.000
...,...,...,...,...,...
11920,nick cousins,2023-24,1100000,2.155138,-0.775
11872,matthew tkachuk,2023-24,9500000,0.991646,-0.800
12076,tom wilson,2023-24,5166667,0.909788,-0.850
11484,boone jenner,2023-24,3750000,1.393707,-1.000


In [6]:
# Scatter of the SENTIMENT column against the PRED/ACTUAL column of filtered_df.
fig = px.scatter(
    data_frame=filtered_df,
    x='PRED/ACTUAL',
    y='SENTIMENT',
    title='Scatter Plot of SENTIMENT vs. PRED/ACTUAL',
)

# Axis titles, a fixed square canvas and no legend, applied in one layout update.
fig.update_layout(
    xaxis_title='PRED/ACTUAL',
    yaxis_title='SENTIMENT',
    width=600,
    height=600,
    showlegend=False,
)

# The x-axis view is pinned to 0..2, so rows with PRED/ACTUAL above 2
# fall outside the initially visible range.
fig.update_xaxes(range=[0, 2])
fig.show()