#### Sentiment analysis of players with more than 2 posts

In [38]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


df = pd.read_csv('../sentiment_result_raw.csv')
df['created_date'] = pd.to_datetime(df['created_date'])
df = df.query('created_date.dt.year >= 2024')
df['mentioned_players'] = df['mentioned_players'].str.lower()
df = df.rename(columns={'sentiment_result': 'SENTIMENT'})
df

Unnamed: 0,_id,created_date,score,combined_text,mentioned_players,mentioned_players_count,SENTIMENT
0,6605c7cec120fd154e58655f,2024-03-28,0,"McDavid's case for the Hart trophy\nFirst off,...",johnny gaudreau,1,0.5
1,6605c7d07bcaf9812bb4a654,2024-03-27,1876,Sidney Crosby only made the NHL because his pa...,sidney crosby,1,-0.6
2,6605c7d07bcaf9812bb4a666,2024-03-27,51,"As requested by my Oilers fans, Here’s my late...",zach hyman,1,1.0
3,6605c7d2193c784b94e387e9,2024-03-27,0,Really obscure stat\nLogan Stanley played for ...,logan stanley,1,0.1
4,6605c7d2246827bb414386ae,2024-03-27,549,Jack Hughes buries the Leafs\n,jack hughes,1,1.0
...,...,...,...,...,...,...,...
162,6614dc8e9c4b5e9f0b6885f0,2024-04-07,155,Brady Tkachuk sets record for most hits in 1 g...,brady tkachuk,1,1.0
163,6614dc8e9b0968b447f2dfd5,2024-04-07,46,Juraj Slafkovsky spectacular pass\n,juraj slafkovsky,1,1.0
164,6614dc8f62576b69004bdbb3,2024-04-07,0,Evan Bouchard is the worst player in the leagu...,evan bouchard,1,-1.0
165,6614dc8f0668543a4f0419c3,2024-04-07,4782,Empty net etiquette expert Brady Tkachuk calml...,brady tkachuk,1,0.5


In [107]:
player_grouped_df = df.groupby('mentioned_players').agg(
    count=('SENTIMENT', 'size'),
    average_sentiment=('SENTIMENT', 'mean')
).reset_index().rename(columns={'mentioned_players': 'name', 'average_sentiment': 'SENTIMENT'}).sort_values(by='count', ascending=False)

player_grouped_df = player_grouped_df.query('count > 1')
player_grouped_df

Unnamed: 0,name,count,SENTIMENT
68,sidney crosby,14,0.407143
51,matt rempe,6,-0.1
5,auston matthews,5,0.86
57,nikita kucherov,4,0.275
55,nick cousins,4,-0.775
83,zach hyman,3,0.666667
64,ryan reaves,3,0.033333
8,brady tkachuk,3,0.833333
18,connor bedard,3,0.0
36,jeff skinner,2,-0.1


In [108]:
df_player = pd.read_csv('../front_end_data.csv')
# df_player
final_df = df_player.merge(player_grouped_df, left_on='PLAYER', right_on='name', how='left')
# final_df = final_df.drop(columns=['name'], inplace=True)
final_df = final_df.query('SEASON == "2023-24"')
final_df = final_df.dropna(subset=['SENTIMENT'])
final_df['PRED_AAV'] = final_df['PRED_AAV'].replace('[\$,]', '', regex=True).astype(float)

final_df

Unnamed: 0,TEAM,PLAYER,POSITION,GP,TOI/GP,TOI,IPP,GOALS,TOTAL ASSISTS,FIRST ASSISTS,...,SALARY CAP,SALARY CAP PERCENTAGE,DECEASED,PRED_AAV,PRED_SALARY_PERCENTAGE,PRED/ACTUAL,TEAM_FULL_NAMES,name,count,SENTIMENT
11406,fla,aaron ekblad,Defence,47,21.134397,993.316667,30.91,4,13,5,...,83500000,0.08982,0,6224222.0,0.074542,0.829896,Florida Panthers,aaron ekblad,2.0,-0.6
11473,tor,auston matthews,Center,67,21.082587,1412.533333,71.2,57,32,20,...,83500000,0.139404,0,11450554.0,0.137132,0.983703,Toronto Maple Leafs,auston matthews,5.0,0.86
11487,bos,brad marchand,Left Wing,71,19.28615,1369.316667,65.26,27,35,18,...,83500000,0.073353,0,6014291.0,0.072027,0.981925,Boston Bruins,brad marchand,2.0,0.0
11490,ott,brady tkachuk,Left Wing,67,18.978358,1271.55,69.51,31,26,21,...,83500000,0.098272,0,7887017.0,0.094455,0.961162,Ottawa Senators,brady tkachuk,3.0,0.833333
11503,stl,brayden schenn,Center,70,17.629286,1234.05,53.97,15,19,14,...,83500000,0.077844,0,5365586.0,0.064259,0.825475,St. Louis Blues,brayden schenn,2.0,0.4
11549,ott,claude giroux,Right Wing,68,20.020833,1361.416667,70.73,19,39,25,...,83500000,0.077844,0,6728253.0,0.080578,1.035116,Ottawa Senators,claude giroux,2.0,-0.1
11566,chi,connor bedard,Center,56,19.603274,1097.783333,81.82,21,33,24,...,83500000,0.011377,0,2578278.0,0.030878,2.713977,Chicago Blackhawks,connor bedard,3.0,0.0
11570,edm,connor mcdavid,Center,65,21.573333,1402.266667,78.32,26,86,54,...,83500000,0.149701,0,12157337.0,0.145597,0.972587,Edmonton Oilers,connor mcdavid,2.0,1.0
11630,edm,evan bouchard,Defence,67,23.006716,1541.45,47.52,15,52,22,...,83500000,0.046707,0,4086031.0,0.048935,1.0477,Edmonton Oilers,evan bouchard,2.0,-0.5
11634,"car, wsh",evgeny kuznetsov,Center,51,18.328758,934.766667,61.11,8,14,9,...,83500000,0.093413,0,7306411.0,0.087502,0.936719,"Carolina Hurricanes, Washington Capitals",evgeny kuznetsov,2.0,0.5


In [109]:
fig = px.scatter(
    final_df,
    x='AAV',
    y='SENTIMENT',
    title='Scatter Plot of SENTIMENT vs. SALARY',
    hover_name='PLAYER',
    trendline='ols',
    trendline_color_override='#e377c2'
)

trendline_trace = fig.data[-1]
trendline_trace.line.dash = 'dot'

fig.update_layout(
    xaxis_title='SALARY',
    yaxis_title='SENTIMENT',
    width=600,
    height=600,
    showlegend=True
)

fig.show()

correlation_coefficient = final_df['SENTIMENT'].corr(final_df['SALARY'])
print("Correlation Coefficient r = ", correlation_coefficient)

Correlation Coefficient r =  0.5060760657664793


In [110]:
fig = px.scatter(
    final_df,
    x='PRED_AAV',
    y='SENTIMENT',
    title='Scatter Plot of SENTIMENT vs. PREDICTED SALARY',
    hover_name='PLAYER',
    trendline='ols',
    trendline_color_override='#e377c2'
)

trendline_trace = fig.data[-1]
trendline_trace.line.dash = 'dot'

fig.update_layout(
    xaxis_title='PREDICTED SALARY',
    yaxis_title='SENTIMENT',
    width=600,
    height=600,
    showlegend=True
)

fig.show()

correlation_coefficient = final_df['SENTIMENT'].corr(final_df['PRED_AAV'])
print("Correlation Coefficient r = ", correlation_coefficient)

Correlation Coefficient r =  0.6779890896243749


In [114]:
fig = px.scatter(
    final_df,
    x='PRED/ACTUAL',
    y='SENTIMENT',
    title='Scatter Plot of SENTIMENT vs. SALARY',
    hover_name='PLAYER',
    size = 'AAV',
    color='PLAYER'
)

fig.update_layout(
    xaxis_title='PREDICTION / ACTUAL',
    yaxis_title='SENTIMENT',
    width=800,
    height=600,
    showlegend=True
)
fig.update_xaxes(range=[0, 2])

fig.show()