In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime,timedelta
import tqdm.notebook as tq
import matplotlib
# import os 
# os.environ["PATH"] += os.pathsep + '/Library/TeX/texbin'
# matplotlib.use("pgf")
# matplotlib.rcParams.update({
#     "pgf.texsystem": "pdflatex",
#     'font.family': 'serif',
#     'text.usetex': True,
#     'pgf.rcfonts': False,
# })

In [None]:
def _sentiment_by_5min_bucket(team, game_start_time, end_time):
    """Bucket one team's in-game comments into 5-minute bins.

    Reads ``../data/comments/gameday/Sentiment/{team}_gameday_comments_sentiment.csv``,
    keeps the rows whose ``time`` lies in ``[game_start_time, end_time]`` and groups
    them by 5-minute bucket (bucket 1 = minutes 0-4 after kickoff, bucket 2 = 5-9, ...).

    Returns:
        (mean, count): two Series of ``sentiment_score`` indexed by
        ``minute_after_game_start_5``.
    """
    comments = pd.read_csv(f'../data/comments/gameday/Sentiment/{team}_gameday_comments_sentiment.csv')
    comments['time'] = pd.to_datetime(comments.time)

    # .copy() so the derived columns are written to an independent frame, not a slice.
    in_window = (comments.time >= game_start_time) & (comments.time <= end_time)
    game_comments = comments[in_window].copy()

    # Whole minutes elapsed since kickoff, taken from the timestamps themselves.
    # Subtracting clock hour/minute fields (the previous approach) goes wrong as soon
    # as the window crosses midnight, because the hour wraps from 23 back to 0.
    kickoff_minute = pd.Timestamp(game_start_time).floor('min')
    game_comments['minute_after_game_start'] = (
        (game_comments['time'].dt.floor('min') - kickoff_minute) // pd.Timedelta(minutes=1)
    )
    game_comments['minute_after_game_start_5'] = (
        game_comments['minute_after_game_start']/5+1
    ).astype(int)

    scores = game_comments.groupby(['minute_after_game_start_5'])['sentiment_score']
    return scores.mean(), scores.count()


def get_sentiment_trend(teamA, teamB, time, hour):
    """Average sentiment and comment volume per 5-minute bucket for two teams.

    Args:
        teamA: team code used in the first team's sentiment CSV file name.
        teamB: team code used in the second team's sentiment CSV file name.
        time: game start as an ISO-format string accepted by ``datetime.fromisoformat``.
        hour: length of the analysed window in hours, counted from the game start.

    Returns:
        ``(A_mean, B_mean, A_count, B_count)``: four Series indexed by the
        5-minute bucket number (``minute_after_game_start_5``). Multiply the index
        by 5 to get the minute at which each bucket ends.
    """
    game_start_time = datetime.fromisoformat(time)
    end_time = game_start_time+timedelta(hours=hour)

    df_A_time_sentiment, df_A_time_count = _sentiment_by_5min_bucket(teamA, game_start_time, end_time)
    df_B_time_sentiment, df_B_time_count = _sentiment_by_5min_bucket(teamB, game_start_time, end_time)

    return df_A_time_sentiment,df_B_time_sentiment,df_A_time_count,df_B_time_count

In [None]:
# MNC vs TOT: average comment sentiment per 5-minute bucket.
# get_sentiment_trend returns four series (mean A, mean B, count A, count B);
# unpacking only two of them raises ValueError, so the counts are discarded explicitly.
A_time_sentiment,B_time_sentiment,_,_ = get_sentiment_trend('MNC', 'TOT', '2020-11-21T16:30:00', 3)

fig, ax = plt.subplots(figsize=(12, 9))
ax.grid(axis='y', alpha=0.75)

# The index is the bucket number; index*5 is the minute at which the bucket ends.
ax.plot(A_time_sentiment.index*5,A_time_sentiment,color='r',marker='o',label = 'sentiment_score_MNC')
ax.plot(B_time_sentiment.index*5,B_time_sentiment,color='b',marker='o',label = 'sentiment_score_TOT')

ax.set_xlabel('minute after game starts')
ax.set_ylabel('Average sentiment score')
ax.set_title('Average sentiment score trend over the game progress')
ax.legend()
plt.show()

In [None]:
# MNC vs LEI: sentiment trend with one vertical marker per goal, exported as PGF.
A_time_sentiment, B_time_sentiment, _, _ = get_sentiment_trend('MNC', 'LEI', '2020-09-27T15:30:00', 3)

fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6.4, 4.2)
ax.grid(axis='y', alpha=0.75)

# The index is the 5-minute bucket number; index*5 is the minute at which the bucket ends.
for trend, colour, legend_label in (
    (A_time_sentiment, 'r', 'sentiment_score_MNC'),
    (B_time_sentiment, 'b', 'sentiment_score_LEI'),
):
    ax.plot(trend.index*5, trend, color=colour, marker='o', label=legend_label)

# (x position of the marker line, y position of its caption, caption)
goal_marks = [
    (4, 0.4, "MNC goal"),
    (37, 0.4, "LEI goal"),
    (72, 0.4, "LEI goal"),
    (76, 0.36, "LEI goal"),
    (95, 0.4, "LEI goal"),
    (102, 0.42, "MNC goal"),
    (106, 0.36, "LEI goal"),
]
for goal_x, caption_y, caption in goal_marks:
    ax.axvline(goal_x)
    ax.text(goal_x, caption_y, caption, ha='left', wrap=True)

ax.set_xlabel('Minutes after game starts')
ax.set_ylabel('Comments\' average sentiment score')
ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1))
plt.savefig('../../Final Report/Assets/MNC_LEI.pgf', bbox_inches='tight')

In [None]:
# LIV vs ARS: sentiment trend with one vertical marker per goal, exported as PGF.
#
# NOTE(review): this cell previously passed the integer 3 as the kickoff time, which
# datetime.fromisoformat rejects with a TypeError, so the cell could not run. The value
# below is a reviewer's best guess at the intended kickoff, written in the same ISO
# format as the other match cells -- TODO confirm against the match schedule.
LIV_ARS_KICKOFF = '2020-09-28T19:15:00'

# get_sentiment_trend returns four series (mean A, mean B, count A, count B);
# unpacking only two of them raises ValueError, so the counts are discarded explicitly.
A_time_sentiment,B_time_sentiment,_,_ = get_sentiment_trend('LIV', 'ARS', LIV_ARS_KICKOFF, 3)

fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6.4, 4.8)
ax.grid(axis='y', alpha=0.75)

# The index is the 5-minute bucket number; index*5 is the minute at which the bucket ends.
ax.plot(A_time_sentiment.index*5,A_time_sentiment,color='r',marker='o',label = 'sentiment_score_LIV')
ax.plot(B_time_sentiment.index*5,B_time_sentiment,color='b',marker='o',label = 'sentiment_score_ARS')

# (x position of the marker line, y position of its caption, caption, caption alignment)
goal_marks = [
    (25, 0.4, "ARS goal", 'right'),
    (28, 0.36, "LIV goal", 'left'),
    (34, 0.44, "LIV goal", 'left'),
    (106, 0.4, "LIV goal", 'left'),
]
for goal_x, caption_y, caption, align in goal_marks:
    ax.axvline(goal_x)
    ax.text(goal_x, caption_y, caption, ha=align, wrap=True)

ax.set_xlabel('Minutes after game starts')
ax.set_ylabel('Comments\' average sentiment score')
ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1))
plt.savefig('../../Final Report/Assets/LIV_ARS.pgf', bbox_inches='tight')

In [None]:
# MNU vs LIV: sentiment trend with one vertical marker per goal.
A_time_sentiment, B_time_sentiment, _, _ = get_sentiment_trend('MNU', 'LIV', '2021-05-13T19:15:00', 3)

fig, ax = plt.subplots(1, 1)
fig.set_size_inches(6.4, 4.2)
ax.grid(axis='y', alpha=0.75)

# The index is the 5-minute bucket number; index*5 is the minute at which the bucket ends.
for trend, colour, legend_label in (
    (A_time_sentiment, 'r', 'sentiment_score_MNU'),
    (B_time_sentiment, 'b', 'sentiment_score_LIV'),
):
    ax.plot(trend.index*5, trend, color=colour, marker='o', label=legend_label)

# (x position of the marker line, y position of its caption, caption, caption alignment)
goal_marks = [
    (9, 0.18, "MNU goal", 'left'),
    (33, 0.22, "LIV goal", 'right'),
    (45, 0.18, "LIV goal", 'left'),
    (65, 0.22, "LIV goal", 'left'),
    (86, 0.18, "MNU goal", 'left'),
    (108, 0.22, "LIV goal", 'left'),
]
for goal_x, caption_y, caption, align in goal_marks:
    ax.axvline(goal_x)
    ax.text(goal_x, caption_y, caption, ha=align, wrap=True)

ax.set_xlabel('Minutes after game starts')
ax.set_ylabel('Comments\' average sentiment score')
ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1))
# Export to the report assets is currently switched off:
#plt.savefig('../../Final Report/Assets/MNU_LIV.pgf', bbox_inches='tight')

In [None]:
# TOT vs LEI: dump the raw bucket values, then plot the sentiment trend with goal markers.
df_A_time_sentiment, df_B_time_sentiment, df_A_count, df_B_count = get_sentiment_trend(
    'TOT', 'LEI', '2021-05-23T15:00:00', 3
)

# Print each series as a plain list, followed by a separator line.
for bucket_values in (df_A_time_sentiment, df_B_time_sentiment, df_A_count, df_B_count):
    print(list(bucket_values))
    print("---------------------")
print(df_A_time_sentiment.index*5)

fig, ax = plt.subplots(figsize=(12, 9))
ax.grid(axis='y', alpha=0.75)

# The index is the 5-minute bucket number; index*5 is the minute at which the bucket ends.
ax.plot(df_A_time_sentiment.index*5, df_A_time_sentiment, color='r', marker='o', label='sentiment_score_TOT')
ax.plot(df_B_time_sentiment.index*5, df_B_time_sentiment, color='b', marker='o', label='sentiment_score_LEI')

# (x position of the marker line, caption, caption alignment); every caption sits at y = 0.3
goal_marks = [
    (18, "LEI goal", 'left'),
    (41, "TOT goal", 'left'),
    (71, "LEI goal", 'left'),
    (95, "TOT goal", 'right'),
    (105, "TOT goal", 'center'),
    (115, "TOT goal", 'left'),
]
for goal_x, caption, align in goal_marks:
    ax.axvline(goal_x)
    ax.text(goal_x, 0.3, caption, ha=align, wrap=True)

ax.set_xlabel('minute after game starts')
ax.set_ylabel('Average sentiment score')
ax.set_title('Average sentiment score trend over the game progress')
ax.legend()
plt.show()

In [None]:
# Load the game-event table and show its (rows, columns) size.
event = pd.read_csv(filepath_or_buffer="../data/game_event.csv")
event.shape

In [None]:
# Not the last expression of the cell, so the notebook does not render this result.
event.team.value_counts()
data_single = event[['team','time']]

# One row per distinct (team, time) pair, sorted by team then time.
# The previous groupby(...).agg('mean') had no value columns left to average and was
# only used for this side effect; dropna() mirrors groupby's default of skipping
# rows whose key is missing.
event_single = (
    data_single
    .dropna()
    .drop_duplicates()
    .sort_values(['team', 'time'])
    .reset_index(drop=True)
)

In [None]:
def get_team_sentiment_trend(teamA, time):
    """Sentiment swing around each of the 24 five-minute buckets of one team's game.

    Reads ``../data/comments/gameday/Sentiment/{teamA}_gameday_comments_sentiment.csv``
    and keeps the comments posted from 10 minutes before to 125 minutes after the game
    start. Comments are grouped into 5-minute buckets (bucket 1 = minutes 0-4 after the
    start, bucket 0 = minutes -5..-1, bucket -1 = minutes -10..-6) and the mean
    ``sentiment_score`` of each bucket is taken.

    For every bucket ``i`` in 1..24 the result holds
    ``(mean[i] + mean[i+1] - mean[i-1] - mean[i-2]) / 2``, where a bucket without any
    comment counts as 0.

    Args:
        teamA: team code used in the sentiment CSV file name.
        time: game start as an ISO-format string accepted by ``datetime.fromisoformat``.

    Returns:
        list of 24 floats, one per bucket 1..24.

    Side effects:
        Prints the team code, the start time and the number of comments in the window.
    """
    df = pd.read_csv(f'../data/comments/gameday/Sentiment/{teamA}_gameday_comments_sentiment.csv')
    game_start_time = datetime.fromisoformat(time)
    start_time = game_start_time-timedelta(minutes=10)
    end_time = game_start_time+timedelta(minutes=125)
    df['time'] = pd.to_datetime(df.time)
    # .copy() so the derived columns are written to an independent frame, not a slice.
    df_game = df[(df.time>=start_time) & (df.time<=end_time)].copy()
    print(teamA)
    print(time)
    print(df_game.shape[0])

    # Whole minutes relative to kickoff, taken from the timestamps themselves so that a
    # window crossing midnight is handled (clock-hour arithmetic wraps from 23 to 0).
    kickoff_minute = pd.Timestamp(game_start_time).floor('min')
    df_game['minute_after_game_start'] = (
        (df_game['time'].dt.floor('min') - kickoff_minute) // pd.Timedelta(minutes=1)
    )
    # Floor division keeps every bucket 5 minutes wide for the pre-kickoff minutes too;
    # truncating toward zero lumped minutes -9..-1 into bucket 0 and left bucket -1
    # with minute -10 only.
    df_game['minute_after_game_start_5'] = df_game['minute_after_game_start'] // 5 + 1

    df_time_sentiment = (df_game.groupby(['minute_after_game_start_5'])['sentiment_score'].mean())

    # Buckets -1..25 are exactly the ones read below (i-2 .. i+1 for i in 1..24);
    # buckets with no comments are filled with 0.
    bucket_mean = df_time_sentiment.reindex(range(-1, 26), fill_value=0)

    sentiment_dif = [
        float(
            (bucket_mean.loc[i] + bucket_mean.loc[i+1] - bucket_mean.loc[i-1] - bucket_mean.loc[i-2]) / 2
        )
        for i in range(1, 25)
    ]

    return sentiment_dif
    
    #return df_game.shape[0]
    
    

In [None]:
# One list of per-bucket sentiment differences for every (team, time) row of event_single.
sentiment_all = []
match_keys = zip(event_single.team, event_single.time)
for team_code, kickoff in tq.tqdm(match_keys, total=event_single.shape[0]):
    sentiment_all.append(get_team_sentiment_trend(team_code, kickoff))

# Concatenate the per-match lists in order.
flat_sentiment = []
for per_match in sentiment_all:
    flat_sentiment.extend(per_match)

# event_single is ordered by (team, time); sorting event the same way lines the rows up
# with flat_sentiment. pandas raises if the two lengths differ.
event = event.sort_values(by=['team', 'time'])
event['sentiment_dif'] = flat_sentiment

In [None]:
# The row index is written as an unnamed first column; the cell that re-reads this file
# later in the notebook drops it again as 'Unnamed: 0'.
event.to_csv('../data/match_event_sentiment.csv', index=True)

In [None]:
# Flag rows in which the team / its opponent scored, then average per flag combination.
event['is_goal'] = event['goal']>0
event['is_opponent_goal'] = event['opponent_goal']>0
# event also carries string columns (team, time); numeric_only=True keeps them out of
# the mean, which otherwise raises TypeError on pandas 2.x.
event.groupby(['is_goal','is_opponent_goal']).mean(numeric_only=True)

In [None]:
def _count_gameday_comments(team_code, kickoff):
    """Number of a team's comments posted from 10 minutes before kickoff to 125 minutes after.

    Uses the same file and time window as the two-argument get_team_sentiment_trend.
    """
    comments = pd.read_csv(f'../data/comments/gameday/Sentiment/{team_code}_gameday_comments_sentiment.csv')
    posted = pd.to_datetime(comments.time)
    kickoff_time = datetime.fromisoformat(kickoff)
    in_window = (
        (posted >= kickoff_time - timedelta(minutes=10))
        & (posted <= kickoff_time + timedelta(minutes=125))
    )
    return int(in_window.sum())


# NOTE(review): this cell used to store the return value of get_team_sentiment_trend in
# `nums`. That function returns a list of sentiment differences, while the following
# cells filter `nums` as a number (0 < nums < 50), which fails on lists. The name `nums`
# and the commented-out `return df_game.shape[0]` in that function suggest a comment
# count was intended, so the count is computed here directly -- confirm this intent.
sentiment_all = pd.DataFrame()
team = []
time = []
nums = []
match_keys = zip(event_single.team, event_single.time)
for team_code, kickoff in tq.tqdm(match_keys, total=event_single.shape[0]):
    team.append(team_code)
    time.append(kickoff)
    nums.append(_count_gameday_comments(team_code, kickoff))

In [None]:
# Attach the collected lists to sentiment_all as columns, in this order.
for column_name, column_values in (('team', team), ('time', time), ('nums', nums)):
    sentiment_all[column_name] = column_values

In [None]:
# Lift the row-display limit for the rest of the session, then list the rows whose
# `nums` value is strictly between 0 and 50.
pd.set_option('display.max_rows', None)
comment_counts = sentiment_all['nums']
sentiment_all.loc[comment_counts.lt(50) & comment_counts.gt(0)]

In [None]:
def get_team_sentiment_trend(teamA, time, time_elapsed):
    """Count a team's comments in the 10 minutes before and after an in-game event.

    This three-argument definition replaces the earlier two-argument function of the
    same name from the point in the notebook where it is executed.

    Args:
        teamA: team code used in the sentiment CSV file name.
        time: game start as an ISO-format string accepted by ``datetime.fromisoformat``.
        time_elapsed: minutes between the game start and the event (converted with ``int``).

    Returns:
        ``(non_negative_after, total_after, non_negative_before, total_before)`` where
        "before" covers ``[event - 10 min, event)``, "after" covers
        ``[event, event + 10 min)`` and a comment is non-negative when its
        ``sentiment_score`` is at least -0.05.
    """
    comments = pd.read_csv(f'../data/comments/gameday/Sentiment/{teamA}_gameday_comments_sentiment.csv')
    comments['time'] = pd.to_datetime(comments.time)

    event_time = datetime.fromisoformat(time) + timedelta(minutes=int(time_elapsed))
    half_window = timedelta(minutes=10)

    posted = comments.time
    before = comments[(posted >= event_time - half_window) & (posted < event_time)]
    after = comments[(posted >= event_time) & (posted < event_time + half_window)]

    def _count_non_negative(frame):
        # Scores from -0.05 upwards are counted as non-negative.
        return frame.loc[frame.sentiment_score >= -0.05].shape[0]

    return (
        _count_non_negative(after),
        after.shape[0],
        _count_non_negative(before),
        before.shape[0],
    )

In [None]:
# Replace `event` with the table from event_top8.csv and display it.
event = pd.read_csv(filepath_or_buffer="../data/event_top8.csv")
event

In [None]:
# Comment counts around every event row:
#   before / after   -> all comments in the window before / after the event
#   before1 / after1 -> the non-negative comments among them
before1, before, after1, after = [], [], [], []
event_rows = zip(event.team, event.match_time, event.time_elapsed)
for team_code, kickoff, elapsed in tq.tqdm(event_rows, total=event.shape[0]):
    n_noneg_after, n_after, n_noneg_before, n_before = get_team_sentiment_trend(team_code, kickoff, elapsed)
    after1.append(n_noneg_after)
    after.append(n_after)
    before1.append(n_noneg_before)
    before.append(n_before)

In [None]:
# For each phase add three columns: total comments, non-negative comments and their
# ratio. A window with no comments gives 0/0, which pandas stores as NaN.
for phase, totals, non_negative in (
    ('before', before, before1),
    ('after', after, after1),
):
    event[f'{phase}_comments'] = totals
    event[f'{phase}_non_negative_comments'] = non_negative
    event[f'non_neg_rate_{phase}'] = event[f'{phase}_non_negative_comments'] / event[f'{phase}_comments']

In [None]:
# Change in comment volume: comments after the event minus comments before it.
event['comments_difference'] = event['after_comments'].sub(event['before_comments'])

In [None]:
# NOTE(review): the file name says "5min", while the three-argument
# get_team_sentiment_trend in this notebook counts comments over 10-minute windows --
# confirm which one is intended.
event.to_csv(path_or_buf='../data/events_top8_sentiment_5min.csv', index=False)

In [None]:
# Only the last expression of a cell is rendered, so the first mean below is computed
# but not shown.
comment_change_groups = event.groupby(['goal_difference','goal','opponent_goal'])['comments_difference']
comment_change_groups.mean()

# NOTE(review): no visible cell creates 'sentiment_changes' on `event`; presumably it is
# already a column of event_top8.csv -- confirm.
sentiment_change_groups = event.groupby(['goal_difference','goal'])['sentiment_changes']
sentiment_change_groups.mean()

In [None]:
# Events that happened while the score was level (goal_difference == 0).
# .copy() gives an independent frame: assigning a column on the bare .loc slice triggers
# SettingWithCopyWarning and may not take effect as intended.
event_tie = event.loc[event.goal_difference==0].copy()
# NOTE(review): values are scaled by 100 while the output file name says "10x" --
# confirm which factor is intended.
event_tie['sentiment_changes'] = event_tie['sentiment_changes']*100
event_tie.to_csv('../data/events_top8_sentiment_tie_10x.csv',index=False)

In [None]:
# Events with goal_difference equal to 1, saved to their own file.
event_up1 = event[event['goal_difference'].eq(1)]
event_up1.to_csv(path_or_buf='../data/events_top8_sentiment_up1.csv', index=False)

In [None]:
# Events with goal_difference equal to -1, saved to their own file.
event_down1 = event[event['goal_difference'].eq(-1)]
event_down1.to_csv(path_or_buf='../data/events_top8_sentiment_down1.csv', index=False)

In [None]:
# Re-load the match-event table written earlier in the notebook and display it.
df = pd.read_csv(filepath_or_buffer='../data/match_event_sentiment.csv')
df

In [None]:
# 'Unnamed: 0' is the row index that to_csv wrote into the file. errors='ignore' keeps
# the cell re-runnable: on a second run the column is already gone and a plain drop
# would raise KeyError.
df = df.drop(columns = ['Unnamed: 0'], errors='ignore')
df['is_goal'] = df['goal']>0
df['is_opponent_goal'] = df['opponent_goal']>0

# Mean sentiment difference per score state and per who scored.
df.groupby(['goal_difference','is_goal','is_opponent_goal'])['sentiment_dif'].mean()

In [None]:
# Point `df` at the events_top8_sentiment.csv table.
df = pd.read_csv(filepath_or_buffer='../data/events_top8_sentiment.csv')

In [None]:
# Only the last expression of a cell is rendered; the first two means are computed but
# not shown.
state_keys = ['goal_difference','goal','opponent_goal']

# Mean sentiment change per score state, over all teams.
df.groupby(state_keys)['sentiment_changes'].mean()

# The same breakdown restricted to team 'LIV'.
df_team = df[df['team'].eq('LIV')]
df_team.groupby(state_keys)['sentiment_changes'].mean()

# Mean sentiment change per team and match.
df.groupby(['team','match_time'])['sentiment_changes'].mean()

In [None]:
# Point `df` at the events_top8_sentiment_5min.csv table written earlier in the notebook.
df = pd.read_csv(filepath_or_buffer='../data/events_top8_sentiment_5min.csv')