In [None]:
## we will now read the socials information one at a time into a massive dataframe
import pandas as pd
df_social_A = pd.read_csv("./socials/socialsA.csv")
df_social_B = pd.read_csv("./socials/socialsB.csv")

full_df = pd.concat([df_social_A, df_social_B], ignore_index=True)

In [26]:
import numpy as np

## NOTE: Do this when running for the first time!!
# nltk.download("vader_lexicon")

def sentiment_preprocessing(df, analyzer):
    df = df.copy()

    df['created_at'] = pd.to_datetime(df['created_at'])
    

    df["sentiment_score"] = df["text"].apply(lambda x: analyzer.polarity_scores(x)["compound"])


    iso = df['created_at'].dt.isocalendar()

    df['year'] = iso.year
    df['week'] = iso.week

    name_fix = {
        "TottenhamHotspur" : "Tottenham Hotspur",
        "ManchesterUnited" : "Manchester United",
        "ManchesterCity" : "Manchester City",
        "LeicesterCity" : "Leicester City",
        "CrystalPalace" : "Crystal Palace"
    }

    df['file_name'] = df['file_name'].replace(name_fix)

    weekly = (
        df.groupby(['file_name', 'year', 'week'])['sentiment_score'].mean().reset_index()
    )

    weekly['year_week'] = weekly['year'].astype(str) + "_" + weekly['week'].astype(str)

    pivot = weekly.pivot(index = 'file_name', columns = 'year_week', values='sentiment_score')
    pivot.columns.name = None
    pivot.index.name = "team"

    pivot = pivot.sort_index(axis=1)
    pivot = pivot.ffill(axis=1).fillna(0)

    pivot = pivot.reset_index()


    return pivot

## read through and select relevant features

# df_social_B[["created_at", "followers", "friends", "group_name", "retweet_count", "text"]]

In [27]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

sentiment_analyzer = SentimentIntensityAnalyzer()

sentiment_results = sentiment_preprocessing(full_df, sentiment_analyzer)

sentiment_results.to_csv("./statistics/social_media_sentiment_stats.csv")

In [28]:

sentiment_results = sentiment_results.rename(columns={"file_name": "team"})
display(sentiment_results)

Unnamed: 0,team,2020_28,2020_29,2020_30,2020_31,2020_32,2020_33,2020_34,2020_35,2020_36,2020_37,2020_38,2020_39,2020_40,2020_41,2020_42
0,AFC Bournemouth,0.132426,0.114013,0.006023,0.252295,0.166822,0.250492,0.276741,0.255104,0.301689,0.312591,0.272169,0.267155,0.267554,0.279272,0.245047
1,Arsenal,0.174264,0.250285,0.202555,0.198404,0.210337,0.208617,0.242181,0.250133,0.257678,0.25063,0.265198,0.212486,0.192358,0.212461,0.214333
2,Burnley,0.151131,0.143896,0.25593,0.133761,0.184745,0.248752,0.269961,0.242079,0.249182,0.232015,0.207714,0.155197,0.149289,0.120468,0.232769
3,Chelsea,0.152317,0.315625,0.323341,0.168225,0.130109,0.208693,0.257964,0.273278,0.221444,0.248696,0.195736,0.155815,0.178386,0.190232,0.199679
4,Crystal Palace,0.119248,0.179932,0.138541,0.267464,0.210985,0.186167,0.196119,0.197284,0.174867,0.209508,0.177076,0.122671,0.125287,0.159602,0.157186
5,Everton,0.159116,0.123707,0.233855,0.365441,0.233318,0.273398,0.230785,0.247912,0.231979,0.263118,0.254589,0.23514,0.208696,0.217194,0.181214
6,Leicester City,0.178605,0.215659,0.25812,0.238332,0.127657,0.187434,0.17394,0.16539,0.189722,0.206052,0.207725,0.168851,0.155196,0.215343,0.192172
7,Liverpool,0.199701,0.288547,0.338311,0.292169,0.242475,0.289387,0.285885,0.257003,0.270119,0.264089,0.268321,0.237461,0.255704,0.243548,0.250844
8,Manchester City,0.203529,0.122441,0.227011,0.211199,0.240409,0.201377,0.226647,0.184717,0.207438,0.21957,0.190298,0.1903,0.17951,0.167369,0.19389
9,Manchester United,0.277783,0.230277,0.171926,0.220971,0.263338,0.238069,0.178601,0.190403,0.271557,0.232547,0.167403,0.204486,0.188987,0.183161,0.215061


In [29]:
## read in the weekly dataframe
weekly_df = pd.read_csv("./standings/weekly.csv")


In [33]:
merged = weekly_df.merge(sentiment_results, on=['team'], suffixes=["_points", "_sentiment"])
merged

Unnamed: 0.1,Unnamed: 0,team,2019_32,2019_33,2019_34,2019_35,2019_37,2019_38,2019_39,2019_40,...,2020_33,2020_34,2020_35,2020_36,2020_37,2020_38,2020_39,2020_40,2020_41,2020_42
0,0,Arsenal,3.0,6.0,6.0,7.0,8.0,11.0,11.0,15.0,...,0.208617,0.242181,0.250133,0.257678,0.25063,0.265198,0.212486,0.192358,0.212461,0.214333
1,4,Burnley,3.0,3.0,4.0,4.0,5.0,8.0,9.0,12.0,...,0.248752,0.269961,0.242079,0.249182,0.232015,0.207714,0.155197,0.149289,0.120468,0.232769
2,5,Chelsea,0.0,1.0,4.0,5.0,8.0,8.0,11.0,14.0,...,0.208693,0.257964,0.273278,0.221444,0.248696,0.195736,0.155815,0.178386,0.190232,0.199679
3,6,Crystal Palace,1.0,1.0,4.0,7.0,7.0,8.0,11.0,14.0,...,0.186167,0.196119,0.197284,0.174867,0.209508,0.177076,0.122671,0.125287,0.159602,0.157186
4,7,Everton,1.0,4.0,4.0,7.0,7.0,7.0,7.0,7.0,...,0.273398,0.230785,0.247912,0.231979,0.263118,0.254589,0.23514,0.208696,0.217194,0.181214
5,8,Leicester City,1.0,2.0,5.0,8.0,8.0,11.0,14.0,14.0,...,0.187434,0.17394,0.16539,0.189722,0.206052,0.207725,0.168851,0.155196,0.215343,0.192172
6,9,Liverpool,3.0,6.0,9.0,12.0,15.0,18.0,21.0,24.0,...,0.289387,0.285885,0.257003,0.270119,0.264089,0.268321,0.237461,0.255704,0.243548,0.250844
7,10,Manchester City,3.0,4.0,7.0,10.0,10.0,13.0,16.0,16.0,...,0.201377,0.226647,0.184717,0.207438,0.21957,0.190298,0.1903,0.17951,0.167369,0.19389
8,15,Southampton,0.0,0.0,3.0,4.0,7.0,7.0,7.0,7.0,...,0.195133,0.199101,0.177115,0.273685,0.233267,0.22714,0.216359,0.2022,0.200931,0.188499
