In [1]:
import pandas as pd
import pyreadr
import plotly.graph_objects as go
import plotly.express as px

In [2]:
# Load the match data from the R data file. The object returned by read_r is
# indexed by R object name; the "f_data_sm" entry becomes the working frame.
df = pyreadr.read_r("f_data_sm.rda")["f_data_sm"]

### Preprocessing

In [4]:
# Replace the abbreviated result column names with self-explanatory ones.
df = df.rename(
    columns={
        'FTSC': 'SCORE',
        'FTR': 'WON',
        'FTHG': 'HOME_GOALS',
        'FTAG': 'AWAY_GOALS',
        'FTTG': 'TOTAL_GOALS',
    }
)

In [6]:
# Discard the 'H', 'D' and 'A' columns, then remove every row that still
# contains a missing value.
df = df.drop(columns=['H', 'D', 'A']).dropna()

In [8]:
# Cast the season and the three goal-count columns to integers.
df = df.astype(
    dict.fromkeys(['SEASON', 'HOME_GOALS', 'AWAY_GOALS', 'TOTAL_GOALS'], 'int')
)

### Keeping only the top 4 leagues

In [28]:
# Distinct countries present in the frame.
df.COUNTRY.unique()

array(['England', 'Germany', 'Spain', 'Italy'], dtype=object)

In [26]:
# Keep only the matches played in England, Germany, Spain or Italy.
df = df.loc[df.COUNTRY.isin(["England", "Germany", "Spain", "Italy"])]

In [27]:
# Preview the first five rows of the cleaned frame.
df.head(5)

Unnamed: 0,SEASON,COUNTRY,LEAGUE,DATE,HOMETEAM,AWAYTEAM,SCORE,WON,HOME_GOALS,AWAY_GOALS,TOTAL_GOALS
0,1994,England,Premier League,1993-08-14,Arsenal,Coventry,0-3,A,0,3,3
1,1994,England,Premier League,1993-08-14,Aston Villa,QPR,4-1,H,4,1,5
2,1994,England,Premier League,1993-08-14,Chelsea,Blackburn,1-2,A,1,2,3
3,1994,England,Premier League,1993-08-14,Liverpool,Sheffield Weds,2-0,H,2,0,2
4,1994,England,Premier League,1993-08-14,Man City,Leeds,1-1,D,1,1,2


# 1. Total goals scored in a league over seasons

In [59]:
# Total goals per (season, league).
# TOTAL_GOALS is selected *before* summing so that only that column is
# aggregated. Summing the whole frame first would also try to add up the date
# and text columns, which is wasteful and can raise on pandas >= 2.0, where
# non-numeric columns are no longer silently dropped from groupby sums.
grouped1 = df.groupby(['SEASON', 'LEAGUE'])['TOTAL_GOALS'].sum().reset_index()

In [60]:
# Preview the first five rows of the per-season, per-league totals.
grouped1.head(5)

Unnamed: 0,SEASON,LEAGUE,TOTAL_GOALS
0,1994,Bundesliga 1,895
1,1994,La Liga Primera Division,988
2,1994,Premier League,1195
3,1994,Serie A,741
4,1995,Bundesliga 1,918


In [199]:
def total_goals_per_season(plot_type):
    """Show the total goals scored per season, coloured by league.

    The data comes from the module-level ``grouped1`` table.

    Parameters
    ----------
    plot_type : str
        'Line' for a line chart, 'Bar' for a bar chart.

    Raises
    ------
    ValueError
        If ``plot_type`` is neither 'Line' nor 'Bar'.
    """
    if plot_type not in ('Line', 'Bar'):
        raise ValueError("plot_type must be 'Line' or 'Bar'")

    # Both chart kinds take exactly the same arguments; only the plotly
    # express constructor differs.
    make_chart = px.line if plot_type == 'Line' else px.bar
    axis_labels = {
        "SEASON": "Season",
        "TOTAL_GOALS": "Total goals scored in a season",
        "LEAGUE": "League",
    }
    chart = make_chart(
        grouped1,
        x="SEASON",
        y="TOTAL_GOALS",
        color='LEAGUE',
        title='Total Goals Scored in a Season',
        labels=axis_labels,
    )
    chart.show()

In [200]:
# Render the season totals as a bar chart.
total_goals_per_season(plot_type='Bar')

# 2. Home goals and away goals scored for a given team over seasons

In [91]:
# Goals scored per (season, team), split by venue.
# Each goal column is selected before summing so that only that column is
# aggregated; summing the whole frame would also attempt to add up the date and
# text columns, which can raise on pandas >= 2.0.
grouped2_home = df.groupby(['SEASON', 'HOMETEAM'])['HOME_GOALS'].sum()
grouped2_away = df.groupby(['SEASON', 'AWAYTEAM'])['AWAY_GOALS'].sum()

In [106]:
# Put home and away goals side by side, one row per (season, team); the inner
# join keeps only the (season, team) pairs present in both series.
# The two series name their team level differently (HOMETEAM vs AWAYTEAM).
# Depending on the pandas version, a level whose name differs between the
# inputs can come out of concat unnamed, which would leave reset_index without
# a team column to rename. Both levels are therefore named TEAM explicitly
# before concatenating.
grouped2 = pd.concat(
    [
        grouped2_home.rename_axis(['SEASON', 'TEAM']),
        grouped2_away.rename_axis(['SEASON', 'TEAM']),
    ],
    axis=1,
    join="inner",
).reset_index()

In [127]:
def home_and_away_goals_scored_per_season(team):
    """Plot, season by season, the goals ``team`` scored at home and away.

    Rows for ``team`` are taken from the module-level ``grouped2`` table and
    shown as a figure with one line per venue.

    Parameters
    ----------
    team : str
        Team name as it appears in the ``TEAM`` column of ``grouped2``.
    """
    team_rows = grouped2.loc[grouped2['TEAM'] == team]
    seasons = team_rows['SEASON']

    figure = go.Figure()
    for column, label in (('HOME_GOALS', 'Home goals'), ('AWAY_GOALS', 'Away goals')):
        figure.add_trace(go.Scatter(x=seasons, y=team_rows[column], name=label))

    figure.update_layout(
        title=f'Total home and away goals scored in a season by {team}',
        xaxis_title='Season',
        yaxis_title='Goals',
    )
    figure.show()

In [128]:
# Example: goals scored by Bayern Munich at home and away.
home_and_away_goals_scored_per_season(team='Bayern Munich')

# 3. For a given team, the number of wins, losses and draws per season when playing at home and away

In [213]:
def _result_bars(matches, win_code, loss_code, title):
    """Build a per-season bar chart of draws, losses and wins.

    Parameters
    ----------
    matches : pandas.DataFrame
        Match rows with at least the ``SEASON`` and ``WON`` columns.
    win_code, loss_code : str
        Value of ``WON`` ('H' or 'A') that counts as a win / loss for the
        team being plotted.
    title : str
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    counts = (
        matches.groupby('SEASON').WON.value_counts()
        .unstack(fill_value=0)
        # Guarantee that all three outcome columns exist, so a team that never
        # drew (or never won / lost) is plotted with zeros instead of failing
        # on a missing column.
        .reindex(columns=['H', 'D', 'A'], fill_value=0)
    )
    fig = go.Figure([
        go.Bar(x=counts.index, y=counts['D'], name='Draw'),
        go.Bar(x=counts.index, y=counts[loss_code], name='Loss'),
        go.Bar(x=counts.index, y=counts[win_code], name='Win'),
    ])
    fig.update_layout(title=title)
    return fig


def home_away_results(team):
    """Show per-season counts of draws, losses and wins for ``team``.

    Two figures are displayed: one for matches where ``team`` is the home side
    and one for matches where it is the away side. ``WON`` holds 'H', 'D' or
    'A', so a home win is 'H' while an away win is 'A'.

    Parameters
    ----------
    team : str
        Team name as it appears in the ``HOMETEAM`` / ``AWAYTEAM`` columns of
        the module-level ``df``.
    """
    home_fig = _result_bars(df[df.HOMETEAM == team], 'H', 'A',
                            '{} home results'.format(team))
    away_fig = _result_bars(df[df.AWAYTEAM == team], 'A', 'H',
                            '{} away results'.format(team))
    home_fig.show()
    away_fig.show()

In [250]:
# Example: Barcelona's home and away results per season.
home_away_results(team='Barcelona')

#### Another way of visualising the same information that lets you choose which seasons to display

In [267]:
def _season_histograms(matches, title, tickvals, ticktext):
    """Build a figure with one result histogram per season.

    Only the latest season found in ``matches`` is visible initially; every
    other season is added as 'legendonly' so it can be switched on from the
    legend.

    Parameters
    ----------
    matches : pandas.DataFrame
        Match rows with at least the ``SEASON`` and ``WON`` columns.
    title : str
        Figure title.
    tickvals, ticktext : list of str
        ``WON`` codes and the labels shown for them on the x axis.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    fig = go.Figure()
    # Computed once, outside the loop. Series.max() is used so that an empty
    # selection yields NaN rather than raising.
    latest_season = matches.SEASON.max()
    for season in matches.SEASON.unique():
        extra = {} if season == latest_season else {'visible': 'legendonly'}
        fig.add_trace(go.Histogram(x=matches[matches.SEASON == season].WON,
                                   name=str(season), **extra))
    fig.update_layout(title=title,
                      xaxis=dict(tickvals=tickvals, ticktext=ticktext))
    return fig


def home_away_results_by_season(team):
    """Show result histograms for ``team``, selectable by season.

    Two figures are displayed, one for home matches and one for away matches.
    Each figure decides which season is initially visible from its own data;
    the away figure previously looked at the home matches for this.

    Parameters
    ----------
    team : str
        Team name as it appears in the ``HOMETEAM`` / ``AWAYTEAM`` columns of
        the module-level ``df``.
    """
    # At home 'H' is a win and 'A' a loss; away it is the other way round.
    home_fig = _season_histograms(df[df.HOMETEAM == team],
                                  '{} home results'.format(team),
                                  ['H', 'A', 'D'], ['Win', 'Loss', 'Draw'])
    away_fig = _season_histograms(df[df.AWAYTEAM == team],
                                  '{} away results'.format(team),
                                  ['D', 'A', 'H'], ['Draw', 'Win', 'Loss'])
    home_fig.show()
    away_fig.show()

In [268]:
# Example: Barcelona's results with per-season histograms.
home_away_results_by_season(team='Barcelona')

# 4. Visualising the champions of each country for every season, together with their points

In [381]:
# Count results per (season, country, team), separately for home and away
# matches. fill_value=0 turns "this outcome never happened" into 0 directly.
grouped4_home = df.groupby(['SEASON', 'COUNTRY', 'HOMETEAM']).WON.value_counts().unstack(fill_value=0)
grouped4_away = df.groupby(['SEASON', 'COUNTRY', 'AWAYTEAM']).WON.value_counts().unstack(fill_value=0)

# 1 point per draw and 3 per win; a win is 'H' for the home side and 'A' for
# the away side.
grouped4_home['Points_home'] = grouped4_home.D + 3*grouped4_home.H
grouped4_away['Points_away'] = grouped4_away.D + 3*grouped4_away.A

# The team level is called HOMETEAM in one table and AWAYTEAM in the other.
# Depending on the pandas version, a level whose name differs between the
# inputs can come out of concat unnamed, so both are named TEAM explicitly
# before the two point columns are aligned.
grouped4 = pd.concat(
    [
        grouped4_home.Points_home.rename_axis(['SEASON', 'COUNTRY', 'TEAM']),
        grouped4_away.Points_away.rename_axis(['SEASON', 'COUNTRY', 'TEAM']),
    ],
    axis=1,
)
# fill_value=0 keeps a team that only appears on one side from ending up with
# NaN points, which would make the integer cast below fail.
grouped4['Points'] = grouped4.Points_home.add(grouped4.Points_away, fill_value=0)
grouped4 = grouped4.drop(columns=['Points_home', 'Points_away']).reset_index()
grouped4.Points = grouped4.Points.astype(int)
# NOTE(review): the 1996 cut-off presumably marks the first season in which all
# four leagues awarded 3 points for a win - confirm.
grouped4 = grouped4[grouped4.SEASON >= 1996]

In [382]:
# Mark, within each (season, country), the row(s) whose points equal the group
# maximum. Teams tied on points are all kept; no tie-breaker is applied.
# The aggregation is passed by name ('max') rather than as the builtin `max`,
# which recent pandas versions deprecate as a transform argument.
idx = grouped4.groupby(['SEASON', 'COUNTRY'])['Points'].transform('max') == grouped4['Points']
champions = grouped4[idx]

In [435]:
def show_champions():
    """Plot the points of each season's champion, one line per country.

    Reads the module-level ``champions`` table. Each point is labelled with the
    team name, and the figure is shown with scroll-wheel zoom enabled.
    """
    chart = px.line(
        x=champions.SEASON,
        y=champions.Points,
        text=champions.TEAM,
        color=champions.COUNTRY,
    )
    chart.update_layout(title='Champions', legend_title='')
    chart.update_xaxes(title_text='Season')
    chart.update_yaxes(title_text='Points')

    display_options = {'scrollZoom': True}
    chart.show(config=display_options)

In [436]:
# Display the champions chart.
show_champions()