In [39]:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
import seaborn as sns

### Setting Up DB Connection ###
db = 'database.sqlite'
conn = sqlite3.connect(db)

matches = pd.read_sql("""SELECT c.name, l.name, m.season, 
            m.home_team_api_id, m.away_team_api_id, m.home_team_goal, m.away_team_goal
            FROM Match as m
            INNER JOIN Country as c
            ON m.country_id = c.id
            INNER JOIN League as l
            ON m.league_id = l.id
            WHERE c.name IN ('England', 'Germany', 'Italy', 'Spain')""", conn)

teams = pd.read_sql("""SELECT team_api_id, team_long_name
                        FROM Team""", conn)

for idx, row in matches.iterrows():
    if row['home_team_goal'] > row['away_team_goal']:
        matches.loc[idx, 'match_result'] = 'home team win'
    elif row['home_team_goal'] < row['away_team_goal']:
        matches.loc[idx, 'match_result'] = 'away team win'
    elif row['home_team_goal'] == row['away_team_goal']:
        matches.loc[idx, 'match_result'] = 'tie'
    for idx2, row2 in teams.iterrows():
        if row['home_team_api_id'] == row2['team_api_id']:
            matches.loc[idx, 'home_team_api_id'] = row2['team_long_name']
        elif row['away_team_api_id'] == row2['team_api_id']:
            matches.loc[idx, 'away_team_api_id'] = row2['team_long_name']

matches.columns = ['country', 'league', 'season', 'home_team', 'away_team', 'home_team_goal', 'away_team_goal', 'match_result']
matches.head(3)

Unnamed: 0,country,league,season,home_team,away_team,home_team_goal,away_team_goal,match_result
0,England,England Premier League,2008/2009,Manchester United,Newcastle United,1,1,tie
1,England,England Premier League,2008/2009,Arsenal,West Bromwich Albion,1,0,home team win
2,England,England Premier League,2008/2009,Sunderland,Liverpool,0,1,away team win


In [40]:
### Filling In Additional Info For Each Teams Results Over the Seasons ###

total_team_dfs = []
for league in list(matches.league.unique()):
    for season in list(matches.season.unique()):
        league_season = matches.loc[(matches['league'] == league) & (matches['season'] == season)]
        for team in list(league_season.home_team.unique()):
            team_df = league_season.loc[(league_season['home_team'] == team) | (league_season['away_team'] == team)]
            team_df['home_wins'] = 0
            team_df['away_wins'] = 0
            team_df['ties'] = 0
            team_df['home_losses'] = 0
            team_df['away_losses'] = 0
            team_df['goals_scored_home'] = np.nan
            team_df['goals_scored_away'] = np.nan
            team_df['goals_conceded_home'] = np.nan
            team_df['goals_conceded_away'] = np.nan
            team_df['team'] = team
            team_df['home_points'] = 0
            team_df['away_points'] = 0
            team_df['result'] = ''
            team_df['goal diff'] = 0
            for idx, row in team_df.iterrows():
                if row['home_team'] == team:
                    team_df.loc[idx, 'goals_scored_home'] = team_df.loc[idx, 'home_team_goal']
                    team_df.loc[idx, 'goals_conceded_home'] = team_df.loc[idx, 'away_team_goal']
                    team_df.loc[idx, 'goal_diff'] = team_df.loc[idx, 'home_team_goal'] - team_df.loc[idx, 'away_team_goal']
                elif row['away_team'] == team:
                    team_df.loc[idx, 'goals_scored_away'] = team_df.loc[idx, 'away_team_goal']
                    team_df.loc[idx, 'goals_conceded_away'] = team_df.loc[idx, 'home_team_goal']
                    team_df.loc[idx, 'goal_diff'] = team_df.loc[idx, 'away_team_goal'] - team_df.loc[idx, 'home_team_goal']
                if row['home_team'] == team and row['home_team_goal'] > row['away_team_goal']:
                    team_df.loc[idx, 'home_wins'] = 1
                    team_df.loc[idx, 'home_points'] = 3
                    team_df.loc[idx, 'result'] = 'home win'
                elif row['away_team'] == team and row['home_team_goal'] < row['away_team_goal']:
                    team_df.loc[idx, 'away_wins'] = 1
                    team_df.loc[idx, 'away_points'] = 3
                    team_df.loc[idx, 'result'] = 'away win'
                elif row['home_team'] == team and row['home_team_goal'] == row['away_team_goal']:
                    team_df.loc[idx, 'ties'] = 1
                    team_df.loc[idx, 'home_points'] = 1
                    team_df.loc[idx, 'result'] = 'home tie'
                elif row['away_team'] == team and row['home_team_goal'] == row['away_team_goal']:
                    team_df.loc[idx, 'ties'] = 1
                    team_df.loc[idx, 'away_points'] = 1
                    team_df.loc[idx, 'result'] = 'away tie'
                elif row['home_team'] == team and row['home_team_goal'] < row['away_team_goal']:
                    team_df.loc[idx, 'home_losses'] = 1
                    team_df.loc[idx, 'home_points'] = 0
                    team_df.loc[idx, 'result'] = 'home loss'
                elif row['away_team'] == team and row['home_team_goal'] > row['away_team_goal']:
                    team_df.loc[idx, 'away_losses'] = 1
                    team_df.loc[idx, 'away_points'] = 0
                    team_df.loc[idx, 'result'] = 'away loss'
                team_df['total_points'] = sum(team_df['home_points']) + sum(team_df['away_points'])
            total_team_dfs.append(team_df)
total_matches = pd.concat(total_team_dfs) 
total_matches.head(5)

Unnamed: 0,country,league,season,home_team,away_team,home_team_goal,away_team_goal,match_result,home_wins,away_wins,...,goals_scored_away,goals_conceded_home,goals_conceded_away,team,home_points,away_points,result,goal diff,goal_diff,total_points
0,England,England Premier League,2008/2009,Manchester United,Newcastle United,1,1,tie,0,0,...,,1.0,,Manchester United,1,0,home tie,0,0.0,90
10,England,England Premier League,2008/2009,Manchester United,West Ham United,2,0,home team win,1,0,...,,0.0,,Manchester United,3,0,home win,0,2.0,90
20,England,England Premier League,2008/2009,Manchester United,Hull City,4,3,home team win,1,0,...,,3.0,,Manchester United,3,0,home win,0,1.0,90
30,England,England Premier League,2008/2009,Arsenal,Manchester United,2,1,home team win,0,0,...,1.0,,2.0,Manchester United,0,0,away loss,0,-1.0,90
40,England,England Premier League,2008/2009,Manchester United,Stoke City,5,0,home team win,1,0,...,,0.0,,Manchester United,3,0,home win,0,5.0,90


In [41]:
### Calculating What Place Each Team Got in Each Season ###

import collections

total_league_results = []
for league in list(total_matches.league.unique()):
    for season in list(total_matches.season.unique()):
        league_season = total_matches.loc[(total_matches['league'] == league) & (total_matches['season'] == season)]
        points_dict = collections.defaultdict(list)
        for team in list(league_season.home_team.unique()):
            points_dict['team'].append(team)
            points_dict['season'].append(season)
            points_dict['total_points'].append(np.mean(league_season.loc[league_season['team'] == team]['total_points']))            
            points_dict['season_goal_diff'].append(np.sum(league_season.loc[league_season['team'] == team]['goal_diff']))
        points_df = pd.DataFrame(points_dict)
        points_df = points_df.sort_values(['total_points', 'season_goal_diff'], ascending=[False, False]).reset_index(drop=True)
        points_df['placement'] = points_df.index + 1
        total_league_results.append(points_df)
total_results = pd.concat(total_league_results)              
total_results.head(5)


Unnamed: 0,team,season,total_points,season_goal_diff,placement
0,Manchester United,2008/2009,90.0,44.0,1
1,Liverpool,2008/2009,86.0,50.0,2
2,Chelsea,2008/2009,83.0,44.0,3
3,Arsenal,2008/2009,72.0,31.0,4
4,Everton,2008/2009,63.0,18.0,5


In [42]:
### Merging with Total_Matches DF ###

total_matches = pd.merge(total_matches, total_results, how='inner', on=['team', 'season', 'total_points'])
total_matches.head(5)

Unnamed: 0,country,league,season,home_team,away_team,home_team_goal,away_team_goal,match_result,home_wins,away_wins,...,goals_conceded_away,team,home_points,away_points,result,goal diff,goal_diff,total_points,season_goal_diff,placement
0,England,England Premier League,2008/2009,Manchester United,Newcastle United,1,1,tie,0,0,...,,Manchester United,1,0,home tie,0,0.0,90,44.0,1
1,England,England Premier League,2008/2009,Manchester United,West Ham United,2,0,home team win,1,0,...,,Manchester United,3,0,home win,0,2.0,90,44.0,1
2,England,England Premier League,2008/2009,Manchester United,Hull City,4,3,home team win,1,0,...,,Manchester United,3,0,home win,0,1.0,90,44.0,1
3,England,England Premier League,2008/2009,Arsenal,Manchester United,2,1,home team win,0,0,...,2.0,Manchester United,0,0,away loss,0,-1.0,90,44.0,1
4,England,England Premier League,2008/2009,Manchester United,Stoke City,5,0,home team win,1,0,...,,Manchester United,3,0,home win,0,5.0,90,44.0,1


In [43]:
total_matches.to_csv('total_matches2.csv', index=False)