In [193]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns
import gc
import os
import datetime
from scipy.stats import poisson
from scipy.stats import binom


# Import Data

In [159]:
soccerResults = pd.read_csv("results.csv")
soccerResults['date'] = pd.to_datetime(soccerResults['date'])

In [160]:
soccerResults['year'] = soccerResults['date'].dt.year

In [203]:
matches = soccerResults[soccerResults['date'] > '2018-01-01']

In [204]:
matches.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,year
39555,2018-01-02,Iraq,United Arab Emirates,0,0,Gulf Cup,Kuwait City,Kuwait,True,2018
39556,2018-01-02,Oman,Bahrain,1,0,Gulf Cup,Kuwait City,Kuwait,True,2018
39557,2018-01-05,Oman,United Arab Emirates,0,0,Gulf Cup,Kuwait City,Kuwait,True,2018
39558,2018-01-07,Estonia,Sweden,1,1,Friendly,Abu Dhabi,United Arab Emirates,True,2018
39559,2018-01-11,Denmark,Sweden,0,1,Friendly,Abu Dhabi,United Arab Emirates,True,2018


# Clean Data

In [205]:
to_drop = ['Italy','Turkey','Switzerland','Wales',
           'Belgium','Russia','Denmark','Finland',
           'Netherlands','Austria','Ukraine','Macedonia',
          'England','Croatia','Scotland','Czech Republic',
          'Spain','Sweden','Poland','Slovakia',
          'France','Portugal','Germany','Hungary']

In [206]:
groupa = matches[matches.home_team.isin(to_drop)]
groupaAway = matches[matches.away_team.isin(to_drop)]

In [207]:
groupa.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,year
39559,2018-01-11,Denmark,Sweden,0,1,Friendly,Abu Dhabi,United Arab Emirates,True,2018
39619,2018-03-22,Denmark,Panama,1,0,Friendly,Brøndby,Denmark,False,2018
39628,2018-03-22,Slovakia,United Arab Emirates,2,1,Friendly,Bangkok,Thailand,True,2018
39630,2018-03-23,Austria,Slovenia,3,0,Friendly,Klagenfurt,Austria,False,2018
39635,2018-03-23,Finland,North Macedonia,0,0,Friendly,Belek,Turkey,True,2018


# Teams to Check

In [238]:
Team1Name = "Italy"
Team2Name = "Spain"

# Functions


In [259]:
def getAttack(Team):
    teamAverage = groupa[groupa['home_team'] == Team]
    teamAwayAverage = groupaAway[groupaAway['away_team'] == Team]  
    teamAttTotal = (teamAverage['home_score'].mean() + teamAwayAverage['away_score'].mean()) / 2
    return teamAttTotal

def getDef(Team):
    teamAverage = groupa[groupa['home_team'] == Team]
    teamAwayAverage = groupaAway[groupaAway['away_team'] == Team]
    teamDefTotal = (teamAverage['away_score'].mean() + teamAwayAverage['home_score'].mean()) / 2
    return teamDefTotal

# Main Code

In [264]:
GroupAverage = groupa['home_score'].mean()
Group2Average = groupa['away_score'].mean()
GroupAwayAverage = groupaAway['away_score'].mean()
Group3Average = groupaAway['home_score'].mean()

GlobalAvg = (GroupAverage + Group2Average + GroupAwayAverage + Group3Average) / 4


GroupDef = groupa['away_score'].mean()
GroupAwayDef = groupaAway['home_score'].mean()
GroupAttTotal = (GroupAverage + GroupAwayAverage) / 2
GroupDefTotal = (GroupDef + GroupAwayDef) / 2

Team1Att = getAttack(Team1Name)
Team1Def = getDef(Team1Name)
Team2Att = getAttack(Team2Name)
Team2Def = getDef(Team2Name)


Team1ExG = Team1Att * Team2Def * GlobalAvg
Team2ExG = Team2Att * Team1Def * GlobalAvg

n = np.arange(0,6)
y2 = poisson.pmf(n,Team2ExG)
y = poisson.pmf(n,Team1ExG)


data = {Team1Name: y,
        Team2Name: y2}
  
df = pd.DataFrame(data)
cm = sns.light_palette("green", as_cmap=True)
s = df.style.background_gradient(cmap=cm)

###### Output

In [265]:
df.T

Unnamed: 0,0,1,2,3,4,5
Italy,0.076266,0.196273,0.252557,0.216654,0.139391,0.071745
Spain,0.16632,0.298352,0.267598,0.160009,0.071758,0.025744


Testing

###### Colour Scheme

In [267]:
s

Unnamed: 0,Italy,Spain
0,0.076266,0.16632
1,0.196273,0.298352
2,0.252557,0.267598
3,0.216654,0.160009
4,0.139391,0.071758
5,0.071745,0.025744
