In [349]:
import pandas as pd
from scipy.stats import poisson

In [350]:
# Fixture list to predict ('Home'/'Away' pairings; its 'GF'/'GC' columns are dropped further down).
df_matches=pd.read_csv('data/clean_matches.csv')
# Match results; the 'Home', 'Away', 'GF' and 'GC' columns are read below to rate each team.
df_result=pd.read_csv('data/clean_result.csv')

#### 1. CALCULAR LA FORTALEZA DE LOS EQUIPOS

In [351]:
# One row per match from each side's point of view, keeping only the team
# name and the two goal columns.
df_home = df_result[['Home', 'GF', 'GC']]
df_away = df_result[['Away', 'GF', 'GC']]


In [352]:
# Give both tables a common 'Team' key.
df_home=df_home.rename(columns={'Home':'Team'})
# GF and GC are swapped for the away side. This only makes sense if the
# result file records goals from the home team's perspective (TODO confirm
# against the CSV): the home side's goals conceded are the away side's goals
# scored, and vice versa.
df_away=df_away.rename(columns={'Away':'Team','GF':'GC','GC':'GF'})

In [353]:
# Average GF and GC per team, split by venue. These two tables are the
# home/away strength tables later passed to predict_points.
df_home_str=df_home.groupby(['Team']).mean()
df_away_str=df_away.groupby(['Team']).mean()

In [354]:
# Venue-independent strength: average GF/GC per team over its home and away
# rows together. concat aligns on column names, so the different column order
# of the two inputs does not matter.
df_teams=pd.concat([df_home,df_away],ignore_index=True).groupby(['Team']).mean()

#### 2. FUNCIÓN DE PREDICCIÓN

In [355]:
import random

def predict_points(home, away, df_h, df_a, rng=None):
    """Estimate the points each side is expected to take from one match.

    The scoreline is modelled as two independent Poisson variables. Each
    rate is one side's average goals scored multiplied by the other side's
    average goals conceded, and both rates are scaled by a single random
    factor drawn from [0.8, 1.2], so repeated calls give slightly different
    results.

    Parameters
    ----------
    home, away : hashable
        Index labels of the home and the away team.
    df_h : pandas.DataFrame
        Home-strength table indexed by team, with 'GF' and 'GC' columns.
    df_a : pandas.DataFrame
        Away-strength table indexed by team, with 'GF' and 'GC' columns.
    rng : random.Random, optional
        Source of the random factor. Defaults to the global ``random``
        module; pass a seeded ``random.Random`` for reproducible output.

    Returns
    -------
    tuple
        ``(points_home, points_away)``, each being ``3 * P(win) + P(draw)``.
        ``(0, 0)`` is returned when ``home`` is missing from ``df_h`` or
        ``away`` is missing from ``df_a``.
    """
    # Validate against the tables that are actually indexed below. Checking a
    # separate combined table would let a team with only home (or only away)
    # rows through and fail with KeyError on the lookups.
    if home not in df_h.index or away not in df_a.index:
        return (0, 0)

    # goals_scored * goals_conceded
    lamb_home = df_h.at[home, 'GF'] * df_a.at[away, 'GC']
    lamb_away = df_a.at[away, 'GF'] * df_h.at[home, 'GC']

    # Introduce a degree of unpredictability: one factor shared by both rates.
    source = random if rng is None else rng
    uncertainty = source.uniform(0.8, 1.2)

    # Scorelines are truncated at 10 goals per side.
    goals = range(0, 11)
    # Every marginal probability is used 11 times, so evaluate each only once.
    home_pmf = [poisson.pmf(x, lamb_home * uncertainty) for x in goals]
    away_pmf = [poisson.pmf(y, lamb_away * uncertainty) for y in goals]

    prob_home, prob_away, prob_draw = 0, 0, 0
    for x in goals:  # number of goals home team
        for y in goals:  # number of goals away team
            p = home_pmf[x] * away_pmf[y]
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return (points_home, points_away)


#### 3. PROBANDO LA FUNCIÓN

In [356]:
# Same pairing with the venue reversed. The numbers change between runs
# because predict_points draws a random uncertainty factor on every call.
print(predict_points('Arsenal','Benfica',df_home_str,df_away_str))
print(predict_points('Benfica','Arsenal',df_home_str,df_away_str))

(2.8563127663199572, 0.002268898286390571)
(0.93466678782915, 1.8405880411658637)


In [357]:
# Spot check on another pairing (output varies between runs, see the random factor in predict_points).
predict_points('Porto','Real Madrid',df_home_str,df_away_str)

(1.527850791342096, 1.274494353508986)

#### 4. PREDICIENDO LA UCL

##### 4.1 CUARTOS DE FINAL

In [358]:
# Show the fixture list that is going to be predicted.
df_matches

Unnamed: 0,Home,Away,GF,GC
0,Arsenal,Bayern Munich,,
1,Bayern Munich,Arsenal,,
2,Atlético Madrid,Borussia Dortmund,,
3,Borussia Dortmund,Atlético Madrid,,
4,Real Madrid,Manchester City,,
5,Manchester City,Real Madrid,,
6,Paris Saint-Germain,Barcelona,,
7,Barcelona,Paris Saint-Germain,,
8,Winners of quarter-final 2,Winners of quarter-final 4,,
9,Winners of quarter-final 4,Winners of quarter-final 2,,


In [359]:
# The fixtures carry no goals, so the 'GF'/'GC' columns are dropped.
# errors='ignore' keeps this cell re-runnable: on a second run the columns
# are already gone and a plain drop would raise KeyError.
df_matches = df_matches.drop(columns=['GF', 'GC'], errors='ignore')

In [360]:
# Split the fixture list by round using positional slices: the first eight
# rows go to df_quarters, the next four to df_semis and the rest to df_final.
# Copies are taken so later in-place edits never touch df_matches.
df_quarters = df_matches.iloc[:8].copy()
df_semis = df_matches.iloc[8:12].copy()
df_final = df_matches.iloc[12:].copy()

In [361]:

def get_winner(df_fixture_updated):
    """Predict every fixture in the frame and record the outcome on it.

    Each row's 'Home' and 'Away' teams are passed to ``predict_points``
    together with the module-level ``df_home_str`` / ``df_away_str`` tables.
    The frame is modified in place: 'winner', 'points_home' and
    'points_away' are written for every row. The away side is recorded as
    the winner unless the home side has strictly more points.

    Returns the same (mutated) DataFrame.
    """
    # Snapshot the pairings first: the loop adds columns to the very frame
    # it is walking over.
    pairings = list(zip(
        df_fixture_updated.index,
        df_fixture_updated['Home'],
        df_fixture_updated['Away'],
    ))
    for label, home_team, away_team in pairings:
        pts_home, pts_away = predict_points(home_team, away_team, df_home_str, df_away_str)
        df_fixture_updated.loc[label, 'winner'] = home_team if pts_home > pts_away else away_team
        df_fixture_updated.loc[label, 'points_home'] = pts_home
        df_fixture_updated.loc[label, 'points_away'] = pts_away
    return df_fixture_updated

In [362]:
# Writes 'winner', 'points_home' and 'points_away' onto df_quarters in place; the returned frame is displayed.
get_winner(df_quarters)

Unnamed: 0,Home,Away,winner,points_home,points_away
0,Arsenal,Bayern Munich,Arsenal,2.800368,0.099771
1,Bayern Munich,Arsenal,Bayern Munich,2.390918,0.464161
2,Atlético Madrid,Borussia Dortmund,Atlético Madrid,2.466582,0.391686
3,Borussia Dortmund,Atlético Madrid,Borussia Dortmund,2.27104,0.491874
4,Real Madrid,Manchester City,Real Madrid,1.703619,1.120305
5,Manchester City,Real Madrid,Manchester City,2.157545,0.699714
6,Paris Saint-Germain,Barcelona,Paris Saint-Germain,2.689976,0.19253
7,Barcelona,Paris Saint-Germain,Barcelona,2.670891,0.034421


In [363]:
def total_points(home, away, df_quarters):
    """Return the aggregate winner of the tie between ``home`` and ``away``.

    Only the legs played between these two teams are considered: the leg
    where ``home`` hosts ``away`` and the reverse leg. Each team's total is
    the sum of its 'points_home' in the leg it hosts and its 'points_away'
    in the leg it visits. A tie with a single leg is supported.

    Parameters
    ----------
    home, away : hashable
        The two teams of the tie, as found in the 'Home'/'Away' columns.
    df_quarters : pandas.DataFrame
        Fixtures with 'Home', 'Away', 'points_home' and 'points_away'.

    Returns
    -------
    ``home`` if its total is strictly greater than ``away``'s, else ``away``.

    Raises
    ------
    IndexError
        If the frame contains no fixture between the two teams.
    """
    # Match on BOTH teams; filtering on one team only would pick up legs
    # from a different tie when a team appears more than once in the frame.
    first_leg = (df_quarters['Home'] == home) & (df_quarters['Away'] == away)
    second_leg = (df_quarters['Home'] == away) & (df_quarters['Away'] == home)
    if not (first_leg.any() or second_leg.any()):
        raise IndexError(f'no fixture between {home!r} and {away!r}')

    total_home = (df_quarters.loc[first_leg, 'points_home'].sum()
                  + df_quarters.loc[second_leg, 'points_away'].sum())
    total_away = (df_quarters.loc[first_leg, 'points_away'].sum()
                  + df_quarters.loc[second_leg, 'points_home'].sum())
    return home if total_home > total_away else away




In [364]:
# Consecutive row pairs share one label: rows 0-1 -> 1, rows 2-3 -> 2, ...
df_quarters['match'] = [f'quarter-final {pos // 2 + 1}' for pos in range(len(df_quarters))]


In [365]:
# Inspect the quarter-final frame with its per-leg predictions and 'match' label.
df_quarters

Unnamed: 0,Home,Away,winner,points_home,points_away,match
0,Arsenal,Bayern Munich,Arsenal,2.800368,0.099771,quarter-final 1
1,Bayern Munich,Arsenal,Bayern Munich,2.390918,0.464161,quarter-final 1
2,Atlético Madrid,Borussia Dortmund,Atlético Madrid,2.466582,0.391686,quarter-final 2
3,Borussia Dortmund,Atlético Madrid,Borussia Dortmund,2.27104,0.491874,quarter-final 2
4,Real Madrid,Manchester City,Real Madrid,1.703619,1.120305,quarter-final 3
5,Manchester City,Real Madrid,Manchester City,2.157545,0.699714,quarter-final 3
6,Paris Saint-Germain,Barcelona,Paris Saint-Germain,2.689976,0.19253,quarter-final 4
7,Barcelona,Paris Saint-Germain,Barcelona,2.670891,0.034421,quarter-final 4


In [366]:
def update_matches(df_quarters, df_semis):
    """Fill next-round placeholders with the winners of the given round.

    For each row of ``df_quarters`` the aggregate winner is obtained from
    ``total_points`` and every 'Winners of <match>' value in ``df_semis`` is
    replaced by that team, where <match> is the row's 'match' label.
    ``df_semis`` is modified in place, its 'winner' column is set to '?',
    and the same frame is returned.
    """
    ties = zip(df_quarters['Home'], df_quarters['Away'], df_quarters['match'])
    for home_team, away_team, tie_label in ties:
        placeholder = f'Winners of {tie_label}'
        tie_winner = total_points(home_team, away_team, df_quarters)
        # After the first leg of a tie has been processed the placeholder is
        # gone, so the replace for the second leg finds nothing to change.
        df_semis.replace({placeholder: tie_winner}, inplace=True)
    df_semis['winner'] = '?'
    return df_semis
    


##### 4.2 SEMIFINALES

In [367]:
# Replaces the 'Winners of quarter-final N' placeholders in df_semis (in place) with the aggregate winners.
update_matches(df_quarters,df_semis)

Unnamed: 0,Home,Away,winner
8,Atlético Madrid,Barcelona,?
9,Barcelona,Atlético Madrid,?
10,Arsenal,Manchester City,?
11,Manchester City,Arsenal,?


In [368]:
# Consecutive row pairs share one label: rows 0-1 -> 1, rows 2-3 -> 2.
df_semis['match'] = [f'semi-final {pos // 2 + 1}' for pos in range(len(df_semis))]


In [369]:
# Predict each semi-final leg; df_semis is updated in place and displayed.
get_winner(df_semis)

Unnamed: 0,Home,Away,winner,match,points_home,points_away
8,Atlético Madrid,Barcelona,Atlético Madrid,semi-final 1,2.788889,0.14247
9,Barcelona,Atlético Madrid,Barcelona,semi-final 1,2.657256,0.249614
10,Arsenal,Manchester City,Arsenal,semi-final 2,2.94785,0.023443
11,Manchester City,Arsenal,Manchester City,semi-final 2,2.66912,0.24164


##### 4.3 FINAL

In [370]:
# Put the aggregate semi-final winners into the final fixture (df_final is updated in place).
update_matches(df_semis,df_final)

Unnamed: 0,Home,Away,winner
12,Atlético Madrid,Arsenal,?


In [371]:

import random

def predict_points_final(home, away, uncertainty=0.1, unpredictability=0.1):
    """Estimate the points of both sides using the combined ``df_teams`` table.

    Goal rates come from the module-level ``df_teams`` averages: one side's
    'GF' times the other side's 'GC'. Scoreline probabilities for 0-10
    goals per side are summed into win/draw/loss probabilities and turned
    into ``3 * P(win) + P(draw)``. Each side's value is then multiplied by
    ``(1 - uncertainty)`` and by its own random factor drawn from
    ``[1 - unpredictability, 1 + unpredictability]``.

    Returns ``(points_home, points_away)``, or ``(0, 0)`` when either team
    is not in ``df_teams.index``.
    """
    known_teams = df_teams.index
    if home not in known_teams or away not in known_teams:
        return (0, 0)

    # goals_scored * goals_conceded
    lamb_home = df_teams.at[home, 'GF'] * df_teams.at[away, 'GC']
    lamb_away = df_teams.at[away, 'GF'] * df_teams.at[home, 'GC']

    goal_counts = range(0, 11)
    home_pmf = [poisson.pmf(goals, lamb_home) for goals in goal_counts]
    away_pmf = [poisson.pmf(goals, lamb_away) for goals in goal_counts]

    prob_home, prob_away, prob_draw = 0, 0, 0
    for x in goal_counts:  # number of goals home team
        for y in goal_counts:  # number of goals away team
            p = home_pmf[x] * away_pmf[y]
            if x > y:
                prob_home += p
            elif x < y:
                prob_away += p
            else:
                prob_draw += p

    damping = 1 - uncertainty
    low, high = 1 - unpredictability, 1 + unpredictability
    # Two separate draws, home first: each side gets its own random factor.
    points_home = (3 * prob_home + prob_draw) * damping * random.uniform(low, high)
    points_away = (3 * prob_away + prob_draw) * damping * random.uniform(low, high)
    return (points_home, points_away)


In [378]:
# Spot check of the final; the result varies between runs because of the random factors.
predict_points_final('Atlético Madrid','Arsenal')

(0.9570101815021834, 1.7449327111130646)

In [379]:
def get_winner_final(df_fixture_updated):
    """Predict each fixture with ``predict_points_final`` and record the winner.

    The frame is modified in place: the 'winner' column of every row is set
    to the home team when it has strictly more points, otherwise to the away
    team. Returns the same (mutated) DataFrame.
    """
    # Snapshot the pairings first: the loop writes to the frame it walks over.
    pairings = list(zip(
        df_fixture_updated.index,
        df_fixture_updated['Home'],
        df_fixture_updated['Away'],
    ))
    for label, home_team, away_team in pairings:
        pts_home, pts_away = predict_points_final(home_team, away_team)
        df_fixture_updated.loc[label, 'winner'] = home_team if pts_home > pts_away else away_team
    return df_fixture_updated

In [380]:
# Predict the final and record the winner on df_final in place.
get_winner_final(df_final)

Unnamed: 0,Home,Away,winner
12,Atlético Madrid,Arsenal,Arsenal
