In [1]:
import pandas as pd

In [None]:
# TODO: replace the hard-coded absolute path below with a configurable, project-relative data directory (as done in more recent projects).

In [2]:
# Base folder holding the input workbooks. It is joined to file names by plain string
# concatenation in the loading cells, so it must keep its trailing "/".
path_data = 'G:/My Drive/ML-AI/Projects/eurobasket_predictor/data/'

In [3]:
file='Matches.xlsx'

In [4]:
matches = pd.read_excel(path_data + file)


In [5]:
matches.head(2)

Unnamed: 0,date_match,team_1,score_team_1,team_2,score_team_2
0,801,Estonia,68,Lithuania,89
1,802,Iceland,61,Italy,87


In [6]:
classes = pd.read_excel(path_data + 'Teams Ranking Classes.xlsx')


In [7]:
classes.head(2)

Unnamed: 0,Team,Class
0,Belgium,Normal
1,Bosnia and Herzegovina,Normal


### Duplicate rows, make team and opponent cols

In [8]:
# Before calculating stats, reshape the table to one row per (team, game) with the columns:
# date_match, team, opponent, score_team, score_opponent (this doubles the number of rows).

In [9]:
matches.head(2)

Unnamed: 0,date_match,team_1,score_team_1,team_2,score_team_2
0,801,Estonia,68,Lithuania,89
1,802,Iceland,61,Italy,87


In [10]:
def get_df_two_rows_per_game(df):
    """Reshape a one-row-per-match table into one row per (team, match).

    Every match yields two consecutive rows: first from the point of view of
    ``team_1``, then from the point of view of ``team_2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``date_match``, ``team_1``, ``score_team_1``,
        ``team_2`` and ``score_team_2``. Its index may be anything; rows are
        processed by position.

    Returns
    -------
    pandas.DataFrame
        Columns ``date_match``, ``team``, ``opponent``, ``score_team``,
        ``score_opponent``; twice as many rows as ``df``; a fresh 0..2n-1 index.
    """
    out_columns = ['date_match', 'team', 'opponent', 'score_team', 'score_opponent']
    if df.empty:
        return pd.DataFrame(columns=out_columns)

    n_matches = len(df)

    # Two views of the same matches, one per perspective, renamed to the common schema.
    team_1_view = df[['date_match', 'team_1', 'team_2', 'score_team_1', 'score_team_2']].copy()
    team_2_view = df[['date_match', 'team_2', 'team_1', 'score_team_2', 'score_team_1']].copy()
    team_1_view.columns = out_columns
    team_2_view.columns = out_columns

    # Even positions for team 1, odd positions for team 2, so that sorting by index
    # interleaves the two perspectives match by match (built once instead of
    # concatenating row by row).
    team_1_view.index = range(0, 2 * n_matches, 2)
    team_2_view.index = range(1, 2 * n_matches, 2)

    return pd.concat([team_1_view, team_2_view]).sort_index().reset_index(drop=True)

In [11]:
df_two_rows_per_game = get_df_two_rows_per_game(matches)

### Add class

In [12]:
def add_classes(a_df, df_classes):
    """Return a copy of ``a_df`` with the class of the team and of its opponent.

    ``df_classes`` supplies the lookup through its ``Team`` and ``Class`` columns.
    Teams missing from the lookup get NaN in the new ``class_team`` /
    ``class_opponent`` columns. ``a_df`` itself is left untouched.
    """
    class_by_team = df_classes.set_index("Team")["Class"]
    return a_df.assign(
        class_team=a_df["team"].map(class_by_team),
        class_opponent=a_df["opponent"].map(class_by_team),
    )

In [13]:
df_with_classes = add_classes(df_two_rows_per_game, classes)
df_with_classes.head(3)

Unnamed: 0,date_match,team,opponent,score_team,score_opponent,class_team,class_opponent
0,801,Estonia,Lithuania,68,89,Normal,Top
1,801,Lithuania,Estonia,89,68,Top,Normal
2,802,Iceland,Italy,61,87,Normal,Top


### Calculate stats

In [14]:
df_with_classes.head(2)

Unnamed: 0,date_match,team,opponent,score_team,score_opponent,class_team,class_opponent
0,801,Estonia,Lithuania,68,89,Normal,Top
1,801,Lithuania,Estonia,89,68,Top,Normal


In [15]:
# Calculate stats of game

def add_winner_flags(a_df):
    """Return a copy of ``a_df`` with a ``won`` column.

    ``won`` is 1 when ``score_team`` is strictly greater than ``score_opponent``
    and 0 otherwise (so an equal score counts as 0).
    """
    team_scored_more = a_df['score_team'].gt(a_df['score_opponent'])
    return a_df.assign(won=team_scored_more.astype(int))

def add_diff_points(a_df):
    """Return a copy of ``a_df`` with ``pts_diff`` = ``score_team`` - ``score_opponent``."""
    margin = a_df['score_team'].sub(a_df['score_opponent'])
    return a_df.assign(pts_diff=margin)


In [16]:
# Add the per-game outcome columns (`won`, then `pts_diff`) in a single pipeline.
df_with_classes = (
    df_with_classes
    .pipe(add_winner_flags)
    .pipe(add_diff_points)
)

In [17]:
df_with_classes

Unnamed: 0,date_match,team,opponent,score_team,score_opponent,class_team,class_opponent,won,pts_diff
0,801,Estonia,Lithuania,68,89,Normal,Top,0,-21
1,801,Lithuania,Estonia,89,68,Top,Normal,1,21
2,802,Iceland,Italy,61,87,Normal,Top,0,-26
3,802,Italy,Iceland,87,61,Top,Normal,1,26
4,803,Iceland,Poland,90,92,Normal,Normal,0,-2
...,...,...,...,...,...,...,...,...,...
113,821,Spain,Germany,105,106,Top,Top,0,-1
114,821,Italy,Latvia,68,83,Top,Top,0,-15
115,821,Latvia,Italy,83,68,Top,Top,1,15
116,821,Serbia,Slovenia,106,72,Top,Top,1,34


### Aggregate per team

In [18]:
# One row per (team, own class, opponent class) with the mean and the number of games
# for both the win flag and the points difference.
aggregated_df_raw = (
    df_with_classes
    .groupby(['team', 'class_team', 'class_opponent'])
    .agg({'won': ['mean', 'count'], 'pts_diff': ['mean', 'count']})
    .reset_index()
)

In [19]:
# Collapse the two-level (column, aggregation) header into flat names such as "won_mean".
# Empty labels are skipped, so a key like ("team", "") stays "team".
aggregated_df_raw.columns = [
    col if not isinstance(col, tuple) else "_".join(str(part) for part in col if part)
    for col in aggregated_df_raw.columns
]


In [20]:
aggregated_df_raw.head(3)

Unnamed: 0,team,class_team,class_opponent,won_mean,won_count,pts_diff_mean,pts_diff_count
0,Argentina,Normal,Normal,1.0,1,14.0,1
1,Argentina,Normal,Top,0.0,1,-12.0,1
2,Belgium,Normal,Normal,0.25,4,-15.5,4


### Turn to wide

In [21]:
# Long -> wide: one row per (team, own class); each statistic is spread into one
# column per opponent class.
wide = aggregated_df_raw.pivot_table(
    index=["team", "class_team"],
    columns=["class_opponent"],
    values=["won_mean", "pts_diff_mean", "won_count"],
)

In [22]:
# Flatten the (statistic, opponent class) column pairs into names like "won_mean_Top".
wide.columns = [f"{stat}_{opponent_class}" for stat, opponent_class in wide.columns]

# Bring team/class_team back as columns, shorten the game-count names and
# put the columns in a reading-friendly order.
wide = (
    wide
    .reset_index()
    .rename(columns={'won_count_Normal': 'count_Normal', 'won_count_Top': 'count_Top'})
    .loc[:, [
        'team', 'class_team',
        'count_Top', 'count_Normal',
        'won_mean_Top', 'won_mean_Normal',
        'pts_diff_mean_Top', 'pts_diff_mean_Normal',
    ]]
)


### Drop Argentina and Spain B

In [23]:
# Remove the two teams that should not take part in the comparison table.
excluded_teams = ["Argentina", "Spain B"]
wide = wide.loc[~wide["team"].isin(excluded_teams)].reset_index(drop=True)


### Add row of averages (will make things easy and also for curiosity)

In [24]:
# Average of every numeric column across the teams of each class.
means = (
    wide
    .drop(columns='team')
    .groupby('class_team')
    .mean()
    .reset_index()
)
means

Unnamed: 0,class_team,count_Top,count_Normal,won_mean_Top,won_mean_Normal,pts_diff_mean_Top,pts_diff_mean_Normal
0,Normal,1.333333,3.428571,0.166667,0.472619,-17.708333,-1.184524
1,Top,3.4,2.125,0.528333,0.895833,1.146667,14.53125


In [25]:
means['team']='Average'

In [26]:
# Append the per-class rows of `means` to the team table and renumber the index.
# NOTE: `wide` is reassigned from itself, so re-running this cell appends those rows again.
wide = pd.concat([wide, means]).reset_index(drop=True)

### Impute data

In [None]:
# We will add weighted values in this manner:

In [28]:
wide.tail(6)

Unnamed: 0,team,class_team,count_Top,count_Normal,won_mean_Top,won_mean_Normal,pts_diff_mean_Top,pts_diff_mean_Normal
20,Slovenia,Top,5.0,1.0,0.0,1.0,-18.4,12.0
21,Spain,Top,3.0,2.0,0.0,0.5,-4.666667,6.0
22,Sweden,Normal,,5.0,,0.2,,-3.4
23,Türkiye,Top,3.0,1.0,0.333333,1.0,-6.666667,14.0
24,Average,Normal,1.333333,3.428571,0.166667,0.472619,-17.708333,-1.184524
25,Average,Top,3.4,2.125,0.528333,0.895833,1.146667,14.53125


In [88]:
wide_weighted = wide.copy()


In [89]:
# Step 1: impute null values (assuming at least 1 game per team existed; otherwise predict the avg of slices)
# If won_mean_Top==null:
    # won_mean_Top = 0, count_Top = 1
# If won_mean_Normal==null and class_team==Top:
    # won_mean_Normal = 1, count_Normal = 1
# If won_mean_Normal==null and class_team==Normal:
    # won_mean_Normal = the avg per slice, count_Normal = 1

In [90]:
# All masks are computed on the un-imputed `wide`, so filling a mean in `wide_weighted`
# does not prevent the matching game count from being filled as well.
missing_top = wide['won_mean_Top'].isna()
missing_normal = wide['won_mean_Normal'].isna()
is_top_team = wide['class_team'] == 'Top'
is_normal_team = wide['class_team'] == 'Normal'
is_average_row = wide['team'] == 'Average'

# No games against Top opponents: treat it as one lost game.
wide_weighted.loc[missing_top, 'won_mean_Top'] = 0
wide_weighted.loc[missing_top, 'count_Top'] = 1

# Top team with no games against Normal opponents: treat it as one won game.
wide_weighted.loc[missing_normal & is_top_team, 'won_mean_Normal'] = 1
wide_weighted.loc[missing_normal & is_top_team, 'count_Normal'] = 1

# Normal team with no games against Normal opponents: use the Normal-class average win rate.
normal_avg_won_vs_normal = wide.loc[is_average_row & is_normal_team, 'won_mean_Normal'].values[0]
wide_weighted.loc[missing_normal & is_normal_team, 'won_mean_Normal'] = normal_avg_won_vs_normal
wide_weighted.loc[missing_normal & is_normal_team, 'count_Normal'] = 1


In [92]:

# For points difference: wherever a team has no games against an opponent class
# (detected through the missing win rate in the un-imputed `wide`), use the average
# points difference of the team's own class against that opponent class.
for opponent_class in ('Top', 'Normal'):
    won_col = f'won_mean_{opponent_class}'
    pts_col = f'pts_diff_mean_{opponent_class}'
    for team_class in ('Top', 'Normal'):
        class_average = wide.loc[
            (wide['team'] == 'Average') & (wide['class_team'] == team_class), pts_col
        ].values[0]
        needs_fill = wide[won_col].isna() & (wide['class_team'] == team_class)
        wide_weighted.loc[needs_fill, pts_col] = class_average


In [94]:
# Step 2: add weights
# NOTE(review): this copy is not referenced by any later cell shown in this notebook;
# the weighting appears to be done inside get_comparable_stats instead. Confirm before removing.

wide_weighted_2 =wide_weighted.copy()

In [97]:
wide_weighted.tail(5)

Unnamed: 0,team,class_team,count_Top,count_Normal,won_mean_Top,won_mean_Normal,pts_diff_mean_Top,pts_diff_mean_Normal
21,Spain,Top,3.0,2.0,0.0,0.5,-4.666667,6.0
22,Sweden,Normal,1.0,5.0,0.0,0.2,-17.708333,-3.4
23,Türkiye,Top,3.0,1.0,0.333333,1.0,-6.666667,14.0
24,Average,Normal,1.333333,3.428571,0.166667,0.472619,-17.708333,-1.184524
25,Average,Top,3.4,2.125,0.528333,0.895833,1.146667,14.53125


In [101]:
wide_weighted

Unnamed: 0,team,class_team,count_Top,count_Normal,won_mean_Top,won_mean_Normal,pts_diff_mean_Top,pts_diff_mean_Normal
0,Belgium,Normal,1.0,4.0,0.0,0.25,-12.0,-15.5
1,Bosnia and Herzegovina,Normal,1.0,3.0,0.0,0.333333,-37.0,0.333333
2,Cyprus,Normal,1.0,1.0,0.0,0.0,-65.0,-40.0
3,Czechia,Normal,3.0,2.0,0.0,1.0,-19.0,16.5
4,Estonia,Normal,1.0,3.0,0.0,1.0,-21.0,6.0
5,Finland,Normal,1.0,4.0,0.0,1.0,-17.708333,22.25
6,France,Top,2.0,2.0,1.0,1.0,6.5,6.5
7,Georgia,Normal,1.0,5.0,0.0,0.0,-23.0,-10.6
8,Germany,Top,5.0,1.0,0.8,1.0,3.4,14.53125
9,Great Britain,Normal,2.0,2.0,0.0,0.0,-9.5,-6.0


### Compare teams using weighted values

In [None]:
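# Logic used by get_comparable_stats(team1, team2):
# - Each team is rated mainly on its record against opponents of the OTHER team's class
#   ("Top" or "Normal"), blended with its record against the remaining class.
# - If the team has more than one game in the main slice, that slice is weighted by its
#   game count and the remaining slice gets weight 1.
# - If the team has a single game in the main slice, the result is the plain average of
#   that game, the class average for that slice, and the remaining slice.
# - A remaining slice with a single game is first averaged with the class average.
# - The team with the higher blended win rate is suggested as the winner; the suggested
#   margin is half the gap between the two blended points differences.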

In [133]:
def _first_matching_row(stats, mask, description):
    """Return the first row of ``stats`` selected by ``mask`` or raise a clear error."""
    selected = stats.loc[mask]
    if selected.empty:
        raise ValueError(f"No row found for {description} in the stats table")
    return selected.iloc[0]


def _blend_team_outlook(team_row, class_avg_row, opponent_class):
    """Blend one team's per-slice stats into (win rate, points difference).

    ``opponent_class`` selects the slice of interest (games against that class);
    the other class is the secondary slice. ``class_avg_row`` is the "Average"
    row of the team's own class, used to soften slices with a single game.
    """
    if opponent_class == 'Normal':
        primary, secondary = 'Normal', 'Top'
    elif opponent_class == 'Top':
        primary, secondary = 'Top', 'Normal'
    else:
        raise ValueError(f"Unsupported opponent class: {opponent_class!r}")

    count_primary = team_row[f'count_{primary}']
    count_secondary = team_row[f'count_{secondary}']
    for slice_name, n_games in ((primary, count_primary), (secondary, count_secondary)):
        # NaN or values below 1 mean the imputation step was skipped for this team.
        if not (n_games >= 1):
            raise ValueError(
                f"count_{slice_name} must be >= 1 for team {team_row['team']!r}, got {n_games!r}"
            )

    blended = []
    for metric in ('won_mean', 'pts_diff_mean'):
        own_primary = team_row[f'{metric}_{primary}']
        own_secondary = team_row[f'{metric}_{secondary}']

        # A secondary slice with a single game is averaged with the class average.
        if count_secondary > 1:
            secondary_term = own_secondary
        else:
            secondary_term = (own_secondary + class_avg_row[f'{metric}_{secondary}']) / 2

        if count_primary > 1:
            # Enough games in the slice of interest: weight it by its game count.
            value = (own_primary * count_primary + secondary_term) / (count_primary + 1)
        else:
            # A single game in the slice of interest: plain average of that game,
            # the class average for the slice, and the secondary term.
            value = (own_primary + class_avg_row[f'{metric}_{primary}'] + secondary_term) / 3
        blended.append(value)

    return blended[0], blended[1]


def get_comparable_stats(team1, team2, stats=None):
    """Compare two teams using their imputed, class-sliced statistics.

    Parameters
    ----------
    team1, team2 : str
        Team names as they appear in the ``team`` column.
    stats : pandas.DataFrame, optional
        Table with the columns ``team``, ``class_team``, ``count_Top``,
        ``count_Normal``, ``won_mean_Top``, ``won_mean_Normal``,
        ``pts_diff_mean_Top`` and ``pts_diff_mean_Normal``, including one
        ``team == 'Average'`` row per class. Defaults to the notebook-level
        ``wide_weighted`` table.

    Returns
    -------
    dict
        ``{team1: {...}, team2: {...}, 'Suggestion': {...}}`` where each team
        entry holds ``win_chances`` and ``pts_diff`` and the suggestion holds
        ``winner`` and ``pts_difference``. When both blended win rates are
        equal, ``winner`` is ``''`` and ``pts_difference`` is ``0.0``.

    Raises
    ------
    ValueError
        If a team or its class "Average" row is missing, if a class is neither
        ``'Top'`` nor ``'Normal'``, or if a game count is below 1 or missing.
    """
    if stats is None:
        stats = wide_weighted

    row_1 = _first_matching_row(stats, stats['team'] == team1, f"team {team1!r}")
    row_2 = _first_matching_row(stats, stats['team'] == team2, f"team {team2!r}")

    is_average = stats['team'] == 'Average'
    avg_row_1 = _first_matching_row(
        stats, is_average & (stats['class_team'] == row_1['class_team']),
        f"the 'Average' row of class {row_1['class_team']!r}",
    )
    avg_row_2 = _first_matching_row(
        stats, is_average & (stats['class_team'] == row_2['class_team']),
        f"the 'Average' row of class {row_2['class_team']!r}",
    )

    # Each team is rated against the class of the *other* team.
    win_pct_team_1, pts_diff_team_1 = _blend_team_outlook(row_1, avg_row_1, row_2['class_team'])
    win_pct_team_2, pts_diff_team_2 = _blend_team_outlook(row_2, avg_row_2, row_1['class_team'])

    winner = ''
    pts_difference = 0
    if win_pct_team_1 > win_pct_team_2:
        winner = team1
        pts_difference = (pts_diff_team_1 - pts_diff_team_2) / 2
    elif win_pct_team_2 > win_pct_team_1:
        winner = team2
        pts_difference = (pts_diff_team_2 - pts_diff_team_1) / 2

    return {team1: {'win_chances': float(win_pct_team_1),
                    'pts_diff': float(pts_diff_team_1)},
            team2: {'win_chances': float(win_pct_team_2),
                    'pts_diff': float(pts_diff_team_2)},
            'Suggestion': {'winner': winner,
                           'pts_difference': float(pts_difference)}
            }



In [136]:
get_comparable_stats('Portugal', 'Iceland')

{'Portugal': {'win_chances': 0.5166666666666667,
  'pts_diff': -1.1708333333333332},
 'Iceland': {'win_chances': 0.2708333333333333,
  'pts_diff': -6.213541666666666},
 'Suggestion': {'winner': 'Portugal', 'pts_difference': 2.5213541666666663}}

In [None]:
# Export tables and add them to dvc