In [1]:
# Imports
import enum
import math
import numpy as np
import pandas as pd
from datetime import date, datetime

In [2]:
# Load data
matches = pd.read_csv('./data/matches.csv')

# Update matches
new_matches = df2 = pd.DataFrame(
    [
        ['2021-06-03', 'Bolivia', 'Venezuela', 3, 1, 'FIFA World Cup qualification', 'La Paz', 'Bolivia', False],
        ['2021-06-03', 'Uruguay', 'Paraguay', 0, 0, 'FIFA World Cup qualification', 'Montevideo', 'Uruguay', False],
        ['2021-06-03', 'Argentina', 'Chile', 1, 1, 'FIFA World Cup qualification', 'Santiago del Estero', 'Argentina', False],
        ['2021-06-03', 'Peru', 'Colombia', 0, 3, 'FIFA World Cup qualification', 'Lima', 'Peru', False],
        ['2021-06-04', 'Brazil', 'Ecuador', 2, 0, 'FIFA World Cup qualification', 'Porto Alegre', 'Brazil', False],
        ['2021-06-08', 'Ecuador', 'Peru', 1, 2, 'FIFA World Cup qualification', 'Quito', 'Ecuador', False],
        ['2021-06-08', 'Venezuela', 'Uruguay', 0, 0, 'FIFA World Cup qualification', 'Caracas', 'Venezuela', False],
        ['2021-06-08', 'Colombia', 'Argentina', 2, 2, 'FIFA World Cup qualification', 'Barranquilla', 'Colombia', False],
        ['2021-06-08', 'Paraguay', 'Brazil', 0, 2, 'FIFA World Cup qualification', 'Asunción', 'Paraguay', False],
        ['2021-06-08', 'Chile', 'Bolivia', 1, 1, 'FIFA World Cup qualification', 'Santiago', 'Chile', False],
    ], 
    columns=['date', 'home_team', 'away_team', 'home_score', 'away_score', 'tournament', 'city', 'country', 'neutral']
)
matches = matches.append(new_matches)

# Load teams
CONMEBOL_TEAMS = {'Argentina': 'ARG', 'Brazil': 'BRA', 'Uruguay': 'URU', 'Bolivia': 'BOL', 'Paraguay': 'PAR', 'Chile': 'CHI', 'Peru': 'PER', 'Colombia': 'COL', 'Ecuador': 'ECU', 'Venezuela': 'VEN'}
CONMEBOL_TEAMS_NAMES = list(CONMEBOL_TEAMS.keys())

# Conmebol matches
conmebol_matches = matches.loc[matches['home_team'].isin(CONMEBOL_TEAMS_NAMES) & matches['away_team'].isin(CONMEBOL_TEAMS_NAMES)]

# Map dates
conmebol_matches['date'] = conmebol_matches['date'].apply(lambda match_date: datetime.strptime(match_date, "%Y-%m-%d").date())
conmebol_matches = conmebol_matches.sort_values(by=['date'])

# Map neutral
conmebol_matches['neutral'] = conmebol_matches.apply(lambda match: match['home_team'] != match['country'], axis=1)

# Map names
conmebol_matches['home_team'] = conmebol_matches['home_team'].apply(lambda team: CONMEBOL_TEAMS[team])
conmebol_matches['away_team'] = conmebol_matches['away_team'].apply(lambda team: CONMEBOL_TEAMS[team])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  conmebol_matches['date'] = conmebol_matches['date'].apply(lambda match_date: datetime.strptime(match_date, "%Y-%m-%d").date())


In [3]:
# Simulation rules

TIER_1 = ['Friendly']
TIER_2 = ['Copa Lipton', 'Copa Newton', 'Copa Premio Honor Argentino', 'Copa Premio Honor Uruguayo', 'Copa Roca', 'Copa Chevallier Boutell', 'Copa Rio Branco', 'Copa Oswaldo Cruz', 'Pan American Championship', 'Copa del Pacífico', 'Copa Bernardo O\'Higgins', 'Atlantic Cup', 'Copa Paz del Chaco', 'Copa Carlos Dittborn', 'Copa Juan Pinto Durán', 'Copa Artigas', 'Brazil Independence Cup', 'Copa Ramón Castilla', 'Copa Félix Bogado', 'Gold Cup']
TIER_3 = ['FIFA World Cup qualification']
TIER_4 = ['Copa América', 'Confederations Cup', 'Mundialito']
TIER_5 = ['FIFA World Cup']

def tournament_value(tournament):
    if tournament in TIER_5:
        tournament_points = 5
    elif tournament in TIER_4:
        tournament_points = 4
    elif tournament in TIER_3:
        tournament_points = 3
    elif tournament in TIER_2:
        tournament_points = 2
    else:
        tournament_points = 1
    return math.sqrt(tournament_points)


BUCKETS = 10
CURRENT_YEAR = date.today().year
BUCKET_LAPSE = math.ceil((CURRENT_YEAR - conmebol_matches.iloc[0]['date'].year) / BUCKETS)

def date_value(match_date):
    return math.sqrt(BUCKETS - math.floor((CURRENT_YEAR - match_date.year) / BUCKET_LAPSE))


def result_value(home_score, away_score, neutral):
    home, tie, away = 0, 0, 0
    if home_score > away_score:
        home = 1
    elif home_score == away_score:
        tie = 1
    else:
        away = 1
        
    if not neutral:
        if tie != 0:
            tie += 0.25
        if away != 0:
            away += 0.25
        
    if home_score >= 3 + away_score:
        home += 0.5
    
    if away_score >= 3 + home_score:
        away += 0.5
        
    return home, tie, away


def total_points(match, result):
    date_points = date_value(match['date'])
    home, tie, away = result_value(match['home_score'], match['away_score'], match['neutral'])    
    tournament_points = tournament_value(match['tournament'])
    
    if result == 'team1':
        result_points = home if match['home_team'] < match['away_team'] else away
    elif result == 'team2':
        result_points = away if match['home_team'] < match['away_team'] else home
    else:
        result_points = tie

    return result_points * date_points * tournament_points


def match_name(home_team, away_team):
    if home_team < away_team:
        return f'{home_team}-{away_team}'
    else:
        return f'{away_team}-{home_team}'
    
def teams(match):
    return match.split('-')

In [4]:
# Team handicap

PRESENT_MATCHES = 8
PRESENT_MATCHES_MAX_POINTS = PRESENT_MATCHES * 3

def match_points(home_score, away_score, neutral):
    if home_score > away_score:
        home_points = 2.5
        away_points = 0
    elif home_score == away_score:
        home_points = 1
        away_points = 1
    else:
        home_points = 0
        away_points = 3
        
    if not neutral and away_points != 0:
        away_points += 1
        
    if home_score >= 3 + away_score:
        home_points += 1
    
    if away_score >= 3 + home_score:
        away_points += 1
        
    return home_points, away_points

handicaps = {}
for TEAM in CONMEBOL_TEAMS_NAMES:
    team_points = 0
    team_matches = matches.loc[(matches['home_team'] == TEAM) | (matches['away_team'] == TEAM)].tail(PRESENT_MATCHES)
    for _, match in team_matches.iterrows():
        home_points, away_points = match_points(match['home_score'], match['away_score'], match['neutral'])
    
        if match['home_team'] == TEAM:
            team_points += home_points
        elif match['away_team'] == TEAM:
            team_points += away_points
        
    handicaps[CONMEBOL_TEAMS[TEAM]] = math.sqrt(team_points / PRESENT_MATCHES_MAX_POINTS)

In [5]:
# Probabilities

class Results(enum.Enum):
    W = 1
    T = 0
    L = -1

conmebol_matches['name'] = conmebol_matches.apply(lambda match: match_name(match['home_team'], match['away_team']), axis=1)
conmebol_matches['team1_total'] = conmebol_matches.apply(lambda match: total_points(match, 'team1'), axis=1)
conmebol_matches['tie_total'] = conmebol_matches.apply(lambda match: total_points(match, 'tie'), axis=1)
conmebol_matches['team2_total'] = conmebol_matches.apply(lambda match: total_points(match, 'team2'), axis=1)

historic_simulation = conmebol_matches[['name', 'team1_total', 'tie_total', 'team2_total']]
historic_simulation = historic_simulation.groupby(['name'])

def final_prediction(team1_name, team2_name, historics, result):
    team1_total = sum(historics['team1_total']) * handicaps[team1_name]
    team2_total = sum(historics['team2_total']) * handicaps[team2_name]
    tie_total = sum(historics['tie_total'])
    
    full_total = team1_total + tie_total + team2_total
    result_total = team1_total if result == 'team1' else team2_total if result == 'team2' else tie_total
    
    return result_total / full_total

probabilities = {}
for match, historics in historic_simulation:
    team1_name, team2_name = teams(match)
    team1 = final_prediction(team1_name, team2_name, historics, 'team1')
    team2 = final_prediction(team1_name, team2_name, historics, 'team2')
    tie = final_prediction(team1_name, team2_name, historics, 'tie')
    
    if team1_name not in probabilities:
        probabilities[team1_name] = {}
        
    if team2_name not in probabilities:
        probabilities[team2_name] = {}
        
    probabilities[team1_name][team2_name] = { Results.W: team1, Results.T: tie, Results.L: team2 }
    probabilities[team2_name][team1_name] = { Results.W: team2, Results.T: tie, Results.L: team1 }
    

In [26]:
probabilities['ECU']['BRA']

{<Results.W: 1>: 0.04007693730837085,
 <Results.T: 0>: 0.12740431487386475,
 <Results.L: -1>: 0.8325187478177644}