<a href="https://colab.research.google.com/github/LuisFreire50/Banco-de-Dados/blob/main/Backtesting_da_Simula%C3%A7%C3%A3o_de_Partidas_de_Futebol_para_o_Match_Odds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Division code interpolated into the football-data.co.uk CSV URL when the
# data is loaded; uncomment exactly one of the assignments below.
# League = 'E0' # Premier League
# League = 'SP1' # La Liga
# League = 'D1' # Bundesliga
League = 'F1' # Ligue 1
# League = 'I1' # Serie A

### Importando as Bibliotecas e Funções

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import poisson

import warnings
warnings.filterwarnings("ignore")

def drop_reset_index(df):
    """Return a copy of *df* without NaN rows, re-indexed from 1.

    Every row containing at least one missing value is removed, the old
    index is discarded, and the surviving rows are numbered 1..n.
    """
    cleaned = df.dropna().reset_index(drop=True)
    # Shift the fresh 0-based index so the first row is labelled 1.
    cleaned.index = cleaned.index + 1
    return cleaned

def simulate_match(home_goals_for, home_goals_against, away_goals_for, away_goals_against, num_simulations=10000, random_seed=42):
    """Simulate a match by drawing Poisson-distributed goals for each side.

    The expected goals of a side are the mean of its own scoring average and
    the opponent's conceding average.

    Args:
        home_goals_for: Average goals scored by the home team.
        home_goals_against: Average goals conceded by the home team.
        away_goals_for: Average goals scored by the away team.
        away_goals_against: Average goals conceded by the away team.
        num_simulations: Number of simulated matches to draw.
        random_seed: Seed for the random generator; the same seed always
            yields the same simulated scores.

    Returns:
        DataFrame with one row per simulated match and the integer columns
        'Home_Goals' and 'Away_Goals'.
    """
    # A private RandomState yields the same draws as seeding the global
    # NumPy generator, but leaves the caller's global random state untouched.
    rng = np.random.RandomState(random_seed)

    estimated_home_goals = (home_goals_for + away_goals_against) / 2
    estimated_away_goals = (away_goals_for + home_goals_against) / 2

    # Home goals are drawn first, then away goals, from the same stream.
    home_goals = poisson.rvs(estimated_home_goals, size=num_simulations, random_state=rng)
    away_goals = poisson.rvs(estimated_away_goals, size=num_simulations, random_state=rng)

    results = pd.DataFrame({
        'Home_Goals': home_goals,
        'Away_Goals': away_goals
    })

    return results

def top_results_df(simulated_results, top_n=10):
    """Tabulate the *top_n* most frequent scorelines of a simulation.

    Returns a DataFrame with the columns 'Home_Goals', 'Away_Goals', 'Count'
    and 'Probability', ordered from the most to the least frequent score.
    'Probability' is each count divided by the total count of the rows that
    were kept, so it sums to 1 over the returned rows only.
    """
    score_frequencies = simulated_results.value_counts()
    top_scores = score_frequencies.iloc[:top_n].reset_index()
    top_scores.columns = ['Home_Goals', 'Away_Goals', 'Count']

    # Normalise against the retained rows, not against all simulations.
    top_scores['Probability'] = top_scores['Count'] / top_scores['Count'].sum()
    return top_scores

def plot_profit_acu(dataframe, title_text):
    """Plot the cumulative profit of a strategy and print summary statistics.

    Args:
        dataframe: Bets in chronological order with a numeric 'Profit' column
            (profit of each bet, in stakes). A 'Profit_acu' column holding the
            running total is added to it in place.
        title_text: Name of the strategy, used as plot title and in the report.

    Prints total profit, ROI, maximum drawdown, win rate and the standard
    deviation of the per-bet profit. When *dataframe* has no rows, nothing is
    plotted and only a short notice is printed, since none of the statistics
    are defined for an empty set of bets.
    """
    dataframe['Profit_acu'] = dataframe.Profit.cumsum()
    n_apostas = dataframe.shape[0]

    # A filter that selects no matches would otherwise crash when the last
    # cumulative value is read.
    if n_apostas == 0:
        print("Método:",title_text)
        print("Nenhuma aposta selecionada.")
        print("")
        return

    profit_total = dataframe['Profit_acu'].iloc[-1]
    profit = round(float(profit_total), 2)
    # ROI is the profit per bet expressed as a percentage of the unit stake.
    ROI = round(float(profit_total / n_apostas * 100), 2)
    # Drawdown is the distance of the running total below its previous peak.
    drawdown = dataframe['Profit_acu'] - dataframe['Profit_acu'].cummax()
    drawdown_maximo = round(drawdown.min(), 2)
    winrate_medio = round((dataframe['Profit'] > 0).mean() * 100, 2)
    desvio_padrao = round(dataframe['Profit'].std(), 2)
    dataframe.Profit_acu.plot(title=title_text, xlabel='Entradas', ylabel='Stakes')
    print("Método:",title_text)
    print("Profit:", profit, "stakes em", n_apostas, "jogos")
    print("ROI:", ROI, "%")
    print("Drawdown Maximo Acumulado:", drawdown_maximo)
    print("Winrate Medio:", winrate_medio, "%")
    print("Desvio Padrao:", desvio_padrao)
    print("")

### Importando a Base de Dados

In [None]:
# Source column -> short name used in the rest of the notebook (full-time
# goals and the Bet365 home/draw/away odds).
_column_names = {
    'Date': 'Date',
    'HomeTeam': 'Home',
    'AwayTeam': 'Away',
    'FTHG': 'Goals_H',
    'FTAG': 'Goals_A',
    'B365H': 'Odd_H',
    'B365D': 'Odd_D',
    'B365A': 'Odd_A',
}

# Download the 2022/23 season of the selected league, keep only the mapped
# columns, then drop incomplete rows and renumber from 1.
df = drop_reset_index(
    pd.read_csv(f"https://www.football-data.co.uk/mmz4281/2223/{League}.csv")[list(_column_names)]
    .rename(columns=_column_names)
)

### Criando as Variáveis

In [None]:
# Rolling 5-match averages per team and venue, as
# (feature column, team column used for grouping, goals column averaged):
#   Media_GM_* = average goals scored, Media_GS_* = average goals conceded.
_rolling_features = [
    ('Media_GM_H', 'Home', 'Goals_H'),
    ('Media_GM_A', 'Away', 'Goals_A'),
    ('Media_GS_H', 'Home', 'Goals_A'),
    ('Media_GS_A', 'Away', 'Goals_H'),
]

for _feature, _team_col, _goals_col in _rolling_features:
    _rolled = df.groupby(_team_col)[_goals_col].rolling(window=5, min_periods=5).mean()
    # Drop the group level so the values align with df's own index.
    df[_feature] = _rolled.reset_index(0, drop=True)
    # Shift within each team so a match only sees averages of earlier matches.
    df[_feature] = df.groupby(_team_col)[_feature].shift(1)

In [None]:
# Remove the matches whose rolling averages are still NaN (fewer than five
# earlier games at that venue) and renumber the remaining rows from 1.
df = drop_reset_index(df)
df

### Backtesting

In [None]:
# Outcome shares filled in by the backtesting loop. NaN (instead of an empty
# string) keeps the columns numeric, so an unfilled row is treated as missing
# rather than breaking the numeric comparisons applied later.
df['Prob_Home'] = np.nan
df['Prob_Draw'] = np.nan
df['Prob_Away'] = np.nan

In [None]:
# Simulate every match from its pre-match rolling averages and store the
# share of home wins, draws and away wins among the most frequent scorelines.
#
# NOTE(review): each share is the fraction of the top scorelines (each
# scoreline counted once, not weighted by how often it was simulated), so it
# is not a frequency-weighted probability. Kept as in the original method.
prob_home = []
prob_draw = []
prob_away = []

for match in df.itertuples(index=False):

    # Simulate the match: scoring (GM) and conceding (GS) averages of the
    # home side, followed by those of the away side.
    simulated_results = simulate_match(
        match.Media_GM_H,
        match.Media_GS_H,
        match.Media_GM_A,
        match.Media_GS_A,
    )
    results = top_results_df(simulated_results)
    results = drop_reset_index(results)

    # Classify the retained scorelines by outcome.
    n_scores = len(results)
    Home = (results['Home_Goals'] > results['Away_Goals']).sum()
    Draw = (results['Home_Goals'] == results['Away_Goals']).sum()
    Away = (results['Home_Goals'] < results['Away_Goals']).sum()
    prob_home.append(float(Home / n_scores))
    prob_draw.append(float(Draw / n_scores))
    prob_away.append(float(Away / n_scores))

# Assign whole columns at once: element-wise chained assignment
# (df[col][i] = value) is not guaranteed to write back into df.
df['Prob_Home'] = prob_home
df['Prob_Draw'] = prob_draw
df['Prob_Away'] = prob_away

In [None]:
# Display the matches together with the simulated outcome shares.
df

In [None]:
# Strategy 1: back the home team whenever at least half of the top simulated
# scorelines are home wins.
flt1 = (df.Prob_Home >= 0.5)
df1 = df[flt1]
df1 = drop_reset_index(df1)

# Flat one-unit stake: a winning bet returns the odd minus the stake, any
# other result loses the stake. Built in one step as a float column, so no
# float odds are written into an integer column.
df1['Profit'] = np.where(df1['Goals_H'] > df1['Goals_A'], df1['Odd_H'] - 1, -1.0)

In [None]:
# Strategy 2: same signal as the plain home bet, restricted to matches where
# the home team has the lower odd (the bookmaker's favourite).
flt2 = (df.Prob_Home >= 0.5) & (df.Odd_H < df.Odd_A)
df2 = df[flt2]
df2 = drop_reset_index(df2)

# Flat one-unit stake: a winning bet returns the odd minus the stake, any
# other result loses the stake. Built in one step as a float column, so no
# float odds are written into an integer column.
df2['Profit'] = np.where(df2['Goals_H'] > df2['Goals_A'], df2['Odd_H'] - 1, -1.0)

In [None]:
# Strategy 3: same signal as the plain home bet, restricted to matches where
# the home team has the higher odd (the bookmaker's underdog).
flt3 = (df.Prob_Home >= 0.5) & (df.Odd_H > df.Odd_A)
df3 = df[flt3]
df3 = drop_reset_index(df3)

# Flat one-unit stake: a winning bet returns the odd minus the stake, any
# other result loses the stake. Built in one step as a float column, so no
# float odds are written into an integer column.
df3['Profit'] = np.where(df3['Goals_H'] > df3['Goals_A'], df3['Odd_H'] - 1, -1.0)

### Resultados

In [None]:
# Cumulative-profit chart and summary for all home bets.
plot_profit_acu(df1, 'Back Home')

In [None]:
# Cumulative-profit chart and summary for home bets on favourites (Odd_H < Odd_A).
plot_profit_acu(df2, 'Back Home Favoritos')

In [None]:
# Cumulative-profit chart and summary for home bets on underdogs (Odd_H > Odd_A).
plot_profit_acu(df3, 'Back Home Zebras')