In [1]:
# Process
# =================
# Run the whole pipeline for one match date through main.process and keep
# every frame it returns under its own name.
from main import process

year, month, day = '2022', '03', '16'

# NOTE(review): the meaning of the leading False flag is defined in
# main.process and is not visible from this notebook.
(
    df_games_of_day,
    df_games_acum,
    df_fply_gold,
    df_class,
    df_class_consolidate,
    df_risk,
    monte_carlo_results,
    monte_carlo_summary,
) = process(False, year, month, day)

<Response [200]>
Análisis completado. Revisa el archivo 'betting_analysis.xlsx' para ver los resultados.


In [1]:
# Libraries
# =================
import os
import pandas as pd

# Parameters
# =================
from src.parameters import Parameters

# Data
# =================
from src.data_clean import get_data
from src.data_ranking import atp_ranking_last_date
from src.data_scrapped_clean import scrapped_data_organized

# Scrapping
# =================
from src.scrapping import scrapping_tennis_data


# Classification
# =================
from src.class_preprocessing import data_to_class
from src.class_classification import train_classification, fn_classification

# Risk
# =================
from src.risk_analysis import risk_analysis_montecarlo

# Process
# =================
from main import process

# Warnings
# =================
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Train
# =================
# Read the raw training CSV and rename the AvgW / AvgL columns to
# pl1_bet / pl2_bet in a single expression.
df_raw = (
    pd.read_csv(os.path.join(Parameters.train_path, Parameters.nombre_archivo_raw))
    .rename(columns={'AvgW': 'pl1_bet', 'AvgL': 'pl2_bet'})
)

# Produce the bronze / silver / gold frames from the raw data.
# NOTE(review): the meaning of the positional True flag is defined in
# get_data and is not visible from this notebook.
df_bronze, df_silver, df_gold = get_data(
    df_raw,
    True,
    Parameters.train_path,
    Parameters.nombre_archivo_gold,
)

# Turn the gold frame into X, y (presumably features and target) plus a
# preprocessor, then hand them to the training routine.
X, y, preprocessor = data_to_class(df_gold)
train_classification(
    X,
    y,
    preprocessor,
    Parameters.results_path,
    Parameters.models_path,
)

In [2]:
# Target date, kept as zero-padded strings.
year, month, day = '2022', '03', '05'

# Two renderings of the same date: dashed (YYYY-MM-DD) and compact (YYYYMMDD).
n_date = '-'.join((year, month, day))
ndate = ''.join((year, month, day))

In [3]:
# Scrape the games for the selected date; the call returns the frame for that
# day together with an accumulated frame.
(
    df_games_of_day,
    df_games_acum,
) = scrapping_tennis_data(
    year,
    month,
    day,
    Parameters.req_headers,
    Parameters.scraped_path,
    Parameters.file_match_result,
    Parameters.file_fields_desc,
    Parameters.file_players_desc,
    Parameters.file_games,
    Parameters.daily_dump_path,
)

<Response [200]>


In [2]:
# df_games_of_day = pd.read_csv(os.path.join(Parameters.daily_dump_path, 'df_games_20220306.csv'), sep='|')

In [7]:
# Pred
# =================

# Ranking table for n_date (see atp_ranking_last_date for the exact lookup).
atp_all = atp_ranking_last_date(Parameters.utils_path, "atp_mens_tour", n_date)

# Organise the scraped games of the day. The cleaned-file name embeds ndate
# with any dashes stripped.
(
    df_fply,
    df_fply_bronze,
    df_fply_silver,
    df_fply_gold,
) = scrapped_data_organized(
    df_games_of_day,
    Parameters.utils_path,
    Parameters.daily_dump_path,
    Parameters.file_paises,
    f'df_games_cleaned_{ndate.replace("-","")}.csv',
    atp_all,
)

# Alternative input (disabled): reload a previously cleaned file from disk.
# df_to_pred = pd.read_csv(os.path.join(results_path, file_data_scrapped_cleaned))

# Work on a copy so the gold frame itself is left untouched.
df_to_pred = df_fply_gold.copy()
X, y, preprocessor = data_to_class(df_to_pred)

# Classify the games; y and preprocessor are not used by this call.
(
    df_class,
    df_class_consolidate,
    reporte_clasificacion,
) = fn_classification(df_to_pred, X, n_date, Parameters.results_path)

In [11]:
# Risk
# =================

# Run risk_analysis_montecarlo on df_class. The three plot names (.png) and the
# workbook name (.xlsx) are suffixed with the compact date held in ndate.
(
    df_risk,
    monte_carlo_results,
    monte_carlo_summary,
) = risk_analysis_montecarlo(
    df_class,
    Parameters.risk_free_rate,
    Parameters.risk_tolerance,
    Parameters.total_money,
    Parameters.num_simulations,
    Parameters.num_bets,
    Parameters.plots_path,
    Parameters.results_path,
    f'{Parameters.name_monte_carlo_dist}_{ndate}.png',
    f'{Parameters.name_ev_comparation}_{ndate}.png',
    f'{Parameters.name_sharpe_ratio_comparison}_{ndate}.png',
    f'{Parameters.file_betting_analysis}_{ndate}.xlsx',
)

Análisis completado. Revisa el archivo 'betting_analysis.xlsx' para ver los resultados.


In [12]:
from scipy.optimize import minimize
import numpy as np

# Stake allocation
# =================
# Distribute a fixed budget across the candidate bets by maximising a
# risk-penalised utility.
#
# NOTE(review): this cell used to read a bare `df`, which no cell in this
# notebook defines, so it failed with NameError on Restart & Run All. The columns
# it needs look like the risk-analysis output, so it now reads `df_risk`.
# TODO confirm that df_risk carries Money_to_Bet, Best_Bet_EV, Prob_Win,
# Sharpe_Ratio_pl1 and Std_Dev_pl1.
bets = df_risk

# Starting point for the solver: the stake currently stored for each row
# (per the original note, presumably proportional to the Sharpe ratio).
initial_money = bets['Money_to_Bet'].values

# Budget to distribute; 100 is an example value.
total_money = 100


def utility_distribution(x, data=None):
    """Return the negative utility of the stake vector ``x``.

    The utility is
    ``sum(x * Best_Bet_EV * Prob_Win) - sum(x * Std_Dev_pl1 / (1 + |Sharpe_Ratio_pl1|))``.
    The sign is flipped because ``scipy.optimize.minimize`` minimises, while the
    goal is to maximise the utility. Only the player-1 risk columns are used.

    Parameters
    ----------
    x : array-like
        Amount staked on each row, in the row order of the frame.
    data : pandas.DataFrame, optional
        Frame providing the four columns named above. Defaults to the
        notebook-level ``bets`` frame, so the one-argument call used by the
        solver keeps working.

    Returns
    -------
    float
        ``-utility`` for the given stakes.
    """
    frame = bets if data is None else data

    EV = frame['Best_Bet_EV'].values
    prob_win = frame['Prob_Win'].values
    sharpe_ratio = frame['Sharpe_Ratio_pl1'].values
    std_dev = frame['Std_Dev_pl1'].values

    # Reward term: stake weighted by EV and win probability.
    reward = np.sum(x * (EV * prob_win))
    # Risk term: stake weighted by the deviation, damped when |Sharpe| is large.
    risk_penalty = np.sum(x * (std_dev / (1 + np.abs(sharpe_ratio))))

    return -(reward - risk_penalty)


# Equality constraint: the stakes must add up to the whole budget.
constraints = [{'type': 'eq', 'fun': lambda x: np.sum(x) - total_money}]

# Every stake is non-negative, with no upper cap.
bounds = [(0, None)] * len(initial_money)

# NOTE(review): the objective is linear in x, so under the budget and
# non-negativity constraints the optimum sits at a vertex (the whole budget on
# the row with the best per-unit score). Add per-bet caps or a concave utility
# if a diversified allocation is intended.
result = minimize(utility_distribution, initial_money, bounds=bounds, constraints=constraints)

# Surface solver failures instead of silently displaying an unconverged vector.
if not result.success:
    print(f"Optimization did not converge: {result.message}")

# Optimised amount of money to bet on each row.
result.x


<Response [200]>
Análisis completado. Revisa el archivo 'betting_analysis.xlsx' para ver los resultados.
