In [1]:
# Libraries
# =================
import os
import pandas as pd

# Parameters
# =================
from src.parameters import Parameters

# Data
# =================
from src.data_clean import get_data
from src.data_ranking import atp_ranking_last_date
from src.data_scrapped_clean import scrapped_data_organized

# Scrapping
# =================
from src.scrapping import scrapping_tennis_data


# Classification
# =================
from src.class_preprocessing import data_to_class
from src.class_classification import train_classification, fn_classification

# Risk
# =================
from src.risk_analysis import risk_analysis_montecarlo

# Optimization
# =================
from src.optimization import Optimization

# Warnings
# =================
import warnings
# Installs a blanket "ignore" filter, so no warning of any category is shown
# for the rest of the kernel session.
# NOTE(review): this also hides deprecation notices from the libraries used
# below — confirm that silencing everything is intended.
warnings.filterwarnings("ignore")

In [2]:
# Date to process
# =================
# The three components are kept as zero-padded strings; both derived forms
# below are built from the same tuple so they can never disagree.
year, month, day = '2022', '03', '16'
date_parts = (year, month, day)
n_date = '-'.join(date_parts)   # dashed form:  '2022-03-16'
ndate = ''.join(date_parts)     # compact form: '20220316' (used in file names)

In [None]:
# Train
# =================
# 1) Read the raw training CSV and rename AvgW / AvgL to pl1_bet / pl2_bet.
raw_csv_path = os.path.join(Parameters.train_path, Parameters.nombre_archivo_raw)
df_raw = pd.read_csv(raw_csv_path).rename(columns={'AvgW': 'pl1_bet', 'AvgL': 'pl2_bet'})

# 2) Run the cleaning pipeline, which hands back three frames.
#    NOTE(review): the meaning of the positional `True` flag is defined in
#    src.data_clean.get_data — confirm against that module.
df_bronze, df_silver, df_gold = get_data(
    df_raw,
    True,
    Parameters.train_path,
    Parameters.nombre_archivo_gold,
)

# 3) Build the model inputs from the gold frame and train the classifier.
X, y, preprocessor = data_to_class(df_gold)
train_classification(
    X,
    y,
    preprocessor,
    Parameters.results_path,
    Parameters.models_path,
)

In [3]:
# Scrapping
# =================
# Positional arguments for the scraper: the run date first, then the request
# headers and the file/folder settings, all taken from Parameters.
scrapping_args = (
    year, month, day,
    Parameters.req_headers,
    Parameters.scraped_path,
    Parameters.file_match_result,
    Parameters.file_fields_desc,
    Parameters.file_players_desc,
    Parameters.file_games,
    Parameters.daily_dump_path,
)

# The scraper returns two objects, bound here to the day's games and the
# accumulated games.
df_games_of_day, df_games_acum = scrapping_tennis_data(*scrapping_args)

<Response [200]>


In [2]:
# df_games_of_day = pd.read_csv(os.path.join(Parameters.daily_dump_path, 'df_games_20220306.csv'), sep='|')

In [7]:
# Pred
# =================

# Rankings looked up for the dashed run date.
atp_all = atp_ranking_last_date(Parameters.utils_path, "atp_mens_tour", n_date)

# File name passed to the organiser for the cleaned daily games.
cleaned_games_file = f'df_games_cleaned_{ndate.replace("-","")}.csv'

# Organise the scraped games; four frames come back, of which only the gold
# one is used further down in this cell.
(
    df_fply,
    df_fply_bronze,
    df_fply_silver,
    df_fply_gold,
) = scrapped_data_organized(
    df_games_of_day,
    Parameters.utils_path,
    Parameters.daily_dump_path,
    Parameters.file_paises,
    cleaned_games_file,
    atp_all,
)

# Alternative input, kept for reference: reload a previously cleaned file.
# df_to_pred = pd.read_csv(os.path.join(results_path, file_data_scrapped_cleaned))

# Work on a copy so the gold frame itself is never modified by later steps.
df_to_pred = df_fply_gold.copy()
X, y, preprocessor = data_to_class(df_to_pred)

(
    df_class,
    df_class_consolidate,
    reporte_clasificacion,
) = fn_classification(df_to_pred, X, n_date, Parameters.results_path)

In [11]:
# Risk
# =================
# Output artefact names, each suffixed with the compact run date (ndate).
risk_plot_files = (
    f'{Parameters.name_monte_carlo_dist}_{ndate}.png',
    f'{Parameters.name_ev_comparation}_{ndate}.png',
    f'{Parameters.name_sharpe_ratio_comparison}_{ndate}.png',
)
risk_report_file = f'{Parameters.file_betting_analysis}_{ndate}.xlsx'

# Monte Carlo risk analysis over the classified matches.
# NOTE(review): the trailing positional `False` is an opaque flag here; its
# meaning lives in src.risk_analysis — confirm before changing it.
df_risk, monte_carlo_results, monte_carlo_summary = risk_analysis_montecarlo(
    df_class,
    Parameters.risk_free_rate,
    Parameters.risk_tolerance,
    Parameters.total_money,
    Parameters.num_simulations,
    Parameters.num_bets,
    Parameters.plots_path,
    Parameters.results_path,
    *risk_plot_files,
    risk_report_file,
    False,
)

Análisis completado. Revisa el archivo 'betting_analysis.xlsx' para ver los resultados.


In [None]:

# Optimization
# =================

# Optional: reload a previously saved risk report instead of reusing the
# in-memory df_risk.
# df_risk = pd.read_excel(os.path.join(Parameters.results_path,
#                                               f'{Parameters.file_betting_analysis}_{ndate}.xlsx'),
#                                               sheet_name='Betting Decisions')

# df_risk['match'] = df_risk['pl1'] +'\n' + df_risk['pl2']

# Optimise the stake allocation for the current risk frame.
optimizer = Optimization(
    df_risk,
    Parameters.total_money,
    Parameters.max_loss_percentage,
    Parameters.min_percentage,
)
df_risk_optimized = optimizer.optimize()

# Artefact names for the re-run on the optimised allocation.
# NOTE(review): the first plot name carries a double underscore before
# "Optimized" while the other three use a single one — confirm if intended.
optimized_plot_files = (
    f'{Parameters.name_monte_carlo_dist}_{ndate}__Optimized.png',
    f'{Parameters.name_ev_comparation}_{ndate}_Optimized.png',
    f'{Parameters.name_sharpe_ratio_comparison}_{ndate}_Optimized.png',
)
optimized_report_file = f'{Parameters.file_betting_analysis}_{ndate}_Optimized.xlsx'

# Re-run the risk analysis on the optimised frame (trailing flag is True here).
# NOTE(review): elsewhere in this notebook the same function's result is
# unpacked into three names; here the whole return value is bound to
# df_risk_optimized — verify which form matches the current implementation.
df_risk_optimized = risk_analysis_montecarlo(
    df_risk_optimized,
    Parameters.risk_free_rate,
    Parameters.risk_tolerance,
    Parameters.total_money,
    Parameters.num_simulations,
    Parameters.num_bets,
    Parameters.plots_path,
    Parameters.results_path,
    *optimized_plot_files,
    optimized_report_file,
    True,
)

In [104]:
# Backtest window
# =================
# Run the risk analysis and the stake optimisation once per calendar day in
# the window, giving every day an equal share of the total bankroll.
start_date = pd.to_datetime('2022-03-01')
end_date = pd.to_datetime('2022-03-17')
list_dates = pd.date_range(start=start_date, end=end_date).tolist()
total_money_daily = Parameters.total_money / len(list_dates)

# Classification results saved earlier; filtered per day via 'date_run'.
df_class = pd.read_excel(os.path.join(Parameters.results_path, 'df_class.xlsx'))

# Iterate over the timestamps directly (no index lookups) and derive every
# date string from the same value so they cannot drift apart.
# NOTE: this loop rebinds year / month / day / n_date / ndate, which earlier
# cells also define; after it finishes they hold the last day of the window.
for date in list_dates:

    n_date = date.strftime('%Y-%m-%d')   # dashed form, matched against 'date_run'
    ndate = date.strftime('%Y%m%d')      # compact form, used in file names
    year, month, day = n_date.split('-')

    df_class_date = df_class[df_class['date_run'] == n_date]
    # NOTE(review): a day without rows yields an empty frame here; how the
    # two steps below behave on empty input is not visible from this notebook.

    # Risk
    # =================
    # NOTE(review): the whole return value is bound to df_risk here, while
    # another cell unpacks three values from the same function — verify which
    # form matches the current implementation.
    df_risk = risk_analysis_montecarlo(
        df_class_date,
        Parameters.risk_free_rate,
        Parameters.risk_tolerance,
        total_money_daily,
        Parameters.num_simulations,
        Parameters.num_bets,
        Parameters.plots_path,
        Parameters.results_path,
        f'{Parameters.name_monte_carlo_dist}_{ndate}.png',
        f'{Parameters.name_ev_comparation}_{ndate}.png',
        f'{Parameters.name_sharpe_ratio_comparison}_{ndate}.png',
        f'{Parameters.file_betting_analysis}_{ndate}.xlsx',
        False,
    )

    # Optimization
    # =================
    optimizer = Optimization(
        df_risk,
        total_money_daily,
        Parameters.max_loss_percentage,
        Parameters.min_percentage,
    )
    df_risk_optimized = optimizer.optimize()

    # Second pass on the optimised allocation; artefacts get an "Optimized"
    # suffix (the first plot name keeps its original double underscore).
    df_risk_optimized = risk_analysis_montecarlo(
        df_risk_optimized,
        Parameters.risk_free_rate,
        Parameters.risk_tolerance,
        total_money_daily,
        Parameters.num_simulations,
        Parameters.num_bets,
        Parameters.plots_path,
        Parameters.results_path,
        f'{Parameters.name_monte_carlo_dist}_{ndate}__Optimized.png',
        f'{Parameters.name_ev_comparation}_{ndate}_Optimized.png',
        f'{Parameters.name_sharpe_ratio_comparison}_{ndate}_Optimized.png',
        f'{Parameters.file_betting_analysis}_{ndate}_Optimized.xlsx',
        True,
    )

Análisis completado. Revisa el archivo betting_analysis_20220301.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220301_Optimized.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220302.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220302_Optimized.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220303.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220303_Optimized.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220304.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220304_Optimized.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220305.xlsx' para ver los resultados.
Análisis completado. Revisa el archivo betting_analysis_20220305_Optimized.xlsx' para ver los re

In [105]:

from src.parameters import Parameters
import os 
import pandas as pd

# Consolidate optimized results
# =================
# Evaluation window and the equal per-day share of the bankroll.
start_date = pd.to_datetime('2022-03-01')
end_date = pd.to_datetime('2022-03-17')
list_dates = pd.date_range(start=start_date, end=end_date).tolist()

total_money_daily = Parameters.total_money / len(list_dates)

# Keep only the optimized Excel reports: skip other artefacts that merely
# contain "Optimized" in their name and Excel lock files ("~$..."), neither of
# which read_excel can parse. Sorting makes the row order reproducible, since
# os.listdir returns entries in arbitrary order.
list_files_optimize = sorted(
    x for x in os.listdir(Parameters.results_path)
    if 'Optimized' in x
    and x.lower().endswith('.xlsx')
    and not x.startswith('~$')
)
if not list_files_optimize:
    raise FileNotFoundError(
        f"No optimized report (*Optimized*.xlsx) found in {Parameters.results_path}"
    )

# Read every report, tagging its rows with the date embedded in the file name
# ("<prefix>_<date>_Optimized.xlsx"). Splitting from the right keeps the date
# lookup correct regardless of how many underscores the prefix contains.
frames_optimized = []
for file_optimized in list_files_optimize:
    df_file_optimized = pd.read_excel(os.path.join(Parameters.results_path, file_optimized))
    df_file_optimized['date'] = file_optimized.rsplit("_", 2)[-2]
    frames_optimized.append(df_file_optimized)

# Concatenate once (instead of growing a frame inside the loop) and give the
# combined frame a clean, unique index.
df_all_optimized = pd.concat(frames_optimized, ignore_index=True)
num_apuestas = len(df_all_optimized)

# Amount returned by each bet if it wins: stake times payout.
df_all_optimized['Money_earned'] = df_all_optimized['Money_to_Bet'] * df_all_optimized['Payout']

# Winning bets are the rows where the 'Result' and 'Class' columns agree.
df_winner = (
    df_all_optimized[df_all_optimized['Result'] == df_all_optimized['Class']]
    .reset_index(drop=True)
)


Durante el 2022-03-01 y el 2022-03-17, hiciste 68 apuestas
de las cuales ganaste en 39,
es decir, ganaste un 57.4% de las veces 
Diariamente invertiste 5.88.
Obtuviste una ganancia 52


In [121]:
import numpy as np

# Daily summary
# =================
# Group each frame once; every aggregate below is keyed by the 'date' column.
all_bets_by_date = df_all_optimized.groupby(['date'])
won_bets_by_date = df_winner.groupby(['date'])

# Per-day totals over all bets (alpha, betha) and over winning bets (gamma, delta).
alpha = all_bets_by_date[['Money_earned']].sum().rename(columns={'Money_earned': 'Possible_Money_earned'})
betha = all_bets_by_date[['match']].count().rename(columns={'match': 'Num_bets'})
gamma = won_bets_by_date[['Money_earned']].sum()
delta = won_bets_by_date[['match']].count().rename(columns={'match': 'Num_winner_bets'})

# Share of winning bets per day; days missing from the winners frame count as 0.
iota = betha.join(delta).fillna(0)
iota['Num_winner_bets'] = iota['Num_winner_bets'].astype(int)
iota['perc_winner_bets'] = np.where(
    iota['Num_winner_bets'] == 0,
    0,
    iota['Num_winner_bets'] / iota['Num_bets'],
)
iota['perc_winner_bets'] = iota['perc_winner_bets'].mul(100).round(2).astype(str) + '%'

# Share of the possible payout that was actually collected per day
# (the integer cast truncates the percentage).
iota = iota.join(alpha).join(gamma).fillna(0)
iota['perc_Money_earned'] = np.where(
    iota['Money_earned'] == 0,
    0,
    iota['Money_earned'] / iota['Possible_Money_earned'],
)
iota['perc_Money_earned'] = iota['perc_Money_earned'].mul(100).astype(int).astype(str) + '%'

# Daily payout minus the equal daily share of the bankroll.
iota['Money_earned_without_initial'] = iota['Money_earned'] - total_money_daily

# Totals over the whole window.
Money_earned = iota['Money_earned'].sum()
num_apuestas_victoriosas = iota['Num_winner_bets'].sum()

# Text report, assembled line by line. The last entry reproduces the trailing
# indented line of the original message.
# NOTE(review): the last two figures are printed unrounded, and the final one
# is a fraction rather than a percentage.
period_from = start_date.strftime('%Y-%m-%d')
period_to = end_date.strftime('%Y-%m-%d')
win_rate = round((num_apuestas_victoriosas/num_apuestas)*100,1)
money_lost = Parameters.total_money - Money_earned
report_lines = [
    f"Durante el {period_from} y el {period_to}, hiciste {num_apuestas} apuestas",
    f"de las cuales ganaste en {num_apuestas_victoriosas},",
    f"es decir, ganaste un {win_rate}% de las veces ",
    f"Diariamente invertiste {round(total_money_daily,2)}.",
    f"Obtuviste una ganancia {int(Money_earned)}",
    f"Tu inversión eran {Parameters.total_money}",
    f"Por ende, perdiste {money_lost}",
    f"Destruyendo tu patrimonio un {(Parameters.total_money - Money_earned)/Parameters.total_money}",
    "    ",
]
print("\n".join(report_lines))
iota


Durante el 2022-03-01 y el 2022-03-17, hiciste 68 apuestas
de las cuales ganaste en 39,
es decir, ganaste un 57.4% de las veces 
Diariamente invertiste 5.88.
Obtuviste una ganancia 52
Tu inversión eran 100
Por ende, perdiste 47.772
Destruyendo tu patrimonio un 0.47772
    


Unnamed: 0_level_0,Num_bets,Num_winner_bets,perc_winner_bets,Possible_Money_earned,Money_earned,perc_Money_earned,Money_earned_without_initial
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
20220301,2,1,50.0%,12.8759,12.3539,95%,6.471547
20220302,1,1,100.0%,13.5828,13.5828,100%,7.700447
20220303,1,0,0.0%,13.2888,0.0,0%,-5.882353
20220304,13,11,84.62%,15.0106,4.1963,27%,-1.686053
20220305,7,3,42.86%,11.4978,0.9976,8%,-4.884753
20220306,1,0,0.0%,9.4668,0.0,0%,-5.882353
20220307,1,0,0.0%,14.2296,0.0,0%,-5.882353
20220308,6,2,33.33%,20.1767,0.7453,3%,-5.137053
20220309,5,3,60.0%,17.8872,1.1194,6%,-4.762953
20220310,3,2,66.67%,22.7408,0.6815,2%,-5.200853
