In [27]:
import re
import os
import csv
import math
import pickle
import random
import numpy as np
import pandas as pd
from ast import literal_eval
from pathlib import Path
from tqdm import tqdm
from itertools import product
from collections import Counter
from _html_parser import ParsingDataPrepare
from _prepare_stat_data import StatDataPrepare
from _astro_constants import AstrologicalPoints, AstrologicalConstants, MoonDaysCalculate

pd.set_option('display.max_columns', 200)  # or 1000
pd.set_option('display.max_rows', 2000)  # or 1000
pd.set_option('display.max_colwidth', 100)  # or 199

%load_ext autoreload 
%autoreload 2

### Research 08_23

#### If file for statistic exist go to Statistic research block

In [2]:
with open('pickle_files/aspected_files/asp_files_05_10_2023/df_games_with_points_and_aspects_col_05_10_2023_to_27_05_2022', 'rb') as f:
    df = pickle.load(f)

print('shape -', df.shape[0])
df.tail(1)    

shape - 103301


KeyboardInterrupt: 

In [None]:
df.game_id.unique().shape[0]

In [None]:
df_events_1 = pd.read_csv('csv_files/events_and_aspects_data/events_games_05_10_2023_to_26_05_2022.csv',  names=['game_id', 'data'], dtype={'game_id': str}, 
                           converters={'data': literal_eval})
df_events_2 = pd.read_csv('csv_files/events_and_aspects_data/events_games_colab_05_10_2023_to_27_05_2022.csv',  names=['game_id', 'data'], dtype={'game_id': str}, 
                           converters={'data': literal_eval})

df_events = pd.concat([df_events_1, df_events_2], ignore_index=True)
print('shape - df_events  -', df_events.shape[0])   
df_events.tail(1)

In [None]:
print('shape - df_events_sec  -', df_events.shape[0]) 
df_events.drop_duplicates(subset='game_id', keep='first', inplace=True)
df_events.reset_index(drop=True, inplace=True)
print('shape - df_events_sec  -', df_events.shape[0]) 

In [None]:
df_events['bets'] = df_events.data.map(lambda x: x[-1])
df_events.drop(columns='data', inplace=True)
df_events.tail(1)

In [None]:
cols = ['1', 'X', '2', 'ТМ 2.5', 'ТБ 2.5']
data = [x[1] for x in df_events.bets.tolist() if type(x) != float]
df_bets = pd.DataFrame.from_dict(data)
df_bets.columns = cols
print('shape - ', df_bets.shape)
df_bets.tail()

In [None]:
df_events = pd.merge(df_events, df_bets, how='left', left_index=True, right_index=True)
df_events.drop(columns='bets', inplace=True)
print('shape - ', df_events.shape[0])
df_events.tail()

In [None]:
df_events.drop_duplicates(subset='game_id', keep='first', inplace=True)
print('shape - ', df_events.shape[0])
df_events.tail()

In [None]:
print('shape - ', df.shape[0])
df = pd.merge(df, df_events, how='left', left_on='game_id', right_on='game_id')
print('shape - ', df.shape[0])
df.tail(3)

In [None]:
df.insert(14, '1', df.pop('1'))
df.insert(15, 'X', df.pop('X'))
df.insert(16, '2', df.pop('2'))
df.insert(17, 'ТМ 2.5', df.pop('ТМ 2.5'))
df.insert(18, 'ТБ 2.5', df.pop('ТБ 2.5'))

In [None]:
df.rename(columns={'ТМ 2.5':'un_2.5', 'ТБ 2.5':'ov_2.5'}, inplace=True)

In [None]:
df.head(2)

In [None]:
df.game_utc.map(lambda x: str(x)[:8]).value_counts(dropna=False)[:3]

In [None]:
# For addind 'game_id' in df_table_asps and if df with table aspects don't exist
_ = df[['game_id', 'aspects']].apply(lambda x: [y.update({'game_id': x[0]}) for y in x[1]], axis=1)

In [None]:
Stop *******************************************************************************************************

In [None]:
# with open('pickle_files/aspected_files/asp_files_26_03_2021/df_table_asps_26_03_2021_to_03_11_2013_events_top_and_first_colab', 'rb') as f:
#     df_table_asps = pickle.load(f)
# df_table_asps.tail()

In [None]:
# If df with table aspects don't exist
df_table_asps = pd.DataFrame()

for i in tqdm(df.index):
    df_table_asps = pd.concat([df_table_asps, pd.DataFrame(df.aspects[i])])

In [None]:
print('shape -', df_table_asps.shape[0])
df_table_asps.tail()

In [None]:
# Save df with aspects.
# file = open('pickle_files/aspected_files/df_table_asps_05_10_2023_to_27_05_2022', 'wb')
# pickle.dump(df_table_asps_all, file) 
# file.close()

#### Get aspects and create moon mansions

In [None]:
df.drop(columns=['sun', 'saturn'], inplace=True)
constants = ['SUN', 'MERCURY', 'VENUS', 'MARS', 'JUPITER', 'SATURN']

for obj in constants:
    df[str.lower(obj)] = AstrologicalPoints.calculate_astro_objects(df.charts, obj)
    
    antes_name     = 'antes_' + obj.lower()
    df[antes_name] = AstrologicalPoints.antes_objects_calc(df, obj.lower())

In [None]:
df.tail(1)

In [None]:
print('shape - df - ', df.shape)
print('shape - df_table_asps   - ', df_table_asps.shape)

In [None]:
df_game_asps = df.merge(df_table_asps, how='left',left_on='game_id', right_on='game_id')
df_game_asps.reset_index(drop=True, inplace=True)

In [None]:
df_game_asps.tail(1)

In [None]:
nakshatras = AstrologicalConstants.nakshatras

df_game_asps['nakshatras'] = df_game_asps.apply(lambda x: MoonDaysCalculate.calculate_moon_mansions(nakshatras, x[re.sub(' ', '_', x['s_point'].lower())].lon) \
                                                          if x['f_point'] == 'Moon' else None, axis=1)

In [None]:
df_game_asps.nakshatras.value_counts(dropna=False)[:3]

In [None]:
df_game_asps['day_month_yr'] = df_game_asps.game_utc.map(lambda x: str(x)[:10])
df_game_asps.day_month_yr.value_counts()[:3]

In [None]:
df_game_asps.drop(columns=['charts', 'asc', 'desc', 'mc', 'ic', 'pars_fortuna', 'sun', 'moon', 'saturn', 'uranus', 'neptune', 'pluto', 'chiron', 'north_node', 'south_node',
               'pars_spirit', 'pars_glory', 'pars_crest', 'pars_rock', 'ruler_asc', 'ruler_desc', 'ruler_mc', 'ruler_ic', 'ruler_pars_fortuna',
               'ruler_pars_spirit', 'ruler_pars_glory', 'ruler_pars_crest', 'ruler_pars_rock', 'antes_moon', 'antes_ruler_asc', 'antes_ruler_desc',
               'antes_ruler_mc', 'antes_ruler_ic', 'antes_pars_fortuna', 'antes_pars_spirit', 'antes_uranus', 'antes_neptune', 'antes_pluto',
               'antes_chiron', 'antes_north_node', 'antes_south_node','antes_ruler_pars_fortuna', 'antes_ruler_pars_spirit', 
               'antes_sun', 'mercury', 'antes_mercury', 'venus', 'antes_venus', 'mars', 'antes_mars', 'jupiter', 'antes_jupiter', 'antes_saturn',
                'aspects', 'orb', 'longs', 'orb_char', 'comp_id', 'teams_ids', 'transform_id',  'iso_code',], inplace=True) # 

In [None]:
df_game_asps.head(1)

In [None]:
# Transform to tuple
col_names = ['fmain_ch', 'fsec_ch', 'smain_ch',	'ssec_ch', ] #'den_point'
for col in col_names:
    df_game_asps[col] = df_game_asps[col].map(lambda x: tuple(x) if type(x) != float else None) 

##### Prepare aspects and fix revers acpects if it exist

In [None]:
unique_points_lt = df_game_asps[['f_point', 's_point']].apply(lambda x: (x[0], x[1]), axis=1).unique().tolist()
print('len - ', len(unique_points_lt))
unique_points_lt[-2:]

In [None]:
unique_points_lt_cp = unique_points_lt.copy()

In [None]:
for points in unique_points_lt:
    if points[::-1] in unique_points_lt:
        unique_points_lt.remove(points[::-1]) 

len(unique_points_lt)         

In [None]:
not_unique_points_lt = [x for x in unique_points_lt_cp if x not in unique_points_lt]
print('len - ', len(not_unique_points_lt))
not_unique_points_lt[:3]

In [None]:
reverse_inds_sr = df_game_asps[['f_point', 's_point']].apply(lambda x: 1 if (x[0], x[1]) in not_unique_points_lt else None, axis=1)

In [None]:
reverse_inds_lt = reverse_inds_sr[reverse_inds_sr == 1].index.tolist()
print('len - ', len(reverse_inds_lt))
reverse_inds_lt[:3]

##### Transform pair aspects to uniq if exist

In [None]:
# Rearrange (f_point: Pluto - s_point: Mercury) and (f_point: Mercury - s_point: Pluto) aspects at one aspect and 'main_ch', 'sec_ch' columns too.
df_aspects = df_game_asps[~df_game_asps.index.isin(reverse_inds_lt)].copy()
print('shape - ', df_aspects.shape)

In [None]:
df_for_reverse = df_game_asps[df_game_asps.index.isin(reverse_inds_lt)].copy()
print('shape - ', df_for_reverse.shape)

In [None]:
# Count revers points: len(unique_points_lt) - (len(unique_points_lt) aftere remove reverse points)

if df_for_reverse.shape[0] > 0:
    print('shape -', df_for_reverse.shape[0])
    
    points_sr  = df_for_reverse[['f_point', 's_point']].apply(  lambda x: (x[1], x[0]), axis=1)
    main_ch_sr = df_for_reverse[['fmain_ch', 'smain_ch']].apply(lambda x: (x[1], x[0]), axis=1)
    sec_ch_sr  = df_for_reverse[['fsec_ch', 'ssec_ch']].apply(  lambda x: (x[1], x[0]), axis=1)
    
    print('len -', len(points_sr.unique()))
    
    df_for_reverse.f_point = points_sr.map(lambda x: x[0])
    df_for_reverse.s_point = points_sr.map(lambda x: x[1])

    df_for_reverse.fmain_ch = main_ch_sr.map(lambda x: x[0])
    df_for_reverse.smain_ch = main_ch_sr.map(lambda x: x[1])

    df_for_reverse.fsec_ch = sec_ch_sr.map(lambda x: x[0])
    df_for_reverse.ssec_ch = sec_ch_sr.map(lambda x: x[1])

In [None]:
df_aspects = pd.concat([df_aspects, df_for_reverse]).sort_index()
print('shape - ', df_aspects.shape)
df_aspects.tail(1)

##### Fix errors in aspects orbs values

In [None]:
df_orbs = df_aspects.tr_orb.value_counts(dropna=False, ascending=True).reset_index()

In [None]:
# 'index' col it's tr_orb values
df_orbs.head().sort_values(by='index', ascending=False)

In [None]:
df_aspects.tr_orb = pd.to_numeric(df_aspects.tr_orb, errors='coerce')

In [None]:
df_aspects.tr_orb = df_aspects.tr_orb.map(lambda x: StatDataPrepare.fix_erorr_aspects_values(x))

##### Remove some parses and their planets roles.

In [None]:
main_points = ['Moon', 
               'Pars Fortuna', 'Pars Spirit', 
               'Pluto', 'Neptune', 'Uranus',  'Chiron', 
               'Asc', 'Desc',  'MC',  'IC',
               'Sun', 'Mercury', 'Venus', 'Mars', 'Jupiter', 'Saturn',  'North Node', 'South Node', 
               'Antes Moon', 
               'Antes Pars Fortuna', 'Antes Pars Spirit', 
               'Antes Pluto', 'Antes Neptune', 'Antes Uranus',  'Antes Chiron', 
               'Antes Sun', 'Antes Mercury', 'Antes Venus', 'Antes Mars', 'Antes Jupiter', 'Antes Saturn',  'Antes North Node', 'Antes South Node']

# Don't used
# main_roles = ['ruler_asc',  'ruler_desc',  'ruler_mc', 'ruler_ic',
#                'ruler_pars_fortuna',
#                'antes_ruler_asc', 'antes_ruler_desc', 'antes_ruler_mc', 'antes_ruler_ic',
#                'antes_ruler_pars_fortuna'] 

sec_roles  = ['ruler_pars_spirit', 'antes_ruler_pars_spirit']

In [None]:
df_main_asps = df_aspects[df_aspects.f_point.isin(main_points) & df_aspects.s_point.isin(main_points)].copy()
print('shape - ', df_main_asps.shape[0])

In [None]:
df_main_asps.reset_index(drop=True, inplace=True)

In [None]:
df_main_asps.fsec_ch = df_main_asps.fsec_ch.map(lambda x: ''.join([y for y in x if y in sec_roles]))
df_main_asps.ssec_ch = df_main_asps.ssec_ch.map(lambda x: ''.join([y for y in x if y in sec_roles]))

In [None]:
# Concatinate main and secondary roles
df_main_asps.fmain_ch = df_main_asps[['fmain_ch', 'fsec_ch']].apply(lambda x: x[0] + (x[1],) if x[1] != '' else x[0], axis=1)
df_main_asps.smain_ch = df_main_asps[['smain_ch', 'ssec_ch']].apply(lambda x: x[0] + (x[1],) if x[1] != '' else x[0], axis=1)

In [None]:
df_main_asps.drop(columns=['fsec_ch', 'ssec_ch'], inplace=True)

##### Rename values for min length

In [None]:
point_names = {'Sun':'Sn', 'Moon':'Mn', 'Mercury':'Me', 'Venus':'Vn', 'Mars':'Ms', 'Jupiter':'Ju', 'Saturn':'St', 'Chiron':'Ch', 'Uranus':'Ur', 'Neptune':'Np',
               'Pluto':'Pl', 'North Node':'NN', 'South Node':'SN', 'Asc':'H1', 'Desc':'H7', 'MC':'H10', 'IC':'H4', 'Pars Fortuna':'PF', 'Pars Spirit':'PS',
               'Antes Sun':'ASn', 'Antes Moon':'AMn', 'Antes Mercury':'AMe', 'Antes Venus':'AVn', 'Antes Mars':'AMs', 'Antes Jupiter':'AJu', 'Antes Saturn':'ASt', 
               'Antes Chiron':'ACh', 'Antes Uranus':'AUr', 'Antes Neptune':'ANp', 'Antes Pluto':'APl', 'Antes North Node':'ANN', 'Antes South Node':'ASN', 
               'Antes Pars Fortuna':'APF', 'Antes Pars Spirit':'APS'}

type_names  = {'Con':'c', 'Opp':'o', 'Sque':'q', 'Trin':'t', 'Sixt':'i'}

char_names  = {'ruler_asc':'V1', 'ruler_desc':'V7', 'ruler_mc':'V10', 'ruler_ic':'V4', 'ruler_pars_fortuna':'DPF', 'ruler_pars_spirit':'DPS',
              'antes_ruler_asc':'AV1', 'antes_ruler_desc':'AV7', 'antes_ruler_mc':'AV10', 'antes_ruler_ic':'AV4', 'antes_ruler_pars_fortuna':'ADPF',
              'antes_ruler_pars_spirit':'ADPS'}

In [None]:
df_main_asps.f_point = df_main_asps.f_point.map(lambda x: [v for k, v in point_names.items() if x == k][0])
df_main_asps.s_point = df_main_asps.s_point.map(lambda x: [v for k, v in point_names.items() if x == k][0])

df_main_asps.type = df_main_asps.type.map(lambda x: [v for k, v in type_names.items() if x == k][0])

In [None]:
df_main_asps.fmain_ch = df_main_asps.fmain_ch.map(lambda x: [v for k, v in char_names.items() for r in x if r == k])
df_main_asps.smain_ch = df_main_asps.smain_ch.map(lambda x: [v for k, v in char_names.items() for r in x if r == k])

In [None]:
# Concatinate two and more characteristic values in string
df_main_asps.fmain_ch = df_main_asps.fmain_ch.map(lambda x: '-'.join(x))
df_main_asps.smain_ch = df_main_asps.smain_ch.map(lambda x: '-'.join(x))

In [None]:
# Replace f_point and s_point to characteristic
df_main_asps['f_role'] = df_main_asps[['f_point', 'fmain_ch']].apply(lambda x: x[1] if x[1] != '' else x[0], axis=1)
df_main_asps['s_role'] = df_main_asps[['s_point', 'smain_ch']].apply(lambda x: x[1] if x[1] != '' else x[0], axis=1)

In [None]:
# Remove nodeterminate roles
nodet_points = ['Me', 'Vn', 'Ms', 'Ju', 'AMe', 'AVn', 'AMs', 'AJu']
remove_inds  = []

for nb in df_main_asps.index.to_list():
    if df_main_asps.iloc[nb].f_role in nodet_points or df_main_asps.iloc[nb].s_role in nodet_points:
        remove_inds.append(nb)

df_main_asps.drop(index=remove_inds, inplace=True)       
df_main_asps.reset_index(drop=True, inplace=True)        

In [None]:
# Remove Moon aspects with nodeterminate roles planets
nodet_points_mn = ['Sn', 'Me', 'Vn', 'Ms', 'Ju', 'St', 'ASn', 'AMe', 'AVn', 'AMs', 'AJu', 'ASt']
remove_inds_mn  = []

for nb in df_main_asps.index.to_list():
    if (df_main_asps.iloc[nb].f_role == 'Mn') and (df_main_asps.iloc[nb].s_role in nodet_points_mn):
        remove_inds_mn.append(nb)

df_main_asps.drop(index=remove_inds_mn, inplace=True)       
df_main_asps.reset_index(drop=True, inplace=True)        

In [None]:
print('shape - ', df_main_asps.shape[0])

In [None]:
# Create 'in' - 'out' for houses points aspect with their dispositors
houses_lt = ['H1', 'H7', 'H10', 'H4']
desps_lt  = ['V1', 'V7', 'V10', 'V4', 'AV1', 'AV7', 'AV10', 'AV4']

df_main_asps.s_role = df_main_asps[['f_role', 's_role', 'approach']].apply(lambda x: x[1] +'-'+ x[2][:2] if x[0] in houses_lt and any(val in x[1] 
                                                                                                           for val in desps_lt) else x[1], axis=1)

In [None]:
# Remove divergent aspects for Houses and parses dispositors and parses and their anteses
rmv_asps_lt = ['V1', 'V7', 'V10', 'V4', 'AV1', 'AV7', 'AV10', 'AV4', 'PF', 'PS', 'APF', 'APS', 'DPF', 'ADPF', 'DPS', 'ADPS']

diver_parts_asps = df_main_asps[['f_role', 's_role', 'approach']].apply(lambda x: x.name if any(val in x[0] for val in rmv_asps_lt) and any(val in x[1] for val in rmv_asps_lt)\
                                                           and x[2] == 'diver'else None, axis=1)

diver_parts_asps_inds = diver_parts_asps[~diver_parts_asps.isna()].index.to_list()
print('shape - ', df_main_asps.shape[0])
df_main_asps = df_main_asps[~df_main_asps.index.isin(diver_parts_asps_inds)].copy()
print('shape - ', df_main_asps.shape[0])
df_main_asps.reset_index(drop=True, inplace=True)

In [None]:
# Concatinate points and roles
df_main_asps.f_role = df_main_asps[['f_point', 'f_role']].apply(lambda x: '-'.join(x) if x[0] != x[1] else x[0], axis=1)
df_main_asps.s_role = df_main_asps[['s_point', 's_role']].apply(lambda x: '-'.join(x) if x[0] != x[1] else x[0], axis=1)

In [None]:
df_main_asps['fs_asps'] = df_main_asps[['f_role', 'type', 's_role']].apply(lambda x: '_'.join(x), axis=1)

In [None]:
df_main_asps['day_month_yr'] = df_main_asps.game_utc.map(lambda x: str(x)[:10])

In [None]:
fs_asps_count = df_main_asps.groupby('day_month_yr').fs_asps.value_counts().to_dict()

In [None]:
df_asps_count              = pd.DataFrame.from_dict(fs_asps_count.items())
df_asps_count['day_month_yr'] = [x[0] for x in df_asps_count[0]]
df_asps_count['fs_asps']   = [x[1] for x in df_asps_count[0]]

df_asps_count.rename(columns={1 :'cnt_asp_for_day'}, inplace=True)
df_asps_count.drop(columns=0, inplace=True)

In [None]:
df_asps_count.head()

In [None]:
df_main_asps = df_main_asps.merge(df_asps_count, how='left', left_on=['day_month_yr', 'fs_asps'], right_on=['day_month_yr', 'fs_asps'])

In [None]:
# Concatinate points and roles without planets names:
df_main_asps['f_role_wt_pls'] = df_main_asps.f_role.map(lambda x: re.sub(r'^(.*?)-', '', x) if x[:2] != 'Mn' else x)
df_main_asps['s_role_wt_pls'] = df_main_asps.s_role.map(lambda x: re.sub(r'^(.*?)-', '', x))

In [None]:
df_main_asps['fs_asps_wt_pls'] = df_main_asps[['f_role_wt_pls', 'type', 's_role_wt_pls']].apply(lambda x: '_'.join(x), axis=1)

In [None]:
fs_asps_count_wt_pls = df_main_asps.groupby('day_month_yr').fs_asps_wt_pls.value_counts().to_dict()

In [None]:
df_asps_cnt_wt_pls                   = pd.DataFrame.from_dict(fs_asps_count_wt_pls.items())
df_asps_cnt_wt_pls['day_month_yr']      = [x[0] for x in df_asps_cnt_wt_pls[0]]
df_asps_cnt_wt_pls['fs_asps_wt_pls'] = [x[1] for x in df_asps_cnt_wt_pls[0]]

df_asps_cnt_wt_pls.rename(columns={1 :'cnt_asp_wt_pls_for_day'}, inplace=True)
df_asps_cnt_wt_pls.drop(columns=0, inplace=True)

In [None]:
# [x for x in df_asps_cnt_wt_pls.fs_asps_wt_pls if x.startswith('Mn')]

In [None]:
df_asps_cnt_wt_pls.tail()

In [None]:
df_main_asps = df_main_asps.merge(df_asps_cnt_wt_pls, how='left', left_on=['day_month_yr', 'fs_asps_wt_pls'], right_on=['day_month_yr', 'fs_asps_wt_pls'])

In [None]:
# Remove 'games_id' without goals
sr_wt_goals = df_main_asps[['game_id', 'goals']].apply(lambda x: x[0] if re.findall('-', str(x[1])) else None, axis=1)
sr_wt_goals = sr_wt_goals[~sr_wt_goals.isna()]
print('goals_isna_game_ids -', sr_wt_goals)
df_main_asps = df_main_asps[~df_main_asps.game_id.isin(sr_wt_goals)].copy()
df_main_asps.reset_index(drop=True, inplace=True)

In [None]:
Stop ***************************************************************************************************************************

In [None]:
# df_main_asps['1'].value_counts(dropna=False)

In [None]:
df_main_asps.tail(1)

In [None]:
df_main_asps.game_utc = pd.to_datetime(df_main_asps.game_utc, format='%d.%m.%Y %H:%M')
df_main_asps          = df_main_asps.sort_values(by='game_utc')

In [None]:
# Create df for statistic research
# file = open('pickle_files/aspected_files/df_for_statistic_05_10_2023_to_27_05_2022', 'wb')
# pickle.dump(df_main_asps, file) 
# file.close()

### Statistic research

In [None]:
# Full start from this row = Anomaly period with a lot of losses "2021.04" to "2022.05" in top and sec leagues

In [28]:
with open('pickle_files/aspected_files/df_all_parses_for_statistic_05_10_2023_to_03_11_2013_w_moon_days', 'rb') as f:  
    df_main_asps = pickle.load(f)   

In [29]:
df_main_asps.head(1)

Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras
0,10934224,Torneo Federal A,38,2013-11-03 22:30:00,2:0,thr,,,Juventud Unida Universitario,Union Mar del Plata,2,8,38,28,,,,,,Argentina,Luis,37s56,57w47,Mn,ASn,q,moon_conv,equal,1.2,no,(),AV4,,Mn,ASn-AV4,Mn_q_ASn-AV4,2013-11-03,5,Mn,AV4,Mn_q_AV4,5,11.2013,0,


In [30]:
# df_main_asps.fs_asps_wt_pls.value_counts(dropna=False).head(100)

In [31]:
# with open('pickle_files/aspected_files/df_for_statistic_05_10_2023_to_03_11_2013_w_moon_days', 'rb') as f:
#     df_main_asps_1 = pickle.load(f)  
#     # df_main_asps = pickle.load(f)
    
# # For new data research    
# with open('pickle_files/aspected_files/df_for_statistic_01_02_2024_to_06_10_2023', 'rb') as f:
#     df_main_asps_2 = pickle.load(f)     
#     # df_main_asps = pickle.load(f)
    
# df_main_asps = pd.concat([df_main_asps_1, df_main_asps_2], ignore_index=True)    

In [32]:
df_main_asps['month_yr'] = df_main_asps.game_utc.map(lambda x: str(x)[5:7] + '.' + str(x)[0:4])

In [33]:
print('unique games -', len(df_main_asps.game_id.unique()))

unique games - 663804


In [34]:
# df_main_asps = df_main_asps[(df_main_asps.game_utc >= '2021.04') & (df_main_asps.game_utc <= '2022.04')].copy()

In [35]:
# Anomaly period with a lot of losses "2021.04" to "2022.05" in top and sec leagues
df_anomaly_per = df_main_asps[(df_main_asps.game_utc >= '2021.04') & (df_main_asps.game_utc <= '2022.05')].copy()

In [36]:
print('min date', min(df_anomaly_per.game_utc))
print('max date', max(df_anomaly_per.game_utc))

min date 2021-04-01 00:00:00
max date 2022-05-01 00:00:00


In [37]:
print('shape -', df_main_asps.shape[0])
df_main_asps = df_main_asps[~df_main_asps.game_id.isin(df_anomaly_per.game_id)].copy()
print('shape -', df_main_asps.shape[0])

shape - 7161449
shape - 6249505


In [38]:
# In 'Argentina' state more location errors 
print('shape -', df_main_asps.shape[0])
df_main_asps = df_main_asps[df_main_asps.country != 'Argentina'].copy()
print('shape -', df_main_asps.shape[0])

shape - 6249505
shape - 6107419


In [39]:
print('min date', min(df_main_asps.game_utc))
print('max date', max(df_main_asps.game_utc))

min date 2013-11-03 23:00:00
max date 2023-10-06 02:00:00


In [40]:
print('unique games -', len(df_main_asps.game_id.unique()))

unique games - 565619


In [41]:
df_main_asps.tail(1)

Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras
7161448,1989897,CONCACAF Central American Cup,Cup,2023-10-06 02:00:00,3:0,thr,,,Alajuelense,Cartagines,,,,,1.6,4.15,4.7,2.3,1.55,Costa Rica,Alajuela,10n0,84w12,Me,ANp,c,conv,equal,2.17,no,(),,DPS,Me-DPS,ANp,Me-DPS_c_ANp,2023-10-06,3,DPS,ANp,DPS_c_ANp,3,10.2023,21,


In [42]:
df_main_asps.day_month_yr.value_counts(dropna=False)[:10]

2019-09-07    8662
2019-09-08    8497
2023-07-15    8413
2015-03-08    8028
2019-10-13    7917
2016-10-30    7505
2015-02-08    7436
2014-09-06    7322
2020-01-12    7273
2015-09-05    7263
Name: day_month_yr, dtype: int64

In [43]:
df_main_asps['week_day'] = df_main_asps.game_utc.map(lambda x: x.weekday())
df_main_asps['day']      = df_main_asps.day_month_yr.map(lambda x: x[:2])

In [44]:
df_main_asps['goals_count'] = df_main_asps.goals.map(lambda x: int(x.split(':')[0]) + int(x.split(':')[1]))
df_main_asps.goals_count.value_counts(dropna=False)

2     1402436
3     1309055
1     1100799
4      873291
5      512232
0      473512
6      250508
7      110997
8       44586
9       17973
10       6198
11       2832
12       1171
13        774
14        299
15        210
16        138
17         83
18         63
19         63
20         36
21         33
22         30
24         20
23         16
35         14
32         14
41         11
28          9
26          8
30          8
Name: goals_count, dtype: int64

#### Moon aspects calculate

In [45]:
# # Count Moon aspects per game for all planets, parses and their anteses
# def count_Mn(arr):
#     return (arr == 'Mn').sum()

# gb_mn_count = df_main_asps.groupby('game_id').agg({'f_point': count_Mn}).reset_index()
# gb_mn_count.rename(columns={'f_point':'count_Mn'}, inplace=True)
# gb_mn_count.tail(3)

In [46]:
# # Count main Moon aspects per game for V-planets, parses and their anteses and dispositors
# main_vals = ['V1', 'V4', 'V7', 'V10', 'PF', 'PS'] # anteses must be find to

# gb_mn_main_count = df_main_asps[df_main_asps.s_role_wt_pls.str.contains('|'.join(main_vals))].groupby('game_id').agg({'f_point': count_Mn}).reset_index()
# gb_mn_main_count.rename(columns={'f_point':'count_Mn_main'}, inplace=True)
# gb_mn_main_count.tail(3)

In [47]:
# # Count houses Moon aspects per game 
# house_vals = ['H1', 'H4', 'H7', 'H10'] 

# gb_mn_houses_count = df_main_asps[df_main_asps.f_point.str.contains('|'.join(house_vals))].groupby('game_id').agg({'s_point': count_Mn}).reset_index()
# gb_mn_houses_count.rename(columns={'s_point':'count_Mn_houses'}, inplace=True)
# gb_mn_houses_count.count_Mn_houses.value_counts()

In [48]:
# print('shape -', df_main_asps.shape[0])
# df_main_asps = df_main_asps.merge(gb_mn_count, how='left', left_on='game_id', right_on='game_id')
# df_main_asps = df_main_asps.merge(gb_mn_main_count, how='left', left_on='game_id', right_on='game_id')
# df_main_asps = df_main_asps.merge(gb_mn_houses_count, how='left', left_on='game_id', right_on='game_id')
# print('shape -', df_main_asps.shape[0])
# df_main_asps.tail(1)

In [49]:
# df_main_asps['all_count_Mn']           = df_main_asps.count_Mn + df_main_asps.count_Mn_houses
# df_main_asps['count_Mn_main_w_houses'] = df_main_asps.count_Mn_main + df_main_asps.count_Mn_houses

##### Correlation calculate

In [50]:
# col_1 = 'goals_count'
# col_2 = 'all_count_Mn' # 'count_Mn', 'count_Mn_main', 'count_Mn_houses', 'all_count_Mn', 'count_Mn_main_w_houses'

# pearson_corr  = df_main_asps[[col_1, col_2]].corr(method='pearson')
# kendall_corr  = df_main_asps[[col_1, col_2]].corr(method='kendall')
# spearman_corr = df_main_asps[[col_1, col_2]].corr(method='spearman')

#### ***************************************************************************************************************

In [51]:
df_main_asps['1'] = pd.to_numeric(df_main_asps['1'], errors='coerce')
df_main_asps['2'] = pd.to_numeric(df_main_asps['2'], errors='coerce')
df_main_asps['X'] = pd.to_numeric(df_main_asps['X'], errors='coerce')
df_main_asps['un_2.5'] = pd.to_numeric(df_main_asps['un_2.5'], errors='coerce')
df_main_asps['ov_2.5'] = pd.to_numeric(df_main_asps['ov_2.5'], errors='coerce')

df_main_asps.tr_orb = pd.to_numeric(df_main_asps.tr_orb, errors='coerce')

In [52]:
# df_main_asps.ligue.value_counts(dropna=False)

In [53]:
# Remove some ligues 
print('shape -', df_main_asps.shape[0])
# ligues = ['top',] # 'top', 'sec', 'thr'
# df_main_asps = df_main_asps[df_main_asps.ligue.isin(ligues)].copy() 
df_main_asps = df_main_asps[~df_main_asps['un_2.5'].map(lambda x: np.isnan(x))].copy()
print('shape -', df_main_asps.shape[0])  
df_main_asps.reset_index(drop=True, inplace=True)
df_main_asps.tail(1)

shape - 6107419
shape - 3128859


Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count
3128858,1989897,CONCACAF Central American Cup,Cup,2023-10-06 02:00:00,3:0,thr,,,Alajuelense,Cartagines,,,,,1.6,4.15,4.7,2.3,1.55,Costa Rica,Alajuela,10n0,84w12,Me,ANp,c,conv,equal,2.17,no,(),,DPS,Me-DPS,ANp,Me-DPS_c_ANp,2023-10-06,3,DPS,ANp,DPS_c_ANp,3,10.2023,21,,4,20,3


In [54]:
# Remove friendly games 
print('shape -', df_main_asps.shape[0])
df_main_asps = df_main_asps[df_main_asps.ligue_header != 'Friendly'].copy()
# df_main_asps = df_main_asps[~df_main_asps['un_2.5'].map(lambda x: np.isnan(x))].copy()
print('shape -', df_main_asps.shape[0])  
df_main_asps.reset_index(drop=True, inplace=True)
df_main_asps.tail(1)

shape - 3128859
shape - 3054806


Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count
3054805,1989897,CONCACAF Central American Cup,Cup,2023-10-06 02:00:00,3:0,thr,,,Alajuelense,Cartagines,,,,,1.6,4.15,4.7,2.3,1.55,Costa Rica,Alajuela,10n0,84w12,Me,ANp,c,conv,equal,2.17,no,(),,DPS,Me-DPS,ANp,Me-DPS_c_ANp,2023-10-06,3,DPS,ANp,DPS_c_ANp,3,10.2023,21,,4,20,3


In [55]:
df_main_asps.ligue_header.value_counts(dropna=False).head()

Premier League      207709
Regionalliga         69504
Super League         68459
Serie C              67227
Primera División     66617
Name: ligue_header, dtype: int64

In [56]:
# Remove games without bets coeff
print('shape -', df_main_asps.shape[0])
df_main_asps = df_main_asps[~df_main_asps['1'].map(lambda x: np.isnan(x))].copy()
# df_main_asps = df_main_asps[~df_main_asps['un_2.5'].map(lambda x: np.isnan(x))].copy()
print('shape -', df_main_asps.shape[0])  
df_main_asps.reset_index(drop=True, inplace=True)
df_main_asps.tail(1)

shape - 3054806
shape - 3054754


Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count
3054753,1989897,CONCACAF Central American Cup,Cup,2023-10-06 02:00:00,3:0,thr,,,Alajuelense,Cartagines,,,,,1.6,4.15,4.7,2.3,1.55,Costa Rica,Alajuela,10n0,84w12,Me,ANp,c,conv,equal,2.17,no,(),,DPS,Me-DPS,ANp,Me-DPS_c_ANp,2023-10-06,3,DPS,ANp,DPS_c_ANp,3,10.2023,21,,4,20,3


#### Moon days calculate

In [57]:
# df_main_asps['num_lat'] = df_main_asps.lat.map(lambda x: MoonDaysCalculate.transform_lat_lon_to_nimeric(x))
# df_main_asps['num_lon'] = df_main_asps.lon.map(lambda x: MoonDaysCalculate.transform_lat_lon_to_nimeric(x))

In [58]:
# # lat or lon format:  (-1, 7, 0)
# moon_lat = df_main_asps.num_lat.map(lambda x: ( int(re.split('\.', str(x))[0]), int(re.split('\.', str(x))[1]), 0 ))
# moon_lon = df_main_asps.num_lon.map(lambda x: ( int(re.split('\.', str(x))[0]), int(re.split('\.', str(x))[1]), 0 ))
# moon_lon[:3]

In [59]:
# # date_time: (2022, 9, 16, 19, 0, 0)
# moon_date_splt = df_main_asps.game_utc.map(lambda x: ( int(re.split('[- :]', str(x))[0]), int(re.split('[- :]', str(x))[1]), int(re.split('[- :]', str(x))[2]), 
#                                                        int(re.split('[- :]', str(x))[3]), int(re.split('[- :]', str(x))[4]), int(re.split('[- :]', str(x))[5]) ))
# moon_date_splt[:3]

In [60]:
# df_moon_days = pd.DataFrame({'moon_lat': moon_lat, 'moon_lon': moon_lon, 'moon_date_splt': moon_date_splt})
# df_moon_days.head(3)

In [61]:
# df_moon_days['moon_day'] = df_moon_days.apply(lambda x: MoonDaysCalculate.moon_day_calculate(lat=x[0], lon=x[1], date_time=x[2]), axis=1)
# print('shape - ', df_moon_days.shape[0])
# df_moon_days.head(3)

In [62]:
# df_main_asps['moon_day'] = df_moon_days.moon_day

In [63]:
# df_main_asps.moon_day.value_counts(dropna=False)[:3]

In [64]:
# df_main_asps.drop(columns=['num_lat', 'num_lon'], inplace=True)

In [65]:
# file = open('pickle_files/aspected_files/df_for_statistic_05_10_2023_to_03_11_2013_w_moon_days', 'wb')
# pickle.dump(df_main_asps, file)  
# file.close()

#### Count goals research

In [66]:
# df_main_asps['bets_result_un_2.5'] = df_main_asps[['goals_count', 'un_2.5']].apply(lambda x: x[1] if x[0] < 3 else 0, axis=1)
# df_main_asps['bets_result_ov_2.5'] = df_main_asps[['goals_count', 'ov_2.5']].apply(lambda x: x[1] if x[0] > 2 else 0, axis=1)

In [67]:
# df_unique_games = df_main_asps.drop_duplicates(subset='game_id', keep='first')
# print('shape -', df_unique_games.shape[0])  
# df_unique_games.reset_index(drop=True, inplace=True)
# df_unique_games.tail(1)

In [68]:
# df_unique_games[df_unique_games['bets_result_ov_2.5'].isna()].shape[0]

In [69]:
# df_unique_games[df_unique_games['bets_result_un_2.5'].isna()].shape[0]

In [70]:
# print('shape -', df_unique_games.shape[0])
# df_unique_games = df_unique_games[(~df_unique_games['bets_result_un_2.5'].isna()) & (~df_unique_games['bets_result_ov_2.5'].isna())]
# print('shape -', df_unique_games.shape[0])  
# df_unique_games.reset_index(drop=True, inplace=True)
# df_unique_games.tail(1)

In [71]:
# print('shape -', df_main_asps.shape[0])
# df_main_asps = df_main_asps[(~df_main_asps['bets_result_un_2.5'].isna()) & (~df_main_asps['bets_result_ov_2.5'].isna())]
# print('shape -', df_main_asps.shape[0])  
# df_main_asps.reset_index(drop=True, inplace=True)
# df_main_asps.tail(1)

In [72]:
# # Remove some asoects
# remove_aspects = ['Ch_o_ANp', 'Pl_o_ANN', 'SN_c_APl', 'St_c_APl', 'NN_c_AUr', 'Ur_o_ASN', 'Np_o_AUr', 'Np_c_SN', 'SN_c_ANN', 'Ur_c_NN', 'NN_c_ACh', 'Pl_c_SN', 'Ch_o_ASN', 'Ch_c_SN ', 'Ch_o_AUr']
# print('shape -', df_main_asps.shape[0])
# df_main_asps = df_main_asps[~df_main_asps.fs_asps_wt_pls.isin(remove_aspects)]
# df_main_asps.reset_index(drop=True, inplace=True)
# print('shape -', df_main_asps.shape[0])

In [73]:
# print('games under 2.5 goals     - ', df_unique_games[df_unique_games.goals_count < 3].goals_count.count())
# print('mean bets value under 2.5 - ', round(df_unique_games['un_2.5'].mean(), 3))
# print('sum bets result under 2.5 - ', round(df_unique_games['bets_result_un_2.5'].sum(), 3))

In [74]:
# print('games over 2.5 goals     - ', df_unique_games[df_unique_games.goals_count > 2].goals_count.count())
# print('mean bets value over 2.5 - ', round(df_unique_games['ov_2.5'].mean(), 3))
# print('sum bets result over 2.5 - ', round(df_unique_games['bets_result_ov_2.5'].sum(), 3))

In [75]:
# # Remove some valuable orbs
# print('shape -', df_main_asps.shape[0])
# df_main_asps = df_main_asps[df_main_asps.tr_orb <= 1]
# print('shape -', df_main_asps.shape[0])  
# df_main_asps.reset_index(drop=True, inplace=True)
# df_main_asps.tail(1)

In [76]:
# col_mn = 'count_Mn_main' # 'count_Mn', 'count_Mn_main', 'count_Mn_houses', 'all_count_Mn', 'count_Mn_main_w_houses'

# df_mn_count = df_main_asps[~df_main_asps[col_mn].isna()].copy()
# df_mn_count.reset_index(drop=True, inplace=True)
# df_mn_count.tail(2)

In [77]:
# df_mn_count.drop_duplicates(subset='game_id', keep='first', inplace=True)
# df_mn_count[col_mn].value_counts(dropna=False)[:3]

In [78]:
# df_nakshatras = df_main_asps[~df_main_asps.nakshatras.isna()].copy()
# df_nakshatras.reset_index(drop=True, inplace=True)
# df_nakshatras.tail(2)

In [79]:
# df_nakshatras.drop_duplicates(subset='game_id', keep='first', inplace=True)
# df_nakshatras.nakshatras.value_counts(dropna=False)[:3]

In [80]:
# col_names     = [col_mn] # 'fs_asps', 'fs_asps_wt_pls', 'moon_day', 'day',  'week_day', 'nakshatras', col_mn
# df_stat_goals = df_mn_count.groupby(col_names).agg({'game_id':'nunique', 'bets_result_un_2.5':'sum', 'bets_result_ov_2.5':'sum'}) # df_main_asps, df_unique_games, df_nakshatras, df_mn_count 

# df_stat_goals['profit_un_2.5'] = round(df_stat_goals['bets_result_un_2.5'] / df_stat_goals.game_id, 2)
# df_stat_goals['profit_ov_2.5'] = round(df_stat_goals['bets_result_ov_2.5'] / df_stat_goals.game_id, 2)

In [81]:
# df_stat_goals[(df_stat_goals.game_id > 300)].sort_values(by='profit_un_2.5', ascending=False).head()

In [82]:
# df_stat_goals[(df_stat_goals.game_id > 300)].sort_values(by='profit_ov_2.5', ascending=False).head()

In [83]:
# first_asp_lt = df_main_asps[(df_main_asps.fs_asps_wt_pls == 'V7-DPS_c_ANp')].game_id.to_list()
# len(first_asp_lt)

In [84]:
# sec_asp_lt = df_main_asps[(df_main_asps.fs_asps_wt_pls == 'Pl_o_DPF')].game_id.to_list()
# len(sec_asp_lt)

In [85]:
# both_asps_lt_1 = [x for x in sec_asp_lt if x in first_asp_lt]
# both_asps_lt_2 = [y for y in first_asp_lt if y in sec_asp_lt]
# both_asps_lt = list(set(both_asps_lt_1 + both_asps_lt_2))
# len(both_asps_lt)

In [86]:
# df_main_asps[df_main_asps.game_id.isin(both_asps_lt)]#.game_id.unique()

In [87]:
# df_main_asps[df_main_asps.game_id.isin(both_asps_lt)]['bets_result_un_2.5']

In [88]:
# df_main_asps[df_main_asps.game_id.isin(both_asps_lt)]['bets_result_un_2.5'].mean()

In [89]:
# df_main_asps[df_main_asps.game_id.isin(both_asps_lt)]['bets_result_ov_2.5'].mean()

In [90]:
# col_names     = ['fs_asps', 'cnt_asp_for_day', 'fs_asps_wt_pls', 'cnt_asp_wt_pls_for_day'][2:3]
# col_names     = ['fs_asps_wt_pls', 'month_yr']
# df_stat_goals = df_main_asps.groupby(col_names).agg({'game_id':'nunique', 'bets_result_un_2.5':'sum', 'bets_result_ov_2.5':'sum'})

# df_stat_goals['profit_un_2.5'] = round(df_stat_goals['bets_result_un_2.5'] / df_stat_goals.game_id, 2)
# df_stat_goals['profit_ov_2.5'] = round(df_stat_goals['bets_result_ov_2.5'] / df_stat_goals.game_id, 2)

In [91]:
# df_stat_goals[(df_stat_goals.game_id > 1500)].sort_values(by='profit_ov_2.5', ascending=False).head()

In [92]:
# col = 'V7-DPS_c_ANp'
# df_stat_goals[df_stat_goals.index.isin([col], level=0)]#.game_id.count()

In [93]:
# col_prof = ['profit_un_2.5', 'profit_ov_2.5'][0]

# df_stat_goals[df_stat_goals.index.isin([col], level=0)][col_prof].mean()

In [94]:
# df_stat_goals[df_stat_goals.index.isin([col], level=0)][col_prof].count()

In [95]:
# df_stat_goals[df_stat_goals.index.isin([col], level=0) & (df_stat_goals[col_prof] >= 1)][col_prof].count()

In [96]:
# df_stat_goals[df_stat_goals.index.isin([col], level=0) & (df_stat_goals[col_prof] >= 1)][col_prof].mean()

In [97]:
# df_stat_goals[df_stat_goals.index.isin([col], level=0) & (df_stat_goals[col_prof] <= 1)][col_prof].count()

In [98]:
# df_stat_goals[df_stat_goals.index.isin([col], level=0) & (df_stat_goals[col_prof] <= 1)][col_prof].mean()

In [99]:
# Stop

#### ******************************************************************************************************

In [100]:
# Remove games with error coeff (more value than 60)
max_bet = 35

print('shape -', df_main_asps.shape[0])
df_main_asps = df_main_asps[df_main_asps[['1', '2']].apply(lambda x: x[0] < max_bet and x[1] < max_bet, axis=1)].copy()
print('shape -', df_main_asps.shape[0])  
df_main_asps.reset_index(drop=True, inplace=True)
df_main_asps.tail(1)

shape - 3054754
shape - 3045051


Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count
3045050,1989897,CONCACAF Central American Cup,Cup,2023-10-06 02:00:00,3:0,thr,,,Alajuelense,Cartagines,,,,,1.6,4.15,4.7,2.3,1.55,Costa Rica,Alajuela,10n0,84w12,Me,ANp,c,conv,equal,2.17,no,(),,DPS,Me-DPS,ANp,Me-DPS_c_ANp,2023-10-06,3,DPS,ANp,DPS_c_ANp,3,10.2023,21,,4,20,3


In [101]:
print('max 1 - ', max(df_main_asps['1']))
print('max 2 - ', max(df_main_asps['2']))

max 1 -  34.0
max 2 -  34.0


In [102]:
# Determinate Fav - Pre roles and result:
fav_coef = 0.5 # 0.5 ######################################################################################################################################################################################################################################################### 
# df_main_asps['host_role'] = df_main_asps[['1', '2']].apply(lambda x: ParsingDataPrepare.roles_determinate([x[0], x[1]], fav_coef, 'bets_sum_point'), axis=1)

# All favorits aspects for hosts
df_main_asps['host_role'] = 'Fav'
df_main_asps['result']    = df_main_asps[['host_role', 'goals']].apply(lambda x: StatDataPrepare.get_results(x[0],int(re.sub(':.*', '',x[1])),int(re.sub('.*:','',x[1]))), axis=1)

#### Fav goals calculate

In [103]:
df_main_asps['fav_goals'] = df_main_asps[['host_role', 'goals']].apply(lambda x: StatDataPrepare.fav_goals_calculate(x[0], x[1]), axis=1)

#### Calculate statistic for count aspects

In [104]:
df_unique_game_id = df_main_asps.drop_duplicates(subset='game_id', keep='first')
print('shape -', df_unique_game_id.shape[0])
df_unique_game_id.head(1)

shape - 283231


Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count,host_role,result,fav_goals
0,10934817,1. Lig,38,2013-11-07 16:00:00,0:1,sec,Gaziantep,Turkey,Gaziantep BB,Mersin İdmanyurdu,14,6,41,60,3.1,3.15,2.4,1.1,6.5,Turkey,Gaziantep,37n3,3.7e+23,Mn,APS,t,moon_conv,equal,0.39,no,(),,,Mn,APS,Mn_t_APS,2013-11-07,7,Mn,APS,Mn_t_APS,7,11.2013,4,,3,20,1,Fav,fav_loss,-1


In [105]:
df_unique_game_id.host_role.value_counts(dropna=False)

Fav    283231
Name: host_role, dtype: int64

In [106]:
# Keep Fav - Pre host_roles
print('shape - ', df_main_asps.shape[0])
df_main_asps = df_main_asps[df_main_asps.host_role.isin(['Fav', 'Pre'])].copy()
df_main_asps.reset_index(drop=True, inplace=True)
print('shape - ', df_main_asps.shape[0])

shape -  3045051
shape -  3045051


In [107]:
df_main_asps.host_role.value_counts(dropna=False)

Fav    3045051
Name: host_role, dtype: int64

In [108]:
df_main_asps.tail(1)

Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count,host_role,result,fav_goals
3045050,1989897,CONCACAF Central American Cup,Cup,2023-10-06 02:00:00,3:0,thr,,,Alajuelense,Cartagines,,,,,1.6,4.15,4.7,2.3,1.55,Costa Rica,Alajuela,10n0,84w12,Me,ANp,c,conv,equal,2.17,no,(),,DPS,Me-DPS,ANp,Me-DPS_c_ANp,2023-10-06,3,DPS,ANp,DPS_c_ANp,3,10.2023,21,,4,20,3,Fav,fav_win,3


In [109]:
# Remove all game with Moon V1-V7 roles, change all extra roles to 'Mn'

remove_mn_lt = list(set(df_main_asps[df_main_asps.f_role.isin(['Mn-V1', 'Mn-V7', 'Mn-V1-DPS'])].game_id.tolist()))
print('len -', len(remove_mn_lt))
print(remove_mn_lt[:3])

print('sahpe - ', df_main_asps.shape[0])
df_main_asps = df_main_asps[~df_main_asps.game_id.isin(remove_mn_lt)].reset_index(drop=True).copy()
print('sahpe - ', df_main_asps.shape[0])

df_main_asps.f_role_wt_pls = df_main_asps.f_role_wt_pls.map(lambda x:  'Mn' if ''.join(re.findall('Mn-', str(x))) == 'Mn-' else x)

len - 3223
['14141486', '11917972', '14154287']
sahpe -  3045051
sahpe -  3009284


In [110]:
# Remove multiroles desccription such as V7-V10-DPF change it on V7 and other 

find_values_lt = ['AV1-', 'V1-', 'AV7-', 'V7-', 'ADPF', 'DPF', 'ADPS', 'DPS', 'AV10-', 'V10-', 'AV4-', 'V4-'] # subsequence is matter

for find_value in find_values_lt:
    
    changed_value = re.sub('-', '', find_value)
    
    df_main_asps.f_role_wt_pls = df_main_asps.f_role_wt_pls.map(lambda x:  changed_value if ''.join(re.findall(find_value, str(x))) == find_value else x)
    df_main_asps.s_role_wt_pls = df_main_asps.s_role_wt_pls.map(lambda x:  changed_value if ''.join(re.findall(find_value, str(x))) == find_value else x)

In [111]:
# Create 'fs_asps_wt_pls' clumn witht new parameters
df_main_asps.fs_asps_wt_pls = df_main_asps[['f_role_wt_pls', 'type', 's_role_wt_pls']].apply(lambda x: str(x[0]) +'_'+ str(x[1]) +'_'+ str(x[2]), axis=1) 

In [112]:
# Remove some aspects 
remove_aspects = ['Ch_o_ANp', 'Pl_o_ANN', 'SN_c_APl', 'St_c_APl', 'St_c_Pl', 'St_o_AUr', 'NN_c_AUr', 'Mn_t_St', 'Mn_o_St', 'Ur_o_ASN', 'Np_o_AUr', 'Np_c_SN', 'SN_c_ANN', 'Ur_c_NN', 'NN_c_ACh', 'Pl_c_SN', 'Ch_o_ASN', 
                  'Ch_c_SN ', 'Ch_o_AUr', 'Ch_c_SN', 
                  'Sn_o_Ch', 'Sn_c_ANp', 'Sn_c_ACh', 'Sn_c_Ur', 'Sn_c_APl', 'Sn_o_Np', 'Sn_c_AUr', 'Sn_o_AUr', 'Sn_o_Ur', 'Sn_c_Np', 'Sn_o_ANp', 'Sn_c_Ch', 'Sn_o_APl', 'Sn_o_Pl', 'Sn_o_ACh', 'Sn_o_St', 'Sn_c_Pl', 'Sn_c_St',
                  'H7_c_Sn', 'H10_c_Sn', 'Mn_t_Sn', 'Mn_q_Sn', 'Mn_i_Sn', 'H4_c_Sn']
print('shape -', df_main_asps.shape[0])
df_main_asps = df_main_asps[~df_main_asps.fs_asps_wt_pls.isin(remove_aspects)]
df_main_asps.reset_index(drop=True, inplace=True)
print('shape -', df_main_asps.shape[0])

shape - 3009284
shape - 2883433


In [113]:
# Remove some aspects 
points = ['NN', 'SN', 'ANN', 'ASN']
hightplanets_lt = ['Pl', 'Np', 'Ch', 'Ur', 'APl', 'ANp', 'ACh', 'AUr']

remove_moon_nodes_lt = list(set(df_main_asps[(df_main_asps.f_role.isin(['Mn'])) & (df_main_asps.s_point.isin(points))].fs_asps_wt_pls.to_list()))
print('remove_moon_nodes_lt -', remove_moon_nodes_lt[:5], '       len -', len(remove_moon_nodes_lt))
remove_sat_nodes_lt = list(set(df_main_asps[(df_main_asps.f_role.isin(['St'])) & (df_main_asps.s_point.isin(points))].fs_asps_wt_pls.to_list()))
print('remove_sat_nodes_lt -', remove_sat_nodes_lt[:5], '    len -', len(remove_sat_nodes_lt))
remove_sun_nodes_lt = list(set(df_main_asps[(df_main_asps.f_role.isin(['Sn'])) & (df_main_asps.s_point.isin(points))].fs_asps_wt_pls.to_list()))
print('remove_sun_nodes_lt -', remove_sun_nodes_lt[:5], '    len -', len(remove_sun_nodes_lt))
remove_houses_nodes_lt = list(set(df_main_asps[(df_main_asps.f_role.isin(['H1', 'H4', 'H7', 'H10'])) & (df_main_asps.s_point.isin(points))].fs_asps_wt_pls.to_list()))
print('remove_houses_nodes_lt -', remove_houses_nodes_lt[:5], '    len -', len(remove_houses_nodes_lt))
remove_hightplanets_nodes_lt = list(set(df_main_asps[(df_main_asps.f_role.isin(hightplanets_lt)) & (df_main_asps.s_point.isin(points))].fs_asps_wt_pls.to_list()))
print('remove_hightplanets_nodes_lt -', remove_hightplanets_nodes_lt[:5], '    len -', len(remove_hightplanets_nodes_lt))
remove_sec_role_hightplanets_lt = list(set(df_main_asps[(df_main_asps.f_role.isin(points)) & (df_main_asps.s_point.isin(hightplanets_lt))].fs_asps_wt_pls.to_list()))
print('remove_sec_role_hightplanets_lt -', remove_sec_role_hightplanets_lt[:5], '    len -', len(remove_sec_role_hightplanets_lt))

remove_aspects_2 = ['SN_c_ANN'] + remove_moon_nodes_lt + remove_sat_nodes_lt + remove_sun_nodes_lt + remove_houses_nodes_lt + remove_hightplanets_nodes_lt + \
                   remove_sec_role_hightplanets_lt

print('shape -', df_main_asps.shape[0])
df_main_asps = df_main_asps[~df_main_asps.fs_asps_wt_pls.isin(remove_aspects_2)].reset_index(drop=True).copy()
print('shape -', df_main_asps.shape[0])

remove_moon_nodes_lt - ['Mn_c_ANN', 'Mn_q_ASN', 'Mn_o_ANN', 'Mn_q_ANN', 'Mn_c_ASN']        len - 10
remove_sat_nodes_lt - ['St_c_ASN', 'St_o_ANN']     len - 2
remove_sun_nodes_lt - ['Sn_o_ASN', 'Sn_c_ASN', 'Sn_c_ANN', 'Sn_o_ANN', 'Sn_c_NN']     len - 5
remove_houses_nodes_lt - ['H1_c_SN', 'H4_c_NN', 'H10_c_SN', 'H1_c_NN', 'H1_c_ASN']     len - 16
remove_hightplanets_nodes_lt - ['Np_o_ASN']     len - 1
remove_sec_role_hightplanets_lt - ['SN_c_Np', 'SN_c_Ur', 'SN_c_Pl', 'ANN_c_Np', 'NN_c_Ur']     len - 6
shape - 2883433
shape - 2687438


In [114]:
# Remove opposition aspects if connection exist
remove_opp_lt = list()

for asp in list(set(df_main_asps.fs_asps_wt_pls.values)):
    node_lt  = ['NN', 'SN', 'ANN', 'ASN']
    asp_char = asp.split('_')[1]
    
    if (asp_char == 'c') and (asp.split('_')[2] in node_lt):
        find_opp = asp.split('_')[0] +'_o_'+ asp.split('_')[2]
        
        if find_opp in df_main_asps.fs_asps_wt_pls.values:  
            remove_opp_lt.append(find_opp)
        else:
            None
            
remove_opp_lt = list(set(remove_opp_lt))   
# print(str(remove_opp_lt))
remove_opp_lt_total = ['V4_o_ANN', 'V7_o_ASN', 'V1_o_ANN', 'V10_o_ASN', 'V10_o_ANN', 'V4_o_ASN', 'PS_o_ASN', 'V1_o_ASN', 'V7_o_ANN', 'PS_o_ANN']

print('shape -', df_main_asps.shape[0])
df_main_asps = df_main_asps[~df_main_asps.fs_asps_wt_pls.isin(remove_opp_lt_total)].reset_index(drop=True).copy()
print('shape -', df_main_asps.shape[0])

shape - 2687438
shape - 2651273


In [115]:
df_main_asps.tail(1)

Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,fs_asps_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count,host_role,result,fav_goals
2651272,1989897,CONCACAF Central American Cup,Cup,2023-10-06 02:00:00,3:0,thr,,,Alajuelense,Cartagines,,,,,1.6,4.15,4.7,2.3,1.55,Costa Rica,Alajuela,10n0,84w12,Me,ANp,c,conv,equal,2.17,no,(),,DPS,Me-DPS,ANp,Me-DPS_c_ANp,2023-10-06,3,DPS,ANp,DPS_c_ANp,3,10.2023,21,,4,20,3,Fav,fav_win,3


In [116]:
# df_main_asps.approach.value_counts(dropna=False)

In [117]:
# df_main_asps.fs_asps_wt_pls.value_counts(dropna=False)

In [118]:
# df_main_asps.fs_asps_wt_pls.unique()

In [119]:
# Add in house and out house aspects
# Fortunre and Spirit only antises state in houses
# Only Moon without her antis with houses cuspide

In [120]:
# Create file for ml  #####################################################################################################################################################

# df_group       = df_main_asps.groupby('game_id').agg({'fs_asps_wt_pls': list})
# df_expanded    = df_group['fs_asps_wt_pls'].apply(pd.Series).add_prefix('Value_').reset_index()
# df_ml_game_ids = df_main_asps.drop_duplicates(subset='game_id', keep='first')
# print('shape -', df_ml_game_ids.shape[0])
# df_ml_game_ids = df_ml_game_ids.merge(df_expanded, how='left', left_on='game_id', right_on='game_id')
# print('shape -', df_ml_game_ids.shape[0])

In [121]:
# df_ml_game_ids.head()

In [122]:
# file = open('pickle_files/dl_ml/df_ml_game_ids_top_ligs_fav_coef_05_from_05_10_2023_to_03_11_2013_second_remove', 'wb')
# pickle.dump(df_ml_game_ids, file) 
# file.close()

In [123]:
# Stop *****************************************

#### Statistic calculate not for ml ######################################################

In [124]:
# Statistic research
win_bets_stat  = StatDataPrepare.fav_win_bets_statistic_coeff
drow_bets_stat = StatDataPrepare.fav_drow_bets_statistic_coeff
loss_bets_stat = StatDataPrepare.fav_loss_bets_statistic_coeff

In [125]:
# Statistic research
df_main_asps['win_bets']  = df_main_asps[['host_role', 'result', '1', '2']].apply(lambda x: win_bets_stat(x[0], x[1], x[2], x[3]), axis=1)
df_main_asps['drow_bets'] = df_main_asps[['result', 'X']].apply(lambda x: drow_bets_stat(x[0], x[1]), axis=1)
df_main_asps['loss_bets'] = df_main_asps[['host_role', 'result', '1', '2']].apply(lambda x: loss_bets_stat(x[0], x[1], x[2], x[3]), axis=1)

In [126]:
# calculate for each aspect's group - if mean daily value 'fav_win' more than mean all values in dataframe = 'fav_win', aslo for 'fav_drow' and 'fav_loss' .....

In [127]:
# Statistic research
fav_win_mean  = df_unique_game_id.shape[0] / df_unique_game_id.result.value_counts().fav_win
fav_drow_mean = df_unique_game_id.shape[0] / df_unique_game_id.result.value_counts().fav_drow
fav_loss_mean = df_unique_game_id.shape[0] / df_unique_game_id.result.value_counts().fav_loss

print('fav_win_mean  - ', fav_win_mean)
print('fav_drow_mean - ', fav_drow_mean)
print('fav_loss_mean - ', fav_loss_mean)

fav_win_mean  -  2.2680797905138657
fav_drow_mean -  3.9381943575411227
fav_loss_mean -  3.2768091629548217


In [128]:
# Statistic research
fav_win_home_mean  = df_unique_game_id[df_unique_game_id.host_role == 'Fav'].shape[0] / df_unique_game_id[df_unique_game_id.host_role == 'Fav'].result.value_counts().fav_win
fav_drow_home_mean = df_unique_game_id[df_unique_game_id.host_role == 'Fav'].shape[0] / df_unique_game_id[df_unique_game_id.host_role == 'Fav'].result.value_counts().fav_drow
fav_loss_home_mean = df_unique_game_id[df_unique_game_id.host_role == 'Fav'].shape[0] / df_unique_game_id[df_unique_game_id.host_role == 'Fav'].result.value_counts().fav_loss

print('fav_win_home_mean  - ', fav_win_home_mean)
print('fav_drow_home_mean - ', fav_drow_home_mean)
print('fav_loss_home_mean - ', fav_loss_home_mean)

fav_win_home_mean  -  2.2680797905138657
fav_drow_home_mean -  3.9381943575411227
fav_loss_home_mean -  3.2768091629548217


In [129]:
# Statistic research
# fav_win_away_mean  = df_unique_game_id[df_unique_game_id.host_role == 'Pre'].shape[0] / df_unique_game_id[df_unique_game_id.host_role == 'Pre'].result.value_counts().fav_win
# fav_drow_away_mean = df_unique_game_id[df_unique_game_id.host_role == 'Pre'].shape[0] / df_unique_game_id[df_unique_game_id.host_role == 'Pre'].result.value_counts().fav_drow
# fav_loss_away_mean = df_unique_game_id[df_unique_game_id.host_role == 'Pre'].shape[0] / df_unique_game_id[df_unique_game_id.host_role == 'Pre'].result.value_counts().fav_loss

# print('fav_win_away_mean  - ', fav_win_away_mean)
# print('fav_drow_away_mean - ', fav_drow_away_mean)
# print('fav_loss_away_mean - ', fav_loss_away_mean)

In [130]:
sdp = StatDataPrepare

In [131]:
Stop ******************************************************************

SyntaxError: invalid syntax (3891121879.py, line 1)

#### Frouly aspects and active days only statistic research

In [None]:
# pl_pars_lt = ['Ju', 'St', 'Sn', 'PF', 'PS', 'PC', 'PR', 'PG']

# pl_pars_asps_lt = list(df_main_asps[['f_point', 'type', 's_point']].apply(lambda x: x[0] +'_'+ x[1] +'_'+ x[2] if x[0] in pl_pars_lt and x[2] in pl_pars_lt else None, axis=1).dropna().unique())
# print('len -', len(pl_pars_asps_lt))
# pl_pars_asps_lt[:3]

In [None]:
# sr_pl_pars_asps = df_main_asps[['f_point', 'type', 's_point']].apply(lambda x: x[0] +'_'+ x[1] +'_'+ x[2] if x[0] in pl_pars_lt and x[2] in pl_pars_lt else None, axis=1).dropna().to_frame()

In [None]:
# sr_pl_pars_asps.rename(columns={0:'sr_pl_pars_asps'}, inplace=True)

In [None]:
# df_main_asps = df_main_asps.merge(sr_pl_pars_asps, how='left', left_index=True, right_index=True).fillna(0)

In [None]:
# df_main_asps.fs_asps_wt_pls = df_main_asps[['fs_asps_wt_pls', 'sr_pl_pars_asps']].apply(lambda x: x[0] if x[1] == 0 else x[1], axis=1)

In [None]:
# df_main_asps.drop(columns='sr_pl_pars_asps', inplace=True)

In [None]:
# parses_lt    = ['PF', 'PS', 'PC', 'PR', 'PG']
# parses_asps_lt = list(df_main_asps.fs_asps_wt_pls.map(lambda x: x if x.split('_')[0] in parses_lt and x.split('_')[2] in parses_lt else None).dropna().unique())
# print('len unique -', len(parses_asps_lt))
# str(parses_asps_lt[:10])

In [None]:
# tr_planets_lt      = ['Pl', 'Np', 'Ur', 'Ch']
# tr_planets_asps_lt = list(df_main_asps.fs_asps_wt_pls.map(lambda x: x if x.split('_')[0] in parses_lt + tr_planets_lt and x.split('_')[2] in parses_lt + tr_planets_lt else None).dropna().unique())
# print('len unique -', len(tr_planets_asps_lt))
# tr_planets_asps_lt = [asp for asp in tr_planets_asps_lt if asp not in parses_asps_lt]
# print('len unique -', len(tr_planets_asps_lt))
# str(tr_planets_asps_lt[:10])

In [None]:
# houses_asps_lt = list(df_main_asps.fs_asps_wt_pls.map(lambda x: x if x.split('_')[0].startswith('H') and x.split('_')[2] in parses_lt else None).dropna().unique())
# print('len unique -', len(houses_asps_lt))
# str(houses_asps_lt[:10])

In [None]:
# str(df_main_asps.fs_asps_wt_pls.unique()) 

In [132]:
book_asps_lt = ['H7_c_Mn', 'Mn_i_V1', 'Mn_i_V10', 'H10_c_AV1', 'H7_c_APl', 'H10_c_AV1', 'H7_c_AMn', 'Mn_t_APF', 'H10_c_AV10', 'H1_c_AV7', 'Mn_q_APF', 'H7_c_Pl', 'Mn_t_PF', 'DPF_o_APF', 'Mn_t_V7', 
                'H10_c_AV4', 'Mn_i_V4', 'Mn_q_AV7', 'PF_o_Pl', 'SN_c_APF', 'H10_c_V7', 'V1_o_APF', 'Mn_i_APF', 'Mn_q_V1', 'H10_c_V1', 'Ur_o_APF', 'H1_c_Pl', 'Mn_t_AV10', 'Mn_i_PF', 'PF_c_Ur', 
                'Mn_t_AV7', 'H1_c_Mn', 'Np_o_APF', 'H10_c_APl',  'Mn_t_AV1', 'H10_c_St', 'H10_c_Pl', 'PF_o_V7', 'H1_c_AV1', 'V1_c_APF', 'Mn_c_AV7', 'H1_c_Np', 'Mn_o_APF', 'H1_c_AV4', 'Pl_o_APF', 
                'Mn_q_PF', 'H7_c_APF', 'V7_o_APF', 'Mn_o_PF', 'Mn_q_V10', 'H10_c_Mn', 'Mn_c_AV4', 'Mn_i_AV7', 'H1_c_APF', 'Mn_c_AV10', 'Ur_c_APF', 'Mn_c_AV1', 'Mn_i_AV1', 'H1_c_V10', 'PF_o_Ur', 'Mn_q_V7',
                'PF_c_SN', 'DPF_c_APF', 'H7_c_V10', 'Mn_i_AV4', 'H10_c_AMn', 'H4_c_V7', 'H10_c_ANp', 'H7_c_ANp', 'Np_c_APF', 'Mn_q_AV4', 'Mn_q_AV1', 'Mn_t_V1', 'PF_c_V1', 'PF_c_NN', 'H4_c_AV1', 'PF_c_V7', 
                'Mn_c_APF', 'Mn_i_AV10', 'H4_c_AMn','Mn_o_V4', 'Mn_o_V1', 'H10_c_Ur', 'PF_c_Pl', 'Mn_o_V7', 'Mn_q_AV10', 'H7_c_V4', 'H7_c_V1', 'H4_c_Mn', 'Mn_t_AV4', 'H4_c_Np', 'H1_c_St', 'Sn_c_APF', 
                'H1_c_AMn', 'Mn_i_V7', 'Mn_o_AV4', 'Mn_t_V4', 'NN_c_APF', 'Pl_c_APF', 'V7_c_APF', 'PF_c_Np', 'Mn_q_V4', 'Mn_o_AV7', 'Mn_o_AV10', 'H7_c_AV1', 'Mn_o_AV1', 'PF_c_Sn', 'H4_c_ANp', 'H7_c_AV7',
                'PF_o_V1', 'PF_o_Sn', 'H1_c_V4', 'H4_c_V1', 'Mn_c_V1', 'Mn_c_V7', 'Mn_c_V4', 'H1_c_V1', 'Sn_o_APF', 'St_o_APF', 'H10_c_V4', 'PF_o_St', 'PF_c_DPF', 'H7_c_Np', 'Mn_t_V10', 'H1_c_ANp', 
                'PF_o_DPF', 'H10_c_AV7', 'AUr_c_PF', 'PF_c_St', 'H4_c_St', 'Mn_c_V10', 'H7_c_St', 'H1_c_V7', 'H4_c_Pl', 'H10_c_V10', 'H4_c_AV4', 'H1_c_APl', 'H7_c_V7', 'H7_c_AV10', 'Mn_c_PF', 'AUr_o_PF', 
                'PF_c_ANN', 'ANp_o_PF', 'PF_c_ASN', 'APl_c_PF', 'APl_o_PF', 'St_c_APF', 'ANp_c_PF']

# book_asps_lt = pl_pars_asps_lt + parses_asps_lt + tr_planets_asps_lt + houses_asps_lt 
# print('len -', len(book_asps_lt))
# str(book_asps_lt[:10])

# Remove 'DPF_o_PF' aspect with meeting only one time and exist similar 'PF_o_DPF' aspect.
# 'PF', 'PS','PC', 'PR', 'PG'

In [133]:
print('shape - ', df_main_asps.shape[0])
df_book_asps = df_main_asps[df_main_asps.fs_asps_wt_pls.isin(book_asps_lt)].copy()
df_book_asps['fs_asps_wt_pls_cp'] = df_book_asps.fs_asps_wt_pls
df_book_asps.set_index(keys=['game_id', 'fs_asps_wt_pls'], inplace=True)
print('shape - ', df_book_asps.shape[0])

shape -  2651273
shape -  626971


In [134]:
df_book_asps.index[:3]

MultiIndex([('10906805',  'H7_c_Mn'),
            ('10934817', 'Mn_i_V10'),
            ('10906804', 'Mn_t_APF')],
           names=['game_id', 'fs_asps_wt_pls'])

In [135]:
one_degree_asps = ['ANp_c_PF', 'ANp_o_PF', 'APl_c_PF', 'APl_o_PF', 'AUr_c_PF', 'AUr_o_PF', 'H10_c_ANp', 'H10_c_APl', 'H10_c_Pl', 'H10_c_Ur', 'H1_c_ANp', 'H1_c_APl', 'H1_c_Np', 'H1_c_Pl', 'H4_c_ANp',
                   'H4_c_Np', 'H4_c_Pl', 'H7_c_ANp', 'H7_c_APl', 'H7_c_Np', 'H7_c_Pl',  'Np_c_APF', 'PF_c_Np', 'PF_c_Pl', 'PF_c_Ur', 'PF_o_Pl', 'PF_o_Ur', 'Pl_c_APF', 'Pl_o_APF', 'Ur_c_APF', 'Ur_o_APF']              
# one_degree_asps = tr_planets_asps_lt
# one_degree_asps[:3]

In [136]:
sr_book_asps = df_book_asps[['fs_asps_wt_pls_cp', 'tr_orb']].apply(lambda x: x[0] if x[0] not in one_degree_asps else x[0] if x[1] <= 1.02 else None, axis=1).dropna()
print('shape -', sr_book_asps.shape[0])
sr_book_asps.head()

shape - 539112


game_id   fs_asps_wt_pls
10906805  H7_c_Mn            H7_c_Mn
10934817  Mn_i_V10          Mn_i_V10
10906804  Mn_t_APF          Mn_t_APF
          Mn_i_V10          Mn_i_V10
10906805  Mn_i_V1            Mn_i_V1
dtype: object

In [137]:
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[(df_book_asps.index.isin(sr_book_asps.index))].copy()
print('shape - ', df_book_asps.shape[0])

shape -  626971
shape -  539112


In [138]:
two_degree_asps = ['H10_c_St', 'H1_c_APF', 'H1_c_St', 'H4_c_St', 'H7_c_APF', 'H7_c_St', 'NN_c_APF', 'Np_o_APF', 'PF_c_ANN', 'PF_c_ASN', 'PF_c_NN', 'PF_c_SN', 'PF_c_Sn', 'PF_c_St', 'PF_o_Sn', 'PF_o_St',
                   'SN_c_APF', 'Sn_c_APF', 'Sn_o_APF', 'St_c_APF', 'St_o_APF', 'H10_c_AMn', 'H10_c_Mn', 'H1_c_AMn', 'H1_c_Mn', 'H4_c_AMn', 'H4_c_Mn', 'H7_c_AMn', 'H7_c_Mn']

# two_degree_asps = pl_pars_asps_lt + parses_asps_lt + houses_asps_lt
# two_degree_asps[:3]

In [139]:
sr_book_asps_two = df_book_asps[['fs_asps_wt_pls_cp', 'tr_orb']].apply(lambda x: x[0] if x[0] not in two_degree_asps else x[0] if x[1] <= 2.03 else None, axis=1).dropna()
print('shape -', sr_book_asps_two.shape[0])
sr_book_asps_two.head()

shape - 513927


game_id   fs_asps_wt_pls
10906805  H7_c_Mn            H7_c_Mn
10934817  Mn_i_V10          Mn_i_V10
10906804  Mn_t_APF          Mn_t_APF
          Mn_i_V10          Mn_i_V10
10906805  Mn_i_V1            Mn_i_V1
dtype: object

In [140]:
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[(df_book_asps.index.isin(sr_book_asps_two.index))].copy()
print('shape - ', df_book_asps.shape[0])

shape -  539112
shape -  513927


In [141]:
in_out_asps = ['H10_c_AV1', 'H10_c_AV1', 'H10_c_AV10', 'H10_c_AV4', 'H10_c_AV7', 'H10_c_V1', 'H10_c_V10', 'H10_c_V4', 'H10_c_V7', 'H1_c_AV1', 'H1_c_AV4', 'H1_c_AV7', 'H1_c_V1', 'H1_c_V10', 
               'H1_c_V4', 'H1_c_V7', 'H4_c_AV1', 'H4_c_AV4', 'H4_c_V1', 'H4_c_V7', 'H7_c_AV1', 'H7_c_AV10', 'H7_c_AV7', 'H7_c_V1', 'H7_c_V10', 'H7_c_V4', 'H7_c_V7']

# Add 'in' and 'out' to each aspekts(approach):
# out_stat         1681
# in_stat          1627
# in_stat_weak      282
# out_stat_weak     257

In [142]:
df_book_asps.fs_asps_wt_pls_cp = df_book_asps[['fs_asps_wt_pls_cp', 'approach']].apply(lambda x: x[0] if x[0] not in in_out_asps else x[0] +'_'+ x[1].split('_')[0] , axis=1)

In [143]:
df_book_asps.fs_asps_wt_pls_cp.value_counts()[19:21]

Mn_q_V10    5163
Mn_c_V1     5079
Name: fs_asps_wt_pls_cp, dtype: int64

In [144]:
df_book_asps[['fs_asps_wt_pls_cp', 'approach']].apply(lambda x: x[1] if x[0].startswith('Mn') else None, axis=1).dropna().value_counts()
# Add 'cl' to each aspekts(approach) and save only them:
# 'moon_conv_clear_compl'
# 'moon_conv_compl'
# 'moon_conv_compl_weak'

moon_conv_clear_compl            97461
moon_conv_compl_weak             61849
moon_diver                       44645
moon_conv                        42546
moon_conv_compl                  20051
moon_conv_denide                 19709
moon_diver_weak                   3440
moon_moon_conv_denide_denide       942
moon_diver_denide                  833
moon_moon_diver_denide_denide      207
dtype: int64

In [145]:
compl_lt          = ['moon_conv_clear_compl', 'moon_conv_compl_weak', 'moon_conv_compl']
sr_book_asps_moon = df_book_asps[['fs_asps_wt_pls_cp', 'approach']].apply(lambda x: x[0] if not x[0].startswith('Mn') else x[0] + '_cl' if x[1] in compl_lt else None, axis=1).dropna()
print('shape -', sr_book_asps_moon.shape[0])

shape - 401605


In [146]:
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[(df_book_asps.index.isin(sr_book_asps_moon.index))].copy()
df_book_asps.fs_asps_wt_pls_cp = sr_book_asps_moon
print('shape - ', df_book_asps.shape[0])

shape -  513927
shape -  401605


In [147]:
df_book_asps.fs_asps_wt_pls_cp.value_counts(dropna=False)[:3]

Mn_o_PF_cl     9419
Mn_t_PF_cl     8789
Mn_i_APF_cl    7522
Name: fs_asps_wt_pls_cp, dtype: int64

In [148]:
df_book_asps[df_book_asps.fs_asps_wt_pls_cp == 'PF_c_DPF'].approach.value_counts()
# Add 'cl' to each aspekts(approach) and save only them:
# 'conv_compl'

conv_compl      1486
diver_weak      1445
diver_denide     561
conv_denide      496
Name: approach, dtype: int64

In [149]:
pf_compl_lt = ['PF_c_DPF', 'PF_o_DPF'] 
sr_book_asps_pf = df_book_asps[['fs_asps_wt_pls_cp', 'approach']].apply(lambda x: x[0] if x[0] not in pf_compl_lt else x[0] + '_cl' if x[1] == 'conv_compl'  else None, axis=1).dropna()
print('shape -', sr_book_asps_pf.shape[0])
# sr_book_asps_pf.value_counts()
# PF_c_DPF_cl     1779

shape - 397233


In [150]:
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[(df_book_asps.index.isin(sr_book_asps_pf.index))].copy()
df_book_asps.fs_asps_wt_pls_cp = sr_book_asps_pf
print('shape - ', df_book_asps.shape[0])

shape -  401605
shape -  397233


In [151]:
df_book_asps[df_book_asps.fs_asps_wt_pls_cp == 'DPF_c_APF'].approach.value_counts()
# Add 'cl' to each aspekts(approach) and save only them:
# 'conv'

conv           1416
conv_weak       202
conv_denide      83
Name: approach, dtype: int64

In [152]:
dpf_compl_lt = ['DPF_o_APF', 'DPF_c_APF'] 
sr_book_asps_dpf = df_book_asps[['fs_asps_wt_pls_cp', 'approach']].apply(lambda x: x[0] if x[0] not in dpf_compl_lt else x[0] + '_cl' if x[1] == 'conv'  else None, axis=1).dropna()
print('shape -', sr_book_asps_dpf.shape[0])
# sr_book_asps_dpf.value_counts()
# DPF_c_APF_cl     1412

shape - 396781


In [153]:
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[(df_book_asps.index.isin(sr_book_asps_dpf.index))].copy()
df_book_asps.fs_asps_wt_pls_cp = sr_book_asps_dpf
print('shape - ', df_book_asps.shape[0])

shape -  397233
shape -  396781


In [154]:
df_book_asps[df_book_asps.fs_asps_wt_pls_cp == 'PF_o_V1'].approach.value_counts()
# Add 'cl' to each aspekts(approach) and save only them:
# 'conv' and 'conv_compl'

conv            1556
conv_compl       386
diver_weak       312
conv_weak        231
conv_denide      188
diver_denide     167
Name: approach, dtype: int64

In [155]:
pf_vld_compl_lt = ['PF_c_V1', 'PF_o_V1', 'PF_c_V7', 'PF_o_V7', 'V1_c_APF', 'V1_o_APF', 'V7_c_APF', 'V7_o_APF'] 
sr_book_asps_pf_vld = df_book_asps[['fs_asps_wt_pls_cp', 'approach']].apply(lambda x: x[0] if x[0] not in pf_vld_compl_lt else x[0] + '_cl' if x[1] in ['conv', 'conv_compl']  else None, axis=1).dropna()
print('shape -', sr_book_asps_pf_vld.shape[0])
# sr_book_asps_pf_vld.value_counts()
# PF_o_V1_cl        1656

shape - 392150


In [156]:
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[(df_book_asps.index.isin(sr_book_asps_pf_vld.index))].copy()
df_book_asps.fs_asps_wt_pls_cp = sr_book_asps_dpf
print('shape - ', df_book_asps.shape[0])

shape -  396781
shape -  392150


In [157]:
fav_asps_lt = ['DPF_c_APF_cl', 'H10_c_AMn', 'H10_c_AV4_in', 'H10_c_AV7_in', 'H10_c_Mn', 'H10_c_Ur', 
               'H10_c_V4_in', 'H10_c_V7_in', 'H1_c_AMn', 'H7_c_APl', 'H1_c_APF',  'H1_c_AV4_in', 'H1_c_AV7_in', 'H1_c_Mn', 
               'H1_c_V4_in', 'H1_c_V7_in', 'H4_c_AV1_out', 'H4_c_Pl', 'H4_c_St', 'H4_c_V1_out', 'H7_c_AV10_out', 'H7_c_AV1_out', 'H7_c_Pl', 'H7_c_St', 'H7_c_V10_out',
               'H7_c_V1_out', 'Mn_c_APF_cl', 'Mn_c_AV10_cl', 'Mn_c_AV1_cl', 'Mn_c_PF_cl', 'Mn_c_V10_cl', 'Mn_c_V1_cl', 'Mn_i_APF_cl', 'Mn_i_AV10_cl', 'Mn_i_AV1_cl', 'Mn_i_PF_cl', 'Mn_i_V10_cl', 
               'Mn_i_V1_cl', 'Mn_o_AV10_cl', 'Mn_o_AV1_cl', 'Mn_o_V1_cl', 'Mn_q_AV10_cl', 'Mn_q_AV1_cl', 'Mn_q_V10_cl', 'Mn_q_V1_cl', 'Mn_t_APF_cl', 'Mn_t_AV10_cl', 'Mn_t_AV1_cl', 'Mn_t_PF_cl', 
               'Mn_t_V10_cl', 'Mn_t_V1_cl', 'NN_c_APF', 'PF_c_DPF_cl', 'PF_c_NN', 'PF_c_Ur', 'PF_c_V1_cl', 'V1_c_APF_cl', 'V7_o_APF_cl']

pre_asps_lt = ['DPF_o_APF_cl', 'H10_c_APl', 'H10_c_AV4_out', 'H10_c_AV7_out', 'H10_c_Pl', 'H10_c_St', 'H10_c_V4_out', 'H10_c_V7_out', 'H1_c_APl', 'H1_c_AV4_out', 'H1_c_AV7_out', 'H1_c_Pl', 'H1_c_St',
               'H1_c_V1_out', 'H1_c_V4_out', 'H1_c_V7_out', 'H4_c_AMn', 'H4_c_AV1_in', 'H4_c_Mn', 'H4_c_V1_in',  'H7_c_AMn', 'H7_c_APF', 
               'H7_c_AV10_in', 'H7_c_AV1_in',  'H7_c_Mn', 'H7_c_V10_in', 'H7_c_V1_in', 'Mn_c_AV4_cl', 'Mn_c_AV7_cl',
               'Mn_c_V4_cl', 'Mn_c_V7_cl', 'Mn_i_AV4_cl', 'Mn_i_AV7_cl', 'Mn_i_V4_cl', 'Mn_i_V7_cl', 'Mn_o_APF_cl', 'Mn_o_AV4_cl', 'Mn_o_AV7_cl', 'Mn_o_PF_cl', 'Mn_o_V4_cl', 'Mn_o_V7_cl', 'Mn_q_APF_cl',
               'Mn_q_AV4_cl', 'Mn_q_AV7_cl', 'Mn_q_PF_cl', 'Mn_q_V4_cl', 'Mn_q_V7_cl', 'Mn_t_AV4_cl', 'Mn_t_AV7_cl', 'Mn_t_V4_cl', 'Mn_t_V7_cl', 'PF_c_Pl', 'PF_c_SN', 'PF_c_Sn', 'PF_c_St', 'PF_c_V7_cl',
               'PF_o_DPF_cl', 'PF_o_Pl', 'PF_o_Sn', 'PF_o_St', 'PF_o_Ur', 'PF_o_V1_cl', 'PF_o_V7_cl', 'Pl_c_APF', 'Pl_o_APF', 'SN_c_APF', 'Sn_c_APF', 'Sn_o_APF', 'St_c_APF', 'St_o_APF', 'V1_o_APF_cl', 
               'V7_c_APF_cl']
# Not using
# 'Ur_c_APF', 'Ur_o_APF', 'H10_c_ANp', 'H10_c_Np', 'H1_c_ANp', 'H1_c_Np', 'H4_c_ANp', 'H4_c_Np', 'H7_c_ANp', 'H7_c_Np', 'Np_c_APF', 'Np_o_APF', 'Np_c_PF', 'Np_o_PF', 'PF_c_Np'
# Fav - 'H10_c_AV10_in', 'H10_c_AV10_out', 'H10_c_AV1_in', 'H10_c_AV1_out', 'H10_c_V10_in', 'H10_c_V10_out', 'H10_c_V1_in', 'H10_c_V1_out', 'H1_c_AV1_in', 'H1_c_AV1_out', 'H1_c_V10_in', 
# 'H1_c_V10_out', 'H1_c_V1_in', 
# Pre - 'H4_c_AV4_in', 'H4_c_AV4_out', 'H4_c_V7_in', 'H4_c_V7_out', 'H7_c_AV7_in', 'H7_c_AV7_out', 'H7_c_V4_in', 'H7_c_V4_out', 'H7_c_V7_in', 'H7_c_V7_out',

print('len fav_asps_lt -', len(fav_asps_lt))
print('len pre_asps_lt -', len(pre_asps_lt))

len fav_asps_lt - 58
len pre_asps_lt - 72


In [158]:
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[(df_book_asps.fs_asps_wt_pls_cp.isin(fav_asps_lt)) | (df_book_asps.fs_asps_wt_pls_cp.isin(pre_asps_lt))].copy()
print('shape - ', df_book_asps.shape[0])

shape -  392150
shape -  305727


In [159]:
df_book_asps.fs_asps_wt_pls_cp.value_counts(dropna=False)[:5]

Mn_o_PF_cl     9419
Mn_t_PF_cl     8789
Mn_i_APF_cl    7522
Mn_q_APF_cl    7311
Mn_t_APF_cl    7300
Name: fs_asps_wt_pls_cp, dtype: int64

In [160]:
df_book_asps['fav_asps'] = df_book_asps.fs_asps_wt_pls_cp.map(lambda x: x if x in fav_asps_lt else None)
df_book_asps['pre_asps'] = df_book_asps.fs_asps_wt_pls_cp.map(lambda x: x if x in pre_asps_lt else None)

In [161]:
df_book_asps.fav_asps.value_counts(dropna=False)[:5]

None           158352
Mn_t_PF_cl       8789
Mn_i_APF_cl      7522
Mn_t_APF_cl      7300
Mn_i_V1_cl       6589
Name: fav_asps, dtype: int64

In [162]:
df_book_asps.pre_asps.value_counts(dropna=False)[:5]

None           147375
Mn_o_PF_cl       9419
Mn_q_APF_cl      7311
Mn_t_V7_cl       6430
Mn_i_V7_cl       6345
Name: pre_asps, dtype: int64

In [163]:
# Only some liague ################################################################################################# Ligue #########################################################################################
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[df_book_asps.ligue.isin(['top'])].copy() # 'sec', 'thr'
print('shape - ', df_book_asps.shape[0])

shape -  305727
shape -  80917


In [164]:
# Only some countries
# print('shape - ', df_book_asps.shape[0])
# df_book_asps = df_book_asps[df_book_asps.country.isin(['United Kingdom', 'Spain', 'France', 'Germany', 'Italy',])]# 'Turkey', 'Romania', 'Portugal', 'Denmark', ])].copy() # 'sec', 'thr'
# print('shape - ', df_book_asps.shape[0])

In [165]:
# df_book_asps.country.value_counts(dropna=False)
# Brazil            629
# Romania           340
# Spain             336
# United Kingdom    332
# Portugal          280
# France            227
# Germany           209
# Belarus           193
# Russia            167
# Turkey            162
# Denmark           139
# Greece            133
# Italy             117
# Poland            115
# Uruguay           109
# Sweden            105
# Croatia            96
# Finland            94
# Serbia             91
# Belgium            80
# Czech Republic     71
# Netherlands        65
# Ukraine            58
# Hungary            50
# Switzerland        39
# Austria            22
# Brazilia           17
# England            15
# Bulgaria           12
# Czechia             7
# Scotland            6
# Monaco              4
# Liechtenstein       3
# Wales               2

In [166]:
df_book_asps.game_utc.map(lambda x: x.day_name()[:3]).value_counts() 

Sat    31281
Sun    25911
Fri     8821
Mon     4918
Wed     4884
Tue     3026
Thu     2076
Name: game_utc, dtype: int64

In [167]:
# Only active days games time
df_book_asps['day_name'] = df_book_asps.game_utc.map(lambda x: x.day_name()[:3])
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[~df_book_asps.day_name.isin(['Sat', 'Sun'])].copy()
print('shape - ', df_book_asps.shape[0])

shape -  80917
shape -  23725


In [168]:
df_book_asps.reset_index(inplace=True)
df_book_asps.drop(columns='fs_asps_wt_pls', inplace=True)
df_book_asps.rename(columns={'fs_asps_wt_pls_cp':'fs_asps_wt_pls'}, inplace=True)

In [169]:
def bets_description(win_bets: float, drow_bets: float, loss_bets: float):
    if win_bets != 0:
        return 'win_' + str(win_bets) 
    elif drow_bets != 0:
        return 'drow_' + str(drow_bets)
    else:
        return 'loss_' + str(loss_bets)
    
df_book_asps['bets_desc'] = df_book_asps[['win_bets', 'drow_bets', 'loss_bets']].apply(lambda x: bets_description(x[0], x[1], x[2]), axis=1)  

In [170]:
fs_asps_wt_pls_lt  = df_book_asps.groupby('game_id').fs_asps_wt_pls.apply(tuple)
fav_asps_wt_pls_lt = df_book_asps.groupby('game_id').fav_asps.apply(tuple)
pre_asps_wt_pls_lt = df_book_asps.groupby('game_id').pre_asps.apply(tuple)

In [171]:
df_book_asps.drop(columns=['fs_asps_wt_pls', 'fav_asps', 'pre_asps'], inplace=True)

In [172]:
df_book_asps = df_book_asps.merge(fs_asps_wt_pls_lt,  how='left', left_on='game_id', right_index=True)
df_book_asps = df_book_asps.merge(fav_asps_wt_pls_lt, how='left', left_on='game_id', right_index=True)
df_book_asps = df_book_asps.merge(pre_asps_wt_pls_lt, how='left', left_on='game_id', right_index=True)

In [173]:
df_book_asps.rename(columns={'fs_asps_wt_pls':'fs_asps_wt_pls_lt', 'fav_asps':'fav_asps_lt', 'pre_asps':'pre_asps_lt'}, inplace=True)

In [174]:
df_book_asps.drop_duplicates(subset='game_id', inplace=True)

In [175]:
df_book_asps.fav_asps_lt = df_book_asps.fav_asps_lt.map(lambda x: [var for var in x if var != None])
df_book_asps.pre_asps_lt = df_book_asps.pre_asps_lt.map(lambda x: [var for var in x if var != None])

In [176]:
df_book_asps.fav_asps_lt.value_counts(dropna=False).head(3)

[]              6136
[Mn_q_V1_cl]     383
[Mn_t_PF_cl]     359
Name: fav_asps_lt, dtype: int64

In [177]:
df_book_asps['fav_cnt_asps'] = df_book_asps.fav_asps_lt.map(lambda x: len(x))
df_book_asps['pre_cnt_asps'] = df_book_asps.pre_asps_lt.map(lambda x: len(x))

In [178]:
def aspects_goals_concatinate(host_role: str, fav_cnt_asps: int, pre_cnt_asps: int):
    if host_role == 'Fav':
        return str(fav_cnt_asps) +':'+ str(pre_cnt_asps)
    else:
        return str(pre_cnt_asps) +':'+ str(fav_cnt_asps)

df_book_asps['asps_goals'] = df_book_asps[['host_role', 'fav_cnt_asps', 'pre_cnt_asps']].apply(lambda x: aspects_goals_concatinate(x[0], x[1], x[2]), axis=1)

In [179]:
# Only one aspect
df_book_asps['asps_cnt'] = df_book_asps.fs_asps_wt_pls_lt.map(lambda x: len(x))
print('shape - ', df_book_asps.shape[0])
df_book_asps = df_book_asps[df_book_asps.asps_cnt == 1].copy()
print('shape - ', df_book_asps.shape[0])

shape -  15384
shape -  9104


In [180]:
# Only unique date-time
print('shape - ', df_book_asps.shape[0])
df_book_asps.drop_duplicates(subset='game_utc', keep=False, inplace=True)
print('shape - ', df_book_asps.shape[0])

shape -  9104
shape -  4107


In [181]:
# Create df for check
df_check = df_book_asps[df_book_asps.game_utc >= '2022.10'].copy()
# df_check = df_book_asps[(df_book_asps.game_utc >= '2020.11') & (df_book_asps.game_utc < '2021.04')].copy()
print('df_check shape - ', df_check.shape[0])
print('min date', min(df_check.game_utc))
print('max date', max(df_check.game_utc))

df_check shape -  585
min date 2022-10-03 16:00:00
max date 2023-10-05 23:00:00


In [182]:
# df_book_asps = df_book_asps[~df_book_asps.game_id.isin(df_check.game_id)].copy()
df_book_asps = df_book_asps[(~df_book_asps.game_id.isin(df_check.game_id)) & (df_book_asps.game_utc < '2020.11')].copy()
print('df_book_asps shape - ', df_book_asps.shape[0])
print('min date', min(df_book_asps.game_utc))
print('max date', max(df_book_asps.game_utc))

df_book_asps shape -  2971
min date 2013-11-07 21:30:00
max date 2020-10-30 19:00:00


In [183]:
df_book_asps.reset_index(drop=True, inplace=True)

In [184]:
df_book_asps.tail(1)

Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count,host_role,result,fav_goals,win_bets,drow_bets,loss_bets,day_name,bets_desc,fs_asps_wt_pls_lt,fav_asps_lt,pre_asps_lt,fav_cnt_asps,pre_cnt_asps,asps_goals,asps_cnt
2970,1442616,Premier League,306,2020-10-30 19:00:00,2:0,top,Wolverhampton,United Kingdom,Wolverhampton,Crystal Palace,13,14,45,44,1.98,3.26,4.56,1.56,2.5,United Kingdom,Wolverhampton,52n35,2w7,H10,ASn,c,in_stat,equal,2.1,no,(),AV4,,H10,ASn-AV4-in,H10_c_ASn-AV4-in,2020-10-30,9,H10,AV4,12,10.202,13,,4,20,2,Fav,fav_win,2,1.98,0.0,0.0,Fri,win_1.98,"(H10_c_AV4_in,)",[H10_c_AV4_in],[],1,0,1:0,1


In [185]:
win_asps_lt = []
_ = df_book_asps[['result', 'fs_asps_wt_pls_lt']].apply(lambda x: [win_asps_lt.append(asp) for asp in x[1]] if x[0] == 'fav_win' else None, axis=1).dropna()
win_asps_lt[:3]

['Mn_i_V4_cl', 'Mn_o_PF_cl', 'Mn_o_PF_cl']

In [186]:
drow_asps_lt = []
_ = df_book_asps[['result', 'fs_asps_wt_pls_lt']].apply(lambda x: [drow_asps_lt.append(asp) for asp in x[1]] if x[0] == 'fav_drow' else None, axis=1).dropna()
drow_asps_lt[:3]

['Mn_q_AV7_cl', 'Mn_q_AV7_cl', 'H7_c_APl']

In [187]:
loss_asps_lt = []
_ = df_book_asps[['result', 'fs_asps_wt_pls_lt']].apply(lambda x: [loss_asps_lt.append(asp) for asp in x[1]] if x[0] == 'fav_loss' else None, axis=1).dropna()
loss_asps_lt[:3]

['SN_c_APF', 'Mn_q_APF_cl', 'Mn_q_AV7_cl']

In [188]:
sr_win_asps_count  = pd.DataFrame({'count_win': Counter(win_asps_lt).values()}, index=Counter(win_asps_lt).keys()).sort_values(by='count_win', ascending=False)
sr_drow_asps_count = pd.DataFrame({'count_drow': Counter(drow_asps_lt).values()}, index=Counter(drow_asps_lt).keys()).sort_values(by='count_drow', ascending=False)
sr_loss_asps_count = pd.DataFrame({'count_loss': Counter(loss_asps_lt).values()}, index=Counter(loss_asps_lt).keys()).sort_values(by='count_loss', ascending=False)

In [189]:
df_asps_count = pd.merge(sr_win_asps_count, sr_drow_asps_count, how='left', left_index=True, right_index=True)
df_asps_count = pd.merge(df_asps_count, sr_loss_asps_count, how='left', left_index=True, right_index=True)

In [190]:
df_asps_count['count_shape'] = df_asps_count.count_win + df_asps_count.count_drow + df_asps_count.count_loss

In [191]:
mean_win  = df_book_asps.shape[0] / df_book_asps.result.value_counts().fav_win
mean_drow = df_book_asps.shape[0] / df_book_asps.result.value_counts().fav_drow
mean_loss = df_book_asps.shape[0] / df_book_asps.result.value_counts().fav_loss

print('mean_win -', mean_win.round(2), 'mean_drow -', mean_drow.round(2), 'mean_loss -', mean_loss.round(2))

df_asps_count['mean_win_att']  = round(mean_win  - (df_asps_count.count_shape / df_asps_count.count_win), 2)
df_asps_count['mean_drow_att'] = round(mean_drow - (df_asps_count.count_shape / df_asps_count.count_drow), 2)
df_asps_count['mean_loss_att'] = round(mean_loss - (df_asps_count.count_shape / df_asps_count.count_loss), 2)

mean_win - 2.32 mean_drow - 3.7 mean_loss - 3.36


In [192]:
df_asps_count.head()

Unnamed: 0,count_win,count_drow,count_loss,count_shape,mean_win_att,mean_drow_att,mean_loss_att
Mn_o_PF_cl,60,56.0,48.0,164.0,-0.42,0.77,-0.06
Mn_i_V7_cl,41,22.0,25.0,88.0,0.17,-0.3,-0.16
Mn_q_APF_cl,36,20.0,17.0,73.0,0.29,0.05,-0.94
Mn_t_PF_cl,35,26.0,30.0,91.0,-0.28,0.2,0.32
Mn_q_AV7_cl,35,19.0,22.0,76.0,0.15,-0.3,-0.1


In [193]:
df_all_profit_calc = pd.DataFrame()

for asp in df_asps_count.index:
    df_research_asp = df_book_asps[['bets_desc', 'fs_asps_wt_pls_lt']].apply(lambda x: x[0] if any(var for var in x[1] if var == asp) else None, axis=1).dropna()
    
    shape        = df_research_asp.shape[0]
    bet_sum_win  = df_book_asps[df_book_asps.index.isin(df_research_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'win' else 0).sum()
    bet_sum_drow = df_book_asps[df_book_asps.index.isin(df_research_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'drow' else 0).sum()
    bet_sum_loss = df_book_asps[df_book_asps.index.isin(df_research_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'loss' else 0).sum()
    
    diff_win  = bet_sum_win - shape
    diff_drow = bet_sum_drow - shape
    diff_loss = bet_sum_loss - shape
    
    profit_calc_dc = {'aspect':asp, 'bet_sum_win':bet_sum_win, 'bet_sum_drow':bet_sum_drow, 'bet_sum_loss':bet_sum_loss, 'diff_sum_win':diff_win, 'diff_sum_drow':diff_drow, 'diff_sum_loss':diff_loss}
    
    df_profit_calc     = pd.DataFrame.from_dict([profit_calc_dc])
    df_all_profit_calc = pd.concat([df_all_profit_calc, df_profit_calc])
    
df_all_profit_calc.set_index(keys='aspect', inplace=True)

In [194]:
df_all_profit_calc.shape[0]

115

In [195]:
df_asps_count = df_asps_count.merge(df_all_profit_calc, how='left', left_index=True, right_index=True)

In [196]:
df_asps_count.fillna(0, inplace=True)

In [197]:
df_asps_count['win_surplus']  = df_asps_count.mean_win_att  - sum([df_asps_count.mean_drow_att, df_asps_count.mean_loss_att])
df_asps_count['drow_surplus'] = df_asps_count.mean_drow_att - sum([df_asps_count.mean_win_att, df_asps_count.mean_loss_att])
df_asps_count['loss_surplus'] = df_asps_count.mean_loss_att - sum([df_asps_count.mean_win_att, df_asps_count.mean_drow_att])

In [198]:
df_asps_count.sort_values(by='mean_loss_att', ascending=False).head()

Unnamed: 0,count_win,count_drow,count_loss,count_shape,mean_win_att,mean_drow_att,mean_loss_att,bet_sum_win,bet_sum_drow,bet_sum_loss,diff_sum_win,diff_sum_drow,diff_sum_loss,win_surplus,drow_surplus,loss_surplus
Mn_t_V4_cl,8,3.0,15.0,26.0,-0.93,-4.97,1.62,17.81,10.66,42.47,-8.19,-15.34,16.47,2.42,-5.66,7.52
H10_c_Ur,3,2.0,6.0,11.0,-1.35,-1.8,1.52,5.76,7.13,12.58,-5.24,-3.87,1.58,-1.07,-1.97,4.67
H4_c_V1_in,1,1.0,2.0,4.0,-1.68,-0.3,1.36,2.07,3.29,5.22,-1.93,-0.71,1.22,-2.74,0.02,3.34
H7_c_V10_out,4,2.0,6.0,12.0,-0.68,-2.3,1.36,7.76,7.87,16.67,-4.24,-4.13,4.67,0.26,-2.98,4.34
Mn_c_AV4_cl,4,2.0,6.0,12.0,-0.68,-2.3,1.36,8.06,11.57,16.23,-3.94,-0.43,4.23,0.26,-2.98,4.34


In [199]:
# res_ind = df_asps_count[(df_asps_count.mean_win_att > 0.1) & (df_asps_count.diff_sum_win > 1)].index
# res_ind = df_asps_count[(df_asps_count.mean_drow_att > 0.4) & (df_asps_count.diff_sum_drow > 10)].index
res_ind = df_asps_count[(df_asps_count.mean_loss_att > 0.1) & (df_asps_count.diff_sum_loss > 5)].index
print(res_ind[:3])
result = ['win', 'drow', 'loss'][2]
print('result -', result)

Index(['Mn_q_AV1_cl', 'Mn_i_V10_cl', 'Mn_t_AV1_cl'], dtype='object')
result - loss


In [200]:
df_check[df_check.fs_asps_wt_pls_lt.map(lambda x: any(var for var in x if var in res_ind))].shape[0]

86

In [201]:
df_check[df_check.fs_asps_wt_pls_lt.map(lambda x: any(var for var in x if var in res_ind))].bets_desc.map(lambda x:  1 if x.split('_')[0] == result else 0).sum()

24

In [202]:
df_check[df_check.fs_asps_wt_pls_lt.map(lambda x: any(var for var in x if var in res_ind))].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == result else 0).sum()

86.91000000000001

In [203]:
df_check.shape[0]

585

In [204]:
df_book_asps.shape[0]

2971

In [205]:
st

NameError: name 'st' is not defined

In [206]:
df_asps_count.columns

Index(['count_win', 'count_drow', 'count_loss', 'count_shape', 'mean_win_att',
       'mean_drow_att', 'mean_loss_att', 'bet_sum_win', 'bet_sum_drow',
       'bet_sum_loss', 'diff_sum_win', 'diff_sum_drow', 'diff_sum_loss',
       'win_surplus', 'drow_surplus', 'loss_surplus'],
      dtype='object')

In [207]:
df_check['asps_win_rate']  = df_check.fs_asps_wt_pls_lt.map(lambda x: sum([df_asps_count[df_asps_count.index == asp].diff_sum_win.values[0] for asp in x if asp in df_asps_count.index]))
df_check['asps_drow_rate'] = df_check.fs_asps_wt_pls_lt.map(lambda x: sum([df_asps_count[df_asps_count.index == asp].diff_sum_drow.values[0] for asp in x if asp in df_asps_count.index]))
df_check['asps_loss_rate'] = df_check.fs_asps_wt_pls_lt.map(lambda x: sum([df_asps_count[df_asps_count.index == asp].diff_sum_loss.values[0] for asp in x if asp in df_asps_count.index]))

In [208]:
# df_book_asps[df_book_asps.goals == df_book_asps.asps_goals].shape[0]

In [209]:
print('fav_win_shape -', df_book_asps[df_book_asps.fav_cnt_asps > df_book_asps.pre_cnt_asps].shape[0])
print('fav_win_count -', df_book_asps[df_book_asps.fav_cnt_asps > df_book_asps.pre_cnt_asps].bets_desc.map(lambda x:  1 if x.split('_')[0] == 'win' else 0).sum())
print('fav_win_bets  -', df_book_asps[df_book_asps.fav_cnt_asps > df_book_asps.pre_cnt_asps].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'win' else 0).sum())

fav_win_shape - 1425
fav_win_count - 605
fav_win_bets  - 1340.63


In [210]:
cols = ['game_id', 'game_utc', 'goals', 'f_team', 's_team', 'city', 'country', 'bets_desc', 'fs_asps_wt_pls_lt', 'asps_cnt', 'asps_win_rate', 'asps_drow_rate', 'asps_loss_rate', 'result']
df_check[cols].sort_values(by=['asps_loss_rate', 'game_utc'], ascending=False).head(10)

Unnamed: 0,game_id,game_utc,goals,f_team,s_team,city,country,bets_desc,fs_asps_wt_pls_lt,asps_cnt,asps_win_rate,asps_drow_rate,asps_loss_rate,result
23501,1925730,2023-09-18 19:15:00,4:1,Boavista,Chaves,Porto,Portugal,win_1.7,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_win
23500,1905481,2023-09-18 19:00:00,2:4,Granada CF,Girona,Granada,Spain,loss_2.52,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_loss
23414,1871382,2023-08-30 22:00:00,2:0,Novorizontino,Ituano,Horizonte,Brazil,win_1.82,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_win
22053,1753554,2023-03-13 16:00:00,3:0,Istanbulspor,Sivasspor,İstanbul,Turkey,win_2.9,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_win
22042,1737726,2023-03-10 16:30:00,2:2,Kaiserslautern,Sandhausen,Kaiserslautern,Germany,drow_3.7,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_drow
21887,1739960,2023-02-17 16:00:00,1:0,Kolubara,Vozdovac,Lazarevac,Serbia,win_2.37,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_win
21758,1737333,2023-02-01 17:00:00,0:2,Nantes,Marseille,Nantes,France,loss_1.74,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_loss
21388,1748364,2022-12-07 19:00:00,2:1,Tenerife,Deportivo Alavés,Santa Cruz de Tenerife,Spain,win_2.26,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_win
21150,1748310,2022-11-02 17:00:00,2:1,Villarreal B,Real Oviedo,Villarreal,Spain,win_2.63,"(H7_c_V1_in,)",1,-12.39,-5.7,17.29,fav_win
22637,1736492,2023-05-04 19:00:00,1:0,Brighton,Manchester United,Brighton,United Kingdom,win_2.34,"(Mn_t_V4_cl,)",1,-8.19,-15.34,16.47,fav_win


In [211]:
df_gb_asps = df_check[cols].sort_values(by=['asps_loss_rate', 'game_utc'], ascending=False).head(200).groupby('fs_asps_wt_pls_lt').result.apply(list).to_frame()

In [212]:
df_gb_asps.head()

Unnamed: 0_level_0,result
fs_asps_wt_pls_lt,Unnamed: 1_level_1
"(DPF_o_APF_cl,)","[fav_loss, fav_loss]"
"(H10_c_AV7_in,)","[fav_win, fav_win, fav_win]"
"(H10_c_AV7_out,)","[fav_win, fav_win, fav_drow, fav_win, fav_win]"
"(H10_c_Ur,)",[fav_drow]
"(H1_c_APF,)","[fav_loss, fav_win, fav_win]"


In [213]:
df_asps_count.sort_values(by='diff_sum_win', ascending=False).head(10)

Unnamed: 0,count_win,count_drow,count_loss,count_shape,mean_win_att,mean_drow_att,mean_loss_att,bet_sum_win,bet_sum_drow,bet_sum_loss,diff_sum_win,diff_sum_drow,diff_sum_loss,win_surplus,drow_surplus,loss_surplus
Mn_t_APF_cl,30,21.0,16.0,67.0,0.08,0.5,-0.83,83.85,74.57,40.16,16.85,7.57,-26.84,0.41,1.25,-1.41
H4_c_AV1_in,7,1.0,1.0,9.0,1.03,-5.3,-5.64,21.79,3.14,3.5,12.79,-5.86,-5.5,11.97,-0.69,-1.37
Mn_c_AV1_cl,15,5.0,5.0,25.0,0.65,-1.3,-1.64,35.24,16.09,13.46,10.24,-8.91,-11.54,3.59,-0.31,-0.99
Mn_i_V10_cl,23,11.0,16.0,50.0,0.14,-0.85,0.23,60.15,39.7,65.52,10.15,-10.3,15.52,0.76,-1.22,0.94
Mn_i_V4_cl,20,5.0,8.0,33.0,0.67,-2.9,-0.77,42.24,16.13,32.33,9.24,-16.87,-0.67,4.34,-2.8,1.46
H4_c_Mn,16,5.0,7.0,28.0,0.57,-1.9,-0.64,36.89,16.88,27.67,8.89,-11.12,-0.33,3.11,-1.83,0.69
Mn_q_APF_cl,36,20.0,17.0,73.0,0.29,0.05,-0.94,80.3,68.36,56.97,7.3,-4.64,-16.03,1.18,0.7,-1.28
Mn_c_AV10_cl,7,2.0,2.0,11.0,0.75,-1.8,-2.14,17.26,6.78,4.04,6.26,-4.22,-6.96,4.69,-0.41,-1.09
PF_o_DPF_cl,4,2.0,0.0,0.0,0.0,0.0,0.0,11.55,7.18,0.0,5.55,1.18,-6.0,0.0,0.0,0.0
Mn_i_AV4_cl,13,4.0,3.0,20.0,0.78,-1.3,-3.31,25.47,13.68,7.7,5.47,-6.32,-12.3,5.39,1.23,-2.79


In [214]:
df_asps_count.sort_values(by='diff_sum_drow', ascending=False).head(10)

Unnamed: 0,count_win,count_drow,count_loss,count_shape,mean_win_att,mean_drow_att,mean_loss_att,bet_sum_win,bet_sum_drow,bet_sum_loss,diff_sum_win,diff_sum_drow,diff_sum_loss,win_surplus,drow_surplus,loss_surplus
Mn_o_PF_cl,60,56.0,48.0,164.0,-0.42,0.77,-0.06,120.07,197.25,140.92,-43.93,33.25,-23.08,-1.13,1.25,-0.41
Mn_q_AV10_cl,6,9.0,7.0,22.0,-1.35,1.25,0.21,10.38,38.99,27.52,-11.62,16.99,5.52,-2.81,2.39,0.31
H7_c_AV10_in,2,8.0,1.0,11.0,-3.18,2.32,-7.64,3.39,27.7,1.34,-7.61,16.7,-9.66,2.14,13.14,-6.78
Mn_o_V7_cl,7,13.0,10.0,30.0,-1.97,1.39,0.36,12.27,43.95,40.81,-17.73,13.95,10.81,-3.72,3.0,0.94
Mn_q_AV4_cl,5,11.0,12.0,28.0,-3.28,1.15,1.02,8.45,40.77,29.38,-19.55,12.77,1.38,-5.45,3.41,3.15
Mn_i_APF_cl,33,23.0,21.0,77.0,-0.02,0.35,-0.31,66.96,89.46,51.75,-10.04,12.46,-25.25,-0.06,0.68,-0.64
H7_c_APF,10,10.0,5.0,25.0,-0.18,1.2,-1.64,30.36,36.24,19.63,5.36,11.24,-5.37,0.26,3.02,-2.66
H1_c_Pl,2,5.0,1.0,8.0,-1.68,2.1,-4.64,3.09,18.18,1.99,-4.91,10.18,-6.01,0.86,8.42,-5.06
H4_c_Pl,6,7.0,4.0,17.0,-0.52,1.27,-0.89,11.75,27.14,10.43,-5.25,10.14,-6.57,-0.9,2.68,-1.64
H1_c_V7_in,4,6.0,5.0,15.0,-1.43,1.2,0.36,8.95,24.98,22.15,-6.05,9.98,7.15,-2.99,2.27,0.59


In [215]:
df_asps_count.sort_values(by='diff_sum_loss', ascending=False).head(10)

Unnamed: 0,count_win,count_drow,count_loss,count_shape,mean_win_att,mean_drow_att,mean_loss_att,bet_sum_win,bet_sum_drow,bet_sum_loss,diff_sum_win,diff_sum_drow,diff_sum_loss,win_surplus,drow_surplus,loss_surplus
H7_c_V1_in,16,10.0,16.0,42.0,-0.31,-0.5,0.73,29.61,36.3,59.29,-12.39,-5.7,17.29,-0.54,-0.92,1.54
Mn_t_V4_cl,8,3.0,15.0,26.0,-0.93,-4.97,1.62,17.81,10.66,42.47,-8.19,-15.34,16.47,2.42,-5.66,7.52
Mn_i_V10_cl,23,11.0,16.0,50.0,0.14,-0.85,0.23,60.15,39.7,65.52,10.15,-10.3,15.52,0.76,-1.22,0.94
H4_c_AMn,6,5.0,7.0,18.0,-0.68,0.1,0.79,16.02,16.07,33.3,-1.98,-1.93,15.3,-1.57,-0.01,1.37
H7_c_APl,4,4.0,6.0,14.0,-1.18,0.2,1.02,9.21,13.06,27.48,-4.79,-0.94,13.48,-2.4,0.36,2.0
Mn_i_AV10_cl,9,4.0,8.0,21.0,-0.02,-1.55,0.73,26.09,14.71,33.83,5.09,-6.29,12.83,0.8,-2.26,2.3
Mn_t_AV1_cl,19,8.0,19.0,46.0,-0.1,-2.05,0.94,39.94,27.92,58.8,-6.06,-18.08,12.8,1.01,-2.89,3.09
Mn_q_AV1_cl,26,14.0,24.0,64.0,-0.14,-0.88,0.69,61.71,49.9,75.49,-2.29,-14.1,11.49,0.05,-1.43,1.71
Mn_o_V7_cl,7,13.0,10.0,30.0,-1.97,1.39,0.36,12.27,43.95,40.81,-17.73,13.95,10.81,-3.72,3.0,0.94
PF_c_DPF_cl,7,5.0,7.0,19.0,-0.4,-0.1,0.64,15.74,17.57,28.22,-3.26,-1.43,9.22,-0.94,-0.34,1.14


In [216]:
df_check.head(1)

Unnamed: 0,game_id,ligue_header,season_id,game_utc,goals,ligue,actual_city,actual_country,f_team,s_team,pos_f,pos_s,pts_f,pts_s,1,X,2,un_2.5,ov_2.5,country,city,lat,lon,f_point,s_point,type,approach,sing,tr_orb,bp_asp,den_point,smain_ch,fmain_ch,f_role,s_role,fs_asps,day_month_yr,cnt_asp_for_day,f_role_wt_pls,s_role_wt_pls,cnt_asp_wt_pls_for_day,month_yr,moon_day,nakshatras,week_day,day,goals_count,host_role,result,fav_goals,win_bets,drow_bets,loss_bets,day_name,bets_desc,fs_asps_wt_pls_lt,fav_asps_lt,pre_asps_lt,fav_cnt_asps,pre_cnt_asps,asps_goals,asps_cnt,asps_win_rate,asps_drow_rate,asps_loss_rate
20759,1743346,Premier Liga,381,2022-10-03 16:00:00,0:0,top,Samara,Russia,Krylya Sovetov Samara,Krasnodar,12,6,32,48,2.05,3.76,3.8,2.06,1.89,Russia,Samara,53n12,50000000000.0,Mn,ASn,i,moon_conv_compl_weak,equal,0.28,no,"([], [antes_ruls])",AV4,,Mn,ASn-AV4,Mn_i_ASn-AV4,2022-10-03,5,Mn,AV4,7,10.2022,7,Revathi,0,20,0,Fav,fav_drow,0,0.0,3.76,0.0,Mon,drow_3.76,"(Mn_i_AV4_cl,)",[],[Mn_i_AV4_cl],0,1,0:1,1,5.47,-6.32,-12.3


In [217]:
inds_lt = df_asps_count[df_asps_count.loss_surplus > 2].index

In [235]:
# df_check_asp = df_check[['bets_desc', 'fs_asps_wt_pls_lt']].apply(lambda x: x[0] if any(var for var in x[1] if var == 'Mn_t_AV7_cl') else None, axis=1).dropna()
df_check_asp = df_check[['bets_desc', 'fs_asps_wt_pls_lt']].apply(lambda x: x[0] if any(var for var in x[1] if var in inds_lt) else None, axis=1).dropna()

cnt_win  = df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'win' else None).dropna().shape[0]
cnt_drow = df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'drow' else None).dropna().shape[0]
cnt_loss = df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'loss' else None).dropna().shape[0]

print('shape -',df_check_asp.shape[0])
print('win  -', cnt_win, '-' , df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'win' else 0).sum())
print('drow -', cnt_drow, '-' , df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'drow' else 0).sum())
print('loss -', cnt_loss, '-' , df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'loss' else 0).sum())

shape - 89
win  - 42 - 98.41
drow - 25 - 91.19
loss - 22 - 59.46000000000001


In [234]:
# df_check shape -  339
# min date 2020-11-02 14:00:00
# max date 2021-03-31 23:00:00
# inds_lt = df_asps_count[df_asps_count.loss_surplus > 2].index
# shape - 49
# win  - 19 - 46.1
# drow - 9 - 31.71
# loss - 21 - 89.46

inds_lt = ['Mn_i_AV1_cl', 'H7_c_V10_in', 'Mn_t_AV1_cl', 'PF_c_SN', 'H1_c_APF', 'H10_c_AV7_out', 'Mn_i_AV10_cl', 'H10_c_AV7_in', 'PF_c_Pl', 'Mn_t_V4_cl', 'DPF_c_APF_cl', 'Mn_o_V4_cl', 
            'H1_c_APl', 'H7_c_AMn', 'Pl_o_APF', 'Mn_q_AV4_cl', 'H7_c_V10_out', 'H4_c_AV1_out', 'Mn_c_AV4_cl', 'Mn_o_AV4_cl', 'H4_c_V1_out', 'H10_c_Ur', 'St_o_APF',b'H4_c_V1_in']

In [220]:
st

NameError: name 'st' is not defined

In [None]:
df_naksha = df_book_asps.fs_asps_wt_pls_lt.value_counts(dropna=False).to_frame()

In [None]:
df_naksha = df_naksha.merge(df_book_asps.groupby('fs_asps_wt_pls_lt').goals_count.sum(), how='left', left_index=True, right_index=True)

In [None]:
df_naksha['att_shp_gls'] = round(df_naksha.goals_count / df_naksha.fs_asps_wt_pls_lt, 2)

In [None]:
df_naksha.sort_values(by='att_shp_gls', ascending=False).head(10)

In [None]:
naksh = df_naksha.sort_values(by='att_shp_gls', ascending=False).index[6]
print(naksh)
print('shape -', df_book_asps[df_book_asps.fs_asps_wt_pls_lt == naksh].shape[0])
print('succs -', df_book_asps[(df_book_asps.fs_asps_wt_pls_lt == naksh) & (df_book_asps.goals_count > 2)].shape[0])
print('sum   -', df_book_asps[(df_book_asps.fs_asps_wt_pls_lt == naksh) & (df_book_asps.goals_count > 2)]['ov_2.5'].sum())

In [None]:
naksh = df_naksha.sort_values(by='att_shp_gls', ascending=False).index[6]
print(naksh)
print('shape -', df_check[df_check.fs_asps_wt_pls_lt == naksh].shape[0])
print('succs -', df_check[(df_check.fs_asps_wt_pls_lt == naksh) & (df_check.goals_count > 2)].shape[0])
print('sum   -', df_check[(df_check.fs_asps_wt_pls_lt == naksh) & (df_check.goals_count > 2)]['ov_2.5'].sum())

In [None]:
df_book_asps.head(1)

In [None]:
cols = ['game_id', 'game_utc', 'goals', 'goals_count', 'f_team', 's_team', 'city', 'country', 'bets_desc', 'fs_asps_wt_pls_lt', 'moon_day', 'nakshatras']

In [None]:
df_book_asps[(df_book_asps.game_id.isin(df_book_asps.apply(lambda x: x[0] if any(var for var in x['fs_asps_wt_pls_lt'] if var == 'Mn_t_PF_cl') else None, axis=1).dropna())) ][cols]

In [None]:
St

#### Foras calculate ->

In [None]:
# # Choise the action

# # date = '29.09.2023' - format='%d.%m.%Y'
# numb_asps  = 7 # change row code below with len_asp for 1 aspect
# numb_minus = 0

# csv_file = Path('csv_files/statistic/df_all_count_aspects.csv')

# action   = ['new', 'continue'][0]  # 0 - new, 1 - continue - for continue research
# print('action -', action, '*******************************')

# if action == 'new': 
#     start_date = df_main_asps.day_month_yr.min()
#     if csv_file.is_file():
#         os.remove(csv_file)   
#         print('remove file')
# elif action == 'continue':
#     old_dates  = pd.read_csv('csv_files/statistic/df_all_count_aspects.csv', usecols=['date'], dayfirst=True, parse_dates=True) 
#     start_date = old_dates.date.max() 
    
    
# for date in tqdm(df_main_asps[df_main_asps.day_month_yr >= start_date].day_month_yr.unique()[:]):               

#     today_games_ids       = df_main_asps[df_main_asps.day_month_yr == date].game_id.unique()
#     df_games_w_asps       = df_main_asps.groupby('game_id').fs_asps_wt_pls.agg(list).reset_index()
#     df_today_games_w_asps = df_games_w_asps[df_games_w_asps.game_id.isin(today_games_ids)].copy()
#     df_reserch_games_asps = df_games_w_asps[~df_games_w_asps.game_id.isin(today_games_ids)].copy()
    
#     df_reserch_games_asps.set_index('game_id', inplace=True)

#     df_count_asps = pd.DataFrame()
     
#     for game_id, aspects in zip(df_today_games_w_asps.game_id, df_today_games_w_asps.fs_asps_wt_pls):
        
#         len_asps = len(aspects)
#         count_asps = df_reserch_games_asps.fs_asps_wt_pls.map(lambda x: sum(item in aspects for item in x))
        
#         if len_asps == numb_asps: # for equal numb aspect - numb_asps == 1 (2, 3, ...) 
#         # if len_asps >= numb_asps and len(count_asps[count_asps > len_asps - numb_minus]) > 0: # for numb aspects with minus - numb_asps >= 2-1 
#             df_for_concat = pd.DataFrame({'game_id': game_id, 'count_asps': len(aspects), 'game_id_eq_asps': count_asps[count_asps == max(count_asps)].index.to_list(), 
#                                           'max_eq_count': list(count_asps[count_asps == max(count_asps)].values)})
            
#             df_count_asps = pd.concat([df_count_asps, df_for_concat])        
            
#     if 'game_id' in df_count_asps.columns:
#         df_count_asps = df_count_asps.groupby('game_id').agg({'count_asps': max, 'game_id_eq_asps': list, 'max_eq_count': list})  
#     else:
#         continue

#     df_all_result = pd.DataFrame()
      
#     for game_ids_lt in df_count_asps.game_id_eq_asps:
        
#         result_fav_home_lt = df_unique_game_id[(df_unique_game_id.game_id.isin(game_ids_lt)) & (df_unique_game_id.host_role == 'Fav')].result.to_list()
#         result_fav_away_lt = df_unique_game_id[(df_unique_game_id.game_id.isin(game_ids_lt)) & (df_unique_game_id.host_role == 'Pre')].result.to_list()
        
#         df_result_fav_home = pd.DataFrame.from_dict(Counter(result_fav_home_lt), orient='index').T
#         df_result_fav_home.rename(columns={'fav_win':'fav_win_home', 'fav_drow':'fav_drow_home', 'fav_loss':'fav_loss_home'}, inplace=True)
        
#         if df_result_fav_home.empty:
#             df_result_fav_home = pd.DataFrame({'fav_win_home': 0}, index=[0])
            
#         df_result_fav_away = pd.DataFrame.from_dict(Counter(result_fav_away_lt), orient='index').T
#         df_result_fav_away.rename(columns={'fav_win':'fav_win_away', 'fav_drow':'fav_drow_away', 'fav_loss':'fav_loss_away'}, inplace=True)
        
#         if df_result_fav_away.empty:
#             df_result_fav_away = pd.DataFrame({'fav_win_away': 0}, index=[0])
            
#         df_result     = pd.merge(df_result_fav_home, df_result_fav_away, how='left', left_index=True, right_index=True) 
#         df_all_result = pd.concat([df_all_result, df_result]).reset_index(drop=True)
    
#     df_all_result.fillna(0, inplace=True)
#     df_all_result = df_all_result.apply(pd.to_numeric, errors='coerce').astype('Int64')
#     df_all_result.set_index(df_count_asps.index, inplace=True)

#     df_count_asps = pd.merge(df_count_asps, df_all_result, how='left', left_index=True, right_index=True)
    
#     res_col_names = ['fav_win_home', 'fav_drow_home', 'fav_loss_home', 'fav_win_away', 'fav_drow_away', 'fav_loss_away']
    
#     for res_col in res_col_names:
#         if res_col not in df_count_asps:
#             df_count_asps[res_col] = 0 
            
#     for res_col, col_numb in zip(res_col_names, range(3, 9)):
#         df_count_asps.insert(col_numb, res_col, df_count_asps.pop(res_col))        

#     df_res_today_games = df_unique_game_id[df_unique_game_id.game_id.isin(df_count_asps.index)][['game_id', 'host_role', 'result', '1', 'X', '2', 'fav_goals']].set_index('game_id')

#     df_count_asps = df_count_asps.merge(df_res_today_games[['host_role', 'result', '1', 'X', '2', 'fav_goals']], how='left', left_index=True, right_index=True)
    
#     # Stat result is result calculating some exel results
    
#     df_count_asps['home_diff_mean_res'] = df_count_asps[['fav_win_home', 'fav_drow_home', 'fav_loss_home']].apply(lambda x: sdp.diff_mean_stat_result(x[0], x[1], x[2], fav_win_home_mean, fav_drow_home_mean, fav_loss_home_mean), axis=1)
#     df_count_asps['away_diff_mean_res'] = df_count_asps[['fav_win_away', 'fav_drow_away', 'fav_loss_away']].apply(lambda x: sdp.diff_mean_stat_result(x[0], x[1], x[2], fav_win_away_mean, fav_drow_away_mean, fav_loss_away_mean), axis=1)

#     df_count_asps['home_w_more_d_plus_l_res'] = df_count_asps[['fav_win_home', 'fav_drow_home', 'fav_loss_home']].apply(lambda x: sdp.stat_result_win_more_drow_plus_loss(x[0], x[1], x[2]), axis=1)
#     df_count_asps['away_w_more_d_plus_l_res'] = df_count_asps[['fav_win_away', 'fav_drow_away', 'fav_loss_away']].apply(lambda x: sdp.stat_result_win_more_drow_plus_loss(x[0], x[1], x[2]), axis=1)

#     df_count_asps['home_w_more_d_or_l_res'] = df_count_asps[['fav_win_home', 'fav_drow_home', 'fav_loss_home']].apply(lambda x: sdp.stat_result_win_more_drow_or_loss(x[0], x[1], x[2]), axis=1)
#     df_count_asps['away_w_more_d_or_l_res'] = df_count_asps[['fav_win_away', 'fav_drow_away', 'fav_loss_away']].apply(lambda x: sdp.stat_result_win_more_drow_or_loss(x[0], x[1], x[2]), axis=1)

#     df_count_asps['home_w_more_d_or_l_wt_drow_res'] = df_count_asps[['fav_win_home', 'fav_drow_home', 'fav_loss_home']].apply(lambda x: sdp.stat_result_win_more_drow_or_loss_without_drow(x[0], x[1], x[2]), axis=1)
#     df_count_asps['away_w_more_d_or_l_wt_drow_res'] = df_count_asps[['fav_win_away', 'fav_drow_away', 'fav_loss_away']].apply(lambda x: sdp.stat_result_win_more_drow_or_loss_without_drow(x[0], x[1], x[2]), axis=1)

#     df_count_asps['home_w_more_d_plus_l_wt_drow_res'] = df_count_asps[['fav_win_home', 'fav_drow_home', 'fav_loss_home']].apply(lambda x: sdp.stat_result_win_more_drow_plus_loss_without_drow(x[0], x[1], x[2]), axis=1)
#     df_count_asps['away_w_more_d_plus_l_wt_drow_res'] = df_count_asps[['fav_win_away', 'fav_drow_away', 'fav_loss_away']].apply(lambda x: sdp.stat_result_win_more_drow_plus_loss_without_drow(x[0], x[1], x[2]), axis=1)

#     df_count_asps['home_only_d_or_w_and_d_more_w_or_l_res'] = df_count_asps[['fav_win_home', 'fav_drow_home', 'fav_loss_home']].apply(lambda x: sdp.stat_result_only_drow_or_win_and_drow_more_win_or_loss(x[0], x[1], x[2]), axis=1)
#     df_count_asps['away_only_d_or_w_and_d_more_w_or_l_res'] = df_count_asps[['fav_win_away', 'fav_drow_away', 'fav_loss_away']].apply(lambda x: sdp.stat_result_only_drow_or_win_and_drow_more_win_or_loss(x[0], x[1], x[2]), axis=1)

#     df_count_asps['home_only_d_or_w_and_d_more_w_plus_l_res'] = df_count_asps[['fav_win_home', 'fav_drow_home', 'fav_loss_home']].apply(lambda x: sdp.stat_result_only_drow_or_win_and_drow_more_win_plus_loss(x[0], x[1], x[2]), axis=1)
#     df_count_asps['away_only_d_or_w_and_d_more_w_plus_l_res'] = df_count_asps[['fav_win_away', 'fav_drow_away', 'fav_loss_away']].apply(lambda x: sdp.stat_result_only_drow_or_win_and_drow_more_win_plus_loss(x[0], x[1], x[2]), axis=1)
    
#     res_cols_lt = ['home_diff_mean_res', 'away_diff_mean_res', 'home_w_more_d_plus_l_res', 'away_w_more_d_plus_l_res', 'home_w_more_d_or_l_res', 'away_w_more_d_or_l_res', 'home_w_more_d_or_l_wt_drow_res',
#                    'away_w_more_d_or_l_wt_drow_res', 'home_w_more_d_plus_l_wt_drow_res', 'away_w_more_d_plus_l_wt_drow_res']
    
#     for fora in [-1.5, -1.25, -1, -0.75, None, -0.25, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5]:
#         for res_col in res_cols_lt:
#             col_name_bet = re.sub('res', 'bet', str(res_col))
#             col_name = col_name_bet + '_' + str(fora)
#             df_count_asps[col_name] = df_count_asps[['host_role', 'result', res_col, '1', 'X', '2', 'fav_goals']].apply(lambda x: sdp.bets_fora_result(x[0], x[1], x[2], x[3], x[4], x[5], x[6], fora), axis =1)
        
#     df_count_asps['home_only_d_or_w_and_d_more_w_or_l_bets']   = df_count_asps[['result', 'home_only_d_or_w_and_d_more_w_or_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
#     df_count_asps['away_only_d_or_w_and_d_more_w_or_l_bets']   = df_count_asps[['result', 'away_only_d_or_w_and_d_more_w_or_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
#     df_count_asps['home_only_d_or_w_and_d_more_w_plus_l_bets'] = df_count_asps[['result', 'home_only_d_or_w_and_d_more_w_plus_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
#     df_count_asps['away_only_d_or_w_and_d_more_w_plus_l_bets'] = df_count_asps[['result', 'away_only_d_or_w_and_d_more_w_plus_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
       
#     df_count_asps['date'] = date    
    
#     df_count_asps = df_count_asps.astype('str')
    
#     if csv_file.is_file():
#         df_count_asps.to_csv('csv_files/statistic/df_all_count_aspects.csv', mode='a', header=False, index=True)
#     else: 
#         df_count_asps.to_csv('csv_files/statistic/df_all_count_aspects.csv', mode='a', header=True, index=True)
        
#     del df_count_asps

In [None]:
# df_all_counet_asps = pd.read_csv('csv_files/statistic/df_all_count_aspects.csv', converters={'max_eq_count': literal_eval}, index_col='game_id') 
# For exist file strategy calculate
# df_all_count_asps = pd.read_csv('csv_files/results/df_all_count_only_1_asps_h_660_a_246_games_remove_full_19_03_24.csv', index_col='game_id') 

In [None]:
# Some indexes - 'game_id' may have a dublicate with 'continue' action
# df_all_count_asps.drop_duplicates(inplace=True)

In [None]:
# df_all_count_asps.insert(0, 'date', df_all_count_asps.pop('date'))

In [None]:
# research_num      = 7
# df_all_count_asps = df_all_count_asps[df_all_count_asps.index.isin(df_all_count_asps[['count_asps','max_eq_count']].apply(lambda x: x.name if research_num in x[1] else None, axis=1))].copy()

In [None]:
# print('shape -', df_all_count_asps.shape[0])
# df_all_count_asps.head(20)

In [None]:
# bet_cols                    = [x for x in df_all_count_asps.columns if 'bet' in x]
# df_all_count_asps[bet_cols] = df_all_count_asps[bet_cols].apply(lambda x: pd.to_numeric(x, errors='coerce'), axis=1)

In [None]:
# fav_home_shape_rows = df_all_count_asps[df_all_count_asps.host_role == 'Fav'].shape[0]
# fav_away_shape_rows = df_all_count_asps[df_all_count_asps.host_role == 'Pre'].shape[0]
# print('fav_home_shape_rows -', fav_home_shape_rows)
# print('fav_away_shape_rows -', fav_away_shape_rows)

In [None]:
# dict_results = {}

# for bet_col in df_all_count_asps[bet_cols]:
#     if 'home' in str(bet_col):
#         bet_result = round(df_all_count_asps[df_all_count_asps.host_role == 'Fav'][bet_col].mean(), 3)
#         dict_results.update({bet_col: bet_result})  
#     elif 'away' in str(bet_col):
#         bet_result = round(df_all_count_asps[df_all_count_asps.host_role == 'Pre'][bet_col].mean(), 3)
#         dict_results.update({bet_col: bet_result})    
#     else:
#         None

In [None]:
# df_result = pd.DataFrame.from_dict(dict_results.items())
# df_result.columns=['stat_name', 'bets_result']

In [None]:
# df_result.sort_values(by='bets_result', ascending=False)[:5]

In [None]:
Stop *******************************************************

In [None]:
# df_all_count_asps.to_csv('csv_files/results/df_all_count_only_7_asps_h_37_a_14_games_remove_full_26_03_24.csv', header=True, index=True)

#### Aspects research

In [None]:
# df_all_count_asps_1 =  pd.read_csv('csv_files/results/df_all_count_only_1_asps_h_660_a_246_games_remove_full_19_03_24.csv', converters={'game_id_eq_asps': literal_eval}, index_col='game_id')
# df_all_count_asps_2 =  pd.read_csv('csv_files/results/df_all_count_only_2_asps_h_2153_a_803_games_remove_full_24_03_24.csv', converters={'game_id_eq_asps': literal_eval}, index_col='game_id')
# df_all_count_asps_3 =  pd.read_csv('csv_files/results/df_all_count_only_3_asps_h_1920_a_700_games_remove_full_26_03_24.csv', converters={'game_id_eq_asps': literal_eval}, index_col='game_id')
# df_all_count_asps_4 =  pd.read_csv('csv_files/results/df_all_count_only_4_asps_h_689_a_238_games_remove_full_26_03_24.csv', converters={'game_id_eq_asps': literal_eval}, index_col='game_id')
# df_all_count_asps_5 =  pd.read_csv('csv_files/results/df_all_count_only_5_asps_h_211_a_77_games_remove_full_26_03_24.csv', converters={'game_id_eq_asps': literal_eval}, index_col='game_id')
# df_all_count_asps_6 =  pd.read_csv('csv_files/results/df_all_count_only_6_asps_h_115_a_31_games_remove_full_26_03_24.csv', converters={'game_id_eq_asps': literal_eval}, index_col='game_id')
# df_all_count_asps_7 =  pd.read_csv('csv_files/results/df_all_count_only_7_asps_h_37_a_14_games_remove_full_26_03_24.csv', converters={'game_id_eq_asps': literal_eval}, index_col='game_id')

# df_all_count_asps = pd.concat([df_all_count_asps_1, df_all_count_asps_2, df_all_count_asps_3, df_all_count_asps_4, df_all_count_asps_5, df_all_count_asps_6, df_all_count_asps_7], )

In [None]:
# df_all_count_asps = df_all_count_asps[df_all_count_asps.columns[:16]].copy()
# df_all_count_asps.drop(columns=['max_eq_count'], inplace=True)
# print('shpe -', df_all_count_asps.shape)
# df_all_count_asps.head(2)

In [None]:
# col_names = ['fav_win_home', 'fav_drow_home', 'fav_loss_home', 'fav_win_away', 'fav_drow_away', 'fav_loss_away']
# df_all_count_asps[col_names] = df_all_count_asps[col_names].astype(int)

In [None]:
# fav_win_home_mean  =  1.8825021697042525
# fav_drow_home_mean =  3.8524489377689735
# fav_loss_home_mean =  4.779727095516569

# fav_win_away_mean  =  2.015090735434575
# fav_drow_away_mean =  3.9890338438268103
# fav_loss_away_mean =  3.9516763438846225

In [None]:
# print('fav_win_home_mean  - ', fav_win_home_mean)
# print('fav_drow_home_mean - ', fav_drow_home_mean)
# print('fav_loss_home_mean - ', fav_loss_home_mean)
# print('#########################################')
# print('fav_win_away_mean  - ', fav_win_away_mean)
# print('fav_drow_away_mean - ', fav_drow_away_mean)
# print('fav_loss_away_mean - ', fav_loss_away_mean)

In [None]:
# df_all_count_asps.head(2)

In [None]:
# df_all_count_asps['count_home'] = df_all_count_asps.fav_win_home + df_all_count_asps.fav_drow_home + df_all_count_asps.fav_loss_home
# df_all_count_asps['count_away'] = df_all_count_asps.fav_win_away + df_all_count_asps.fav_drow_away + df_all_count_asps.fav_loss_away

# df_all_count_asps['mean_diff_win_home']  = fav_win_home_mean - df_all_count_asps.count_home / df_all_count_asps.fav_win_home 
# df_all_count_asps['mean_diff_drow_home'] = fav_drow_home_mean - df_all_count_asps.count_home / df_all_count_asps.fav_drow_home 
# df_all_count_asps['mean_diff_loss_home'] = fav_loss_home_mean - df_all_count_asps.count_home / df_all_count_asps.fav_loss_home 

# df_all_count_asps['mean_diff_win_away']  = fav_win_away_mean - df_all_count_asps.count_away / df_all_count_asps.fav_win_away
# df_all_count_asps['mean_diff_drow_away'] = fav_drow_away_mean - df_all_count_asps.count_away / df_all_count_asps.fav_drow_away 
# df_all_count_asps['mean_diff_loss_away'] = fav_loss_away_mean - df_all_count_asps.count_away / df_all_count_asps.fav_loss_away 

In [None]:
# df_aspects = pd.DataFrame({'fs_asps_wt_pls': df_main_asps.fs_asps_wt_pls, 'game_id': df_main_asps.game_id})
# df_aspects.game_id = df_aspects.game_id.map(int)
# df_aspects.head(2)

In [None]:
# df_all_count_asps.reset_index(inplace=True)

In [None]:
# df_all_count_asps['home_res'] = df_all_count_asps[['fav_win_home', 'fav_drow_home', 'fav_loss_home']].apply(lambda x: [x[0], x[1], x[2]], axis=1)
# df_all_count_asps['away_res'] = df_all_count_asps[['fav_win_away', 'fav_drow_away', 'fav_loss_away']].apply(lambda x: [x[0], x[1], x[2]], axis=1)

In [None]:
# cols = ['game_id', 'count_asps', 'count_home', 'home_res', 'mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home', 'count_away', 'away_res', 'mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']
# df_aspect_stat = pd.merge(df_all_count_asps[cols], df_aspects, how='left', left_on='game_id', right_on='game_id')
# df_aspect_stat.set_index(keys='game_id', inplace=True)
# df_aspect_stat.insert(0, 'fs_asps_wt_pls', df_aspect_stat.pop('fs_asps_wt_pls')) 
# # df_aspect_stat['home_away_att'] = df_aspect_stat.count_home / df_aspect_stat.count_away

In [None]:
# df_aspect_stat_gb = df_aspect_stat.groupby('game_id').agg({'fs_asps_wt_pls': list})
# df_aspect_stat_gb.rename(columns={'fs_asps_wt_pls':'fs_asps_wt_pls_lt'}, inplace=True)
# df_aspect_stat_gb.head(3)

In [None]:
# df_aspect_stat_gb = df_aspect_stat_gb.merge(df_aspect_stat, how='left', left_index=True, right_index=True).drop_duplicates(subset='fs_asps_wt_pls_lt')
# df_aspect_stat_gb.drop(columns='fs_asps_wt_pls', inplace=True)
# df_aspect_stat_gb.replace([np.inf, -np.inf], 0, inplace=True)
# df_aspect_stat_gb.replace([np.inf, -np.inf], 0, inplace=True)
# df_aspect_stat_gb = df_aspect_stat_gb.round(3)

# for col in df_aspect_stat_gb.columns:
#     if df_aspect_stat_gb[col].dtype != 'O':
#         df_aspect_stat_gb[col] = df_aspect_stat_gb[col].map(lambda x: 0 if math.isnan(x) else x)

# df_aspect_stat_gb.head(3)

In [None]:
# df_aspect_stat_gb.sort_values(by=df_aspect_stat_gb.columns[1], ascending=True, inplace=True)

In [None]:
# df_aspect_stat_gb.head(1)

In [None]:
# file = open('pickle_files/aspected_files/df_aspect_stat_03_04_2024', 'wb')
# pickle.dump(df_aspect_stat_gb, file) 
# file.close()

In [None]:
Stop ***********************************************************

#### Hide ^

In [None]:
# Mean from main dataset
# fav_win_home_mean  =  1.8825
# fav_drow_home_mean =  3.8524
# fav_loss_home_mean =  4.7797

# fav_win_away_mean  =  2.0150
# fav_drow_away_mean =  3.9890
# fav_loss_away_mean =  3.9516

In [None]:
# with open('pickle_files/aspected_files/df_for_statistic_05_10_2023_to_03_11_2013_w_moon_days', 'rb') as f:
#     df_main = pickle.load(f)  
    
# df_main_gb = df_main.groupby('game_id').fs_asps_wt_pls.agg(list)
# df_main_gb.head()    

In [None]:
# df_main_asps.head(1)

In [None]:
# print('win  bets mean - ', df_main_asps[df_main_asps.win_bets != 0].win_bets.mean())
# print('drow bets mean - ', df_main_asps[df_main_asps.drow_bets != 0].drow_bets.mean())
# print('loss bets mean - ', df_main_asps[df_main_asps.loss_bets != 0].loss_bets.mean())

#### Leagues aspect statistic calculate

In [None]:
df_main_asps_cp = df_main_asps.copy()

In [None]:
df_main_asps_cp = df_main_asps_cp[df_main_asps_cp.ligue == 'top'].copy()
print('shape -', df_main_asps_cp.shape[0])
df_main_asps_cp.head(1)

In [None]:
df_main_asps_cp.ligue_header.value_counts(dropna=False).head(50)

In [None]:
df_asps_leagues = df_main_asps_cp.groupby(['ligue_header', 'country', 'fs_asps_wt_pls']).result.value_counts().to_frame()

In [None]:
df_asps_leagues

In [None]:
df_asps_leagues.rename(columns={'result':'count'}, inplace=True)

In [None]:
df_asps_leagues.reset_index(inplace=True)

In [None]:
df_asps_leagues = df_asps_leagues.pivot(index=['ligue_header', 'country', 'fs_asps_wt_pls'],columns='result',values='count').fillna(0).copy()

In [None]:
df_asps_leagues.insert(0, 'fav_win', df_asps_leagues.pop('fav_win'))

In [None]:
df_asps_leagues = df_asps_leagues.astype(int)

In [None]:
df_asps_leagues['count_games'] = df_asps_leagues.fav_win + df_asps_leagues.fav_drow + df_asps_leagues.fav_loss

In [None]:
df_asps_leagues

In [None]:
df_league_stat = df_asps_leagues.groupby(['ligue_header', 'country']).sum()
df_league_stat.head()

In [None]:
df_asps_leagues['win_att']  = round(df_asps_leagues.fav_win / df_asps_leagues.count_games, 2)
df_asps_leagues['drow_att'] = round(df_asps_leagues.fav_drow / df_asps_leagues.count_games, 2)
df_asps_leagues['loss_att'] = round(df_asps_leagues.fav_loss / df_asps_leagues.count_games, 2)

In [None]:
df_league_stat['win_att_lg']  = round(df_league_stat.fav_win / df_league_stat.count_games, 2)
df_league_stat['drow_att_lg'] = round(df_league_stat.fav_drow / df_league_stat.count_games, 2)
df_league_stat['loss_att_lg'] = round(df_league_stat.fav_loss / df_league_stat.count_games, 2)

In [None]:
df_league_stat.head()

In [None]:
# For mean() calculating
df_main_asps_cp[['win_bets', 'drow_bets', 'loss_bets']] = df_main_asps_cp[['win_bets', 'drow_bets', 'loss_bets']].replace(0, np.NaN)

In [None]:
df_league_stat['win_bets_mean']  = round(df_main_asps_cp.groupby(['ligue_header', 'country']).win_bets.mean(), 2)
df_league_stat['drow_bets_mean'] = round(df_main_asps_cp.groupby(['ligue_header', 'country']).drow_bets.mean(), 2)
df_league_stat['loss_bets_mean'] = round(df_main_asps_cp.groupby(['ligue_header', 'country']).loss_bets.mean(), 2)

In [None]:
df_asps_leagues = df_asps_leagues.merge(df_league_stat[['win_att_lg',	'drow_att_lg', 'loss_att_lg', 'win_bets_mean', 'drow_bets_mean', 'loss_bets_mean']], how='left', left_index=True, right_index=True)

In [None]:
df_asps_leagues['win_att_diff']  = df_asps_leagues.win_att - df_asps_leagues.win_att_lg
df_asps_leagues['drow_att_diff'] = df_asps_leagues.drow_att - df_asps_leagues.drow_att_lg
df_asps_leagues['loss_att_diff'] = df_asps_leagues.loss_att - df_asps_leagues.loss_att_lg

In [None]:
df_asps_leagues['win_prof']  = round(df_asps_leagues.fav_win * df_asps_leagues.win_bets_mean / df_asps_leagues.count_games, 2)
df_asps_leagues['drow_prof'] = round(df_asps_leagues.fav_drow * df_asps_leagues.drow_bets_mean / df_asps_leagues.count_games, 2)
df_asps_leagues['loss_prof'] = round(df_asps_leagues.fav_loss * df_asps_leagues.loss_bets_mean / df_asps_leagues.count_games, 2)

In [None]:
df_asps_leagues.drop(columns=['win_att_lg', 'drow_att_lg', 'loss_att_lg'], inplace=True)

In [None]:
df_asps_leagues[df_asps_leagues.count_games > 19].sort_values(by='loss_prof', ascending=False).head()

In [None]:
# file = open('pickle_files/df_asps_leagues_stat_before_2020', 'wb')
# pickle.dump(df_asps_leagues, file) 
# file.close()

#### End

In [None]:
Stop *****************************************************************************************************

### Leagues aspect statistic calculate

In [None]:
# with open('pickle_files/df_asps_leagues_stat_before_2024', 'rb') as f:
#     df_asps_leagues_2024 = pickle.load(f) 
    
# df_asps_leagues_2024.head() 

In [None]:
# df_bets_mean = df_asps_leagues_2024[['win_bets_mean', 'drow_bets_mean', 'loss_bets_mean']].copy()
# df_bets_mean.head()

In [None]:
with open('pickle_files/df_asps_leagues_stat_before_2020', 'rb') as f:
    df_asps_leagues = pickle.load(f) 
    
df_asps_leagues.head() 

In [None]:
df_asps_leagues['win_surplus']  = df_asps_leagues.win_att_diff - sum([df_asps_leagues.drow_att_diff, df_asps_leagues.loss_att_diff])
df_asps_leagues['drow_surplus'] = df_asps_leagues.drow_att_diff - sum([df_asps_leagues.win_att_diff, df_asps_leagues.loss_att_diff])
df_asps_leagues['loss_surplus'] = df_asps_leagues.loss_att_diff - sum([df_asps_leagues.win_att_diff, df_asps_leagues.drow_att_diff])

In [None]:
df_asps_leagues.loss_surplus.sort_values(ascending=False)

In [None]:
print('df_main_asps_cp shape -', df_main_asps.shape[0])
df_main_asps_cp = df_main_asps[df_main_asps.ligue == 'top'].copy()
print('df_main_asps_cp shape -', df_main_asps_cp.shape[0])

In [None]:
def bets_description(win_bets: float, drow_bets: float, loss_bets: float):
    if win_bets != 0:
        return 'win_' + str(win_bets) 
    elif drow_bets != 0:
        return 'drow_' + str(drow_bets)
    else:
        return 'loss_' + str(loss_bets)
    
df_main_asps_cp['bets_desc'] = df_main_asps_cp[['win_bets', 'drow_bets', 'loss_bets']].apply(lambda x: bets_description(x[0], x[1], x[2]), axis=1)    

In [None]:
df_main_asps_cp['week'] = df_main_asps_cp.game_utc.dt.isocalendar().week

In [None]:
df_main_asps_cp.sort_values(by='game_utc', inplace=True)

In [None]:
df_main_asps_cp.tail(1)

In [None]:
print('unique games top  -', len(df_main_asps_cp.game_id.unique()))
print('count all aspects -', df_main_asps_cp.shape[0])

In [None]:
unique_asps_lt = df_asps_leagues.reset_index().fs_asps_wt_pls.unique()
print('len - ', len(unique_asps_lt))
unique_asps_lt[:3]

In [None]:
df_main_asps_cp.set_index(['ligue_header', 'country', 'fs_asps_wt_pls'], inplace=True)

In [None]:
# df_main_asps_cp = df_main_asps_cp[(df_main_asps_cp.game_utc > '2024') & (df_main_asps_cp.game_utc < '2025')].copy()

In [None]:
print('min date', min(df_main_asps_cp.game_utc))
print('max date', max(df_main_asps_cp.game_utc))

In [None]:
df_main_asps_cp['week_yr'] = df_main_asps_cp[['week', 'game_utc']].apply(lambda x: str(x[0]) + '.' + str(x[1])[0:4], axis=1)
df_main_asps_cp.week_yr.head(3)

In [None]:
cols = ['game_id', 'game_utc', 'goals', 'f_team', 's_team', 'city', 'bets_desc', 'result']
df_main_asps_cp_asps = df_main_asps_cp[cols].merge(df_asps_leagues, how='left', left_index=True, right_index=True)
df_main_asps_cp_asps.head(2)

In [None]:
df_main_asps_cp_asps_gb = df_main_asps_cp_asps.groupby('game_id').agg({'win_att_diff':sum, 'drow_att_diff':sum, 'loss_att_diff':sum, 'win_prof':sum, 'drow_prof':sum, 'loss_prof':sum, 'win_surplus':sum, 'drow_surplus':sum, 'loss_surplus':sum})
df_main_asps_cp_asps_gb.reset_index(inplace=True)
df_main_asps_cp_asps_gb.rename(columns={'win_att_diff':'win_att_diff_sum', 'drow_att_diff':'drow_att_diff_sum', 'loss_att_diff':'loss_att_diff_sum', 'win_prof':'win_prof_sum', 'drow_prof':'drow_prof_sum',
                                        'loss_prof':'loss_prof_sum', 'win_surplus':'win_surplus_sum', 'drow_surplus':'drow_surplus_sum', 'loss_surplus':'loss_surplus_sum'}, inplace=True)
df_main_asps_cp_asps_gb.head(2)

In [None]:
sr_fs_asps_wt_pls_lt = df_main_asps_cp_asps.reset_index().groupby('game_id').fs_asps_wt_pls.apply(tuple)
sr_fs_asps_wt_pls_lt.reset_index().head(2)

In [None]:
df_main_asps_cp_asps_gb = df_main_asps_cp_asps_gb.merge(sr_fs_asps_wt_pls_lt.reset_index(), how='left', left_on='game_id', right_on='game_id')
df_main_asps_cp_asps_gb.rename(columns={'fs_asps_wt_pls':'fs_asps_wt_pls_lt'}, inplace=True)
df_main_asps_cp_asps_gb.head(2)

In [None]:
print('df_main_asps_cp_asps_shape -', df_main_asps_cp_asps.shape[0])
df_main_asps_cp_asps.drop_duplicates(subset='game_id', inplace=True)
print('df_main_asps_cp_asps_shape -', df_main_asps_cp_asps.shape[0])

In [None]:
df_main_asps_cp_asps = df_main_asps_cp_asps[cols].merge(df_main_asps_cp_asps_gb, how='left', left_on='game_id', right_on='game_id')
df_main_asps_cp_asps.set_index(keys='game_id', inplace=True)
df_main_asps_cp_asps.head(1)

In [None]:
St

In [None]:
# Unordinary period research #####################################################
df_main_asps_cp_asps.reset_index(inplace=True)
df_main_asps_cp_asps.set_index(keys='game_id', inplace=True)
df_main_asps_cp_asps.head(1)

In [None]:
# df_specific = df_main_asps_cp_asps[(df_main_asps_cp_asps.game_utc > '2021.04') & (df_main_asps_cp_asps.game_utc < '2022.05')].copy()
# print('min date', min(df_specific.game_utc))
# print('max date', max(df_specific.game_utc))
# print('shape -', df_specific.shape[0])

In [None]:
# df_specific.result.value_counts(dropna=False)

In [None]:
# df_specific

In [None]:
# df_ordinary = df_main_asps_cp_asps[~df_main_asps_cp_asps.game_id.isin(df_specific.game_id)].copy()
# df_ordinary = df_ordinary[df_ordinary.game_utc < '2021.04'].copy()
df_ordinary = df_main_asps_cp_asps[df_main_asps_cp_asps.game_utc < '2022.05'].copy()
print('min date', min(df_ordinary.game_utc))
print('max date', max(df_ordinary.game_utc))
print('shape -', df_ordinary.shape[0])

In [None]:
# df_research = df_main_asps_cp_asps[(~df_main_asps_cp_asps.game_id.isin(df_specific.game_id)) & (~df_main_asps_cp_asps.game_id.isin(df_ordinary.game_id))].copy()
df_research = df_main_asps_cp_asps[(~df_main_asps_cp_asps.index.isin(df_ordinary.index)) & (df_main_asps_cp_asps.game_utc < '2023.05')].copy()
print('min date', min(df_research.game_utc))
print('max date', max(df_research.game_utc))
print('shape -', df_research.shape[0])

In [None]:
df_check = df_main_asps_cp_asps[(~df_main_asps_cp_asps.index.isin(df_ordinary.index)) & (~df_main_asps_cp_asps.index.isin(df_research.index))].copy()
print('min date', min(df_check.game_utc))
print('max date', max(df_check.game_utc))
print('shape -', df_check.shape[0])

In [None]:
ordi_win_asps_lt = []
_ = df_ordinary[['result', 'fs_asps_wt_pls_lt']].apply(lambda x: [ordi_win_asps_lt.append(asp) for asp in x[1]] if x[0] == 'fav_win' else None, axis=1).dropna()
ordi_win_asps_lt[:3]

In [None]:
ordi_drow_asps_lt = []
_ = df_ordinary[['result', 'fs_asps_wt_pls_lt']].apply(lambda x: [ordi_drow_asps_lt.append(asp) for asp in x[1]] if x[0] == 'fav_drow' else None, axis=1).dropna()
ordi_drow_asps_lt[:3]

In [None]:
ordi_loss_asps_lt = []
_ = df_ordinary[['result', 'fs_asps_wt_pls_lt']].apply(lambda x: [ordi_loss_asps_lt.append(asp) for asp in x[1]] if x[0] == 'fav_loss' else None, axis=1).dropna()
ordi_loss_asps_lt[:3]

In [None]:
sr_win_asps_count  = pd.DataFrame({'count_win':  Counter(ordi_win_asps_lt).values()}, index=Counter(ordi_win_asps_lt).keys()).sort_values(by='count_win', ascending=False)
sr_drow_asps_count = pd.DataFrame({'count_drow': Counter(ordi_drow_asps_lt).values()}, index=Counter(ordi_drow_asps_lt).keys()).sort_values(by='count_drow', ascending=False)
sr_loss_asps_count = pd.DataFrame({'count_loss': Counter(ordi_loss_asps_lt).values()}, index=Counter(ordi_loss_asps_lt).keys()).sort_values(by='count_loss', ascending=False)

In [None]:
print('shape win  -', sr_win_asps_count.shape[0])
print('shape drow -', sr_drow_asps_count.shape[0])
print('shape loss -', sr_loss_asps_count.shape[0])

In [None]:
df_asps_count = pd.merge(sr_win_asps_count, sr_drow_asps_count, how='left', left_index=True, right_index=True)
df_asps_count = pd.merge(df_asps_count, sr_loss_asps_count, how='left', left_index=True, right_index=True)

In [None]:
df_asps_count.shape[0]

In [None]:
df_asps_count['cnt_drow_loss'] = df_asps_count.count_drow + df_asps_count.count_loss

In [None]:
df_asps_count['diff_win_drolos'] = df_asps_count.count_win - df_asps_count.cnt_drow_loss

In [None]:
# df_asps_count.sort_values(by='diff_win_drolos', ascending=True).head(10)

In [None]:
# df_asps_count.sort_values(by='diff_win_drolos', ascending=False).head(10)

In [None]:
df_all_profit_calc = pd.DataFrame()

for asp in df_asps_count.index:
    df_research_asp = df_research[['bets_desc', 'fs_asps_wt_pls_lt']].apply(lambda x: x[0] if any(var for var in x[1] if var == asp) else None, axis=1).dropna()
    
    shape        = df_research_asp.shape[0]
    bet_sum_win  = df_research[df_research.index.isin(df_research_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'win' else 0).sum()
    bet_sum_drow = df_research[df_research.index.isin(df_research_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'drow' else 0).sum()
    bet_sum_loss = df_research[df_research.index.isin(df_research_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'loss' else 0).sum()
    
    diff_win  = bet_sum_win - shape
    diff_drow = bet_sum_drow - shape
    diff_loss = bet_sum_loss - shape
    
    profit_calc_dc = {'aspect':asp, 'shape':shape, 'bet_sum_win':bet_sum_win, 'bet_sum_drow':bet_sum_drow, 'bet_sum_loss':bet_sum_loss, 'diff_win':diff_win, 'diff_drow':diff_drow, 'diff_loss':diff_loss}
    
    df_profit_calc     = pd.DataFrame.from_dict([profit_calc_dc])
    df_all_profit_calc = pd.concat([df_all_profit_calc, df_profit_calc])
    
df_all_profit_calc.set_index(keys='aspect', inplace=True)    

In [None]:
df_all_profit_calc.head()

In [None]:
df_all_profit_calc.sort_values(by='diff_win', ascending=False).head(10)

In [None]:
df_all_profit_calc.sort_values(by='diff_drow', ascending=False).head(10)

In [None]:
df_all_profit_calc.sort_values(by='diff_loss', ascending=False).head(10)

In [None]:
df_check_asp = df_check[['bets_desc', 'fs_asps_wt_pls_lt']].apply(lambda x: x[0] if any(var for var in x[1] if var == 'Mn_c_V1') else None, axis=1).dropna()
df_check_asp.shape[0]

In [None]:
df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'win' else 0).sum()

In [None]:
df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'drow' else 0).sum()

In [None]:
df_check[df_check.index.isin(df_check_asp.index)].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'loss' else 0).sum()

In [None]:
# df_check.head(1)

In [None]:
sr_loss_asps_count.iloc[1].name

In [None]:
top_loss_asps_count_lt = sr_loss_asps_count.iloc[0].name 

In [None]:
sr_top_loss_game_ids = df_research[['game_id', 'fs_asps_wt_pls_lt']].apply(lambda x: x[0] if any(asp for asp in x[1] if asp in top_loss_asps_count_lt) else None, axis=1).dropna()

In [None]:
df_research[df_research.game_id.isin(sr_top_loss_game_ids)][['fs_asps_wt_pls_lt', 'result']] # .result.value_counts(dropna=False)

In [None]:
##############################################################

In [None]:
result = ['win', 'drow', 'loss'][2]
ind = 2
col = ['win_att_diff_sum', 'drow_att_diff_sum', 'loss_att_diff_sum', 'win_prof_sum', 'drow_prof_sum', 'loss_prof_sum', 'win_surplus_sum', 'drow_surplus_sum', 'loss_surplus_sum'][ind]
print(col, '-', result)
df_main_asps_cp_asps.sort_values(by=col, ascending=False).head(1000).bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == result else 0).sum()

In [None]:
result = ['win', 'drow', 'loss'][2]

att_diff_shape  = df_main_asps_cp_asps[df_main_asps_cp_asps.win_att_diff_sum > 0.949].shape[0]
print('result -', result, ', att_diff_shape -', att_diff_shape)
bets_att_diff_sum = df_main_asps_cp_asps[df_main_asps_cp_asps.win_att_diff_sum > 0.949].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == result else 0).sum()
print('prof_percent win_att_diff_sum -', bets_att_diff_sum / att_diff_shape)

prof_sum_shape  = df_main_asps_cp_asps[df_main_asps_cp_asps.win_prof_sum > 12.16].shape[0]
print('result -', result, ', prof_sum_shape -', prof_sum_shape)
bets_win_prof_sum = df_main_asps_cp_asps[df_main_asps_cp_asps.win_prof_sum > 12.16].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == result else 0).sum()
print('prof_percent win_prof_sum -', bets_win_prof_sum / prof_sum_shape)

surplus_sum_shape  = df_main_asps_cp_asps[df_main_asps_cp_asps.win_surplus_sum > 1.909].shape[0]
print('result -', result, ', surplus_sum_shape -', surplus_sum_shape)
bets_surplus_sum = df_main_asps_cp_asps[df_main_asps_cp_asps.win_surplus_sum > 1.909].bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == result else 0).sum()
print('prof_percent win_surplus_sum -', bets_surplus_sum / surplus_sum_shape)

In [None]:
df_win_top = pd.concat([df_main_asps_cp_asps[df_main_asps_cp_asps.win_att_diff_sum > 0.949],
                        df_main_asps_cp_asps[df_main_asps_cp_asps.win_prof_sum > 12.16],
                        df_main_asps_cp_asps[df_main_asps_cp_asps.win_surplus_sum > 1.909]
                       ]).reset_index()

In [None]:
df_win_top.drop_duplicates(subset='game_id', inplace=True)

In [None]:
df_win_top.shape[0]

In [None]:
df_win_top.bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == result else 0).sum()

In [None]:
df_win_top.bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == result else 0).sum() / df_win_top.shape[0]

In [None]:
# Calculate with new period
# win_att_diff_sum(1000) - loss - 1341.65 (5000 - 1.22, 30000 - 1.182)
# win_prof_sum(1000)     - loss - 1274.69 (5000 - 1.21, 30000 - 1.167)
# win_surplus_sum(1000)  - loss - 1294.99 (5000 - 1.22, 30000 - 1.181)

# drow_att_diff_sum(1000) - loss - 1080.52 (5000 - 1.09, 30000 - 1.171)
# drow_prof_sum(1000)     - loss - 1143.42 (5000 - 1.06, 30000 - 1.170)
# drow_surplus_sum(1000)  - loss - 1087.36 (5000 - 1.09, 30000 - 1.168)

# loss_att_diff_sum(1000) - loss - 1056.81 (5000 - 1.12, 30000 - 1.175)
# loss_prof_sum(1000)     - loss - 1170.94 (5000 - 1.15, 30000 - 1.177)
# loss_surplus_sum(1000)  - loss - 1056.26 (5000 - 1.13, 30000 - 1.174)


# Before 2020 year results:

# df_win_att_diff_top min - 0.949 max - 6.34
# df_win_prof_sum_top min - 12.16 max - 27.47
# df_win_surplus_sum_top min - 1.909 max - 12.68

In [None]:
# df_win_att_diff_top    = df_main_asps_cp_asps.sort_values(by='win_att_diff_sum', ascending=False).head(1000)
# df_win_prof_sum_top    = df_main_asps_cp_asps.sort_values(by='win_prof_sum', ascending=False).head(1000)
# df_win_surplus_sum_top = df_main_asps_cp_asps.sort_values(by='win_surplus_sum', ascending=False).head(1000)

In [None]:
# print('df_win_att_diff_top min -', df_win_att_diff_top.win_att_diff_sum.min(), 'max -', df_win_att_diff_top.win_att_diff_sum.max())
# print('df_win_prof_sum_top min -', df_win_prof_sum_top.win_prof_sum.min(), 'max -', df_win_prof_sum_top.win_prof_sum.max())
# print('df_win_surplus_sum_top min -', df_win_surplus_sum_top.win_surplus_sum.min(), 'max -', df_win_surplus_sum_top.win_surplus_sum.max())

In [None]:
# df_win_prof_sum_top

In [None]:
# print(df_win_att_diff_top[df_win_att_diff_top.game_id.isin(df_win_prof_sum_top.game_id)].shape[0])
# print(df_win_att_diff_top[df_win_att_diff_top.game_id.isin(df_win_surplus_sum_top.game_id)].shape[0])

In [None]:
#################################################

In [None]:
df_main_asps_cp_asps.bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'loss' else 0).sum()

In [None]:
38904 / df_main_asps_cp_asps.shape[0]

In [None]:
df_main_asps_cp_asps.reset_index(inplace=True)

In [None]:
df_main_asps_cp_asps.shape[0]

In [None]:
df_main_asps_cp_asps.tail(1)

In [None]:
# df_main_asps_cp_asps.bets_desc.value_counts(dropna=False)

In [None]:
df_main_asps_cp_asps[df_main_asps_cp_asps.loss_att_diff_sum > 0.5].shape[0]

In [None]:
# Random results wich give 17 percents of benefit
all_rand_results = []

for x in range(1,100):
    df_research = df_main_asps_cp_asps[df_main_asps_cp_asps.drow_surplus_sum > 0.6].copy()
    shape = df_research.shape[0]
    df_research.reset_index(drop=True, inplace=True)
    rand_results = df_research.loc[random.sample(range(0, shape), 1)].bets_desc.map(lambda x: float(x.split('_')[1]) if x.split('_')[0] == 'drow' else 0).sum()
    all_rand_results.append(rand_results)
    
print(shape)
sum(all_rand_results) 

In [None]:
st

In [None]:
def check_two_values_existing(prof_in: int, df_shape: int):
    if (prof_in > 0) and (df_shape > 0):
        perc_in = round(prof_in / df_shape, 2)
    else:
        perc_in = None
    return perc_in
    
def add_cols_names(fav_win: int, fav_drow: int, fav_loss: int, count_games_time: int):
    return {'fav_win': fav_win, 'fav_drow': fav_drow, 'fav_loss': fav_loss, 'count_games': count_games_time}

# 'df_bets_mean' creates from 'df_asps_leagues_stat_before_2024'

# Choose a column for time interval:
df_main_asps_cp['time'] = df_main_asps_cp['week_yr'].copy() # month_yr, week_yr, day_month_yr

df_asps_leagues_cp = df_asps_leagues.copy()
df_all_profit_calc = pd.DataFrame()

ind = 1
win_col_sort  = ['win_surplus', 'win_prof', 'win_att_diff'][ind]
drow_col_sort = ['drow_surplus', 'drow_prof', 'drow_att_diff'][ind]
loss_col_sort = ['loss_surplus', 'loss_prof', 'loss_att_diff'][ind]
print('cols_sort -', win_col_sort, '-', drow_col_sort, '-', loss_col_sort)

min_count_games = 1

win_cnt_top  = 3000
drow_cnt_top = 3000
loss_cnt_top = 3000

for time in df_main_asps_cp.time.unique():
    
    df_asps_leagues_cp_cnt = df_asps_leagues_cp[df_asps_leagues_cp.count_games >= min_count_games].copy()
    # print('df_asps_leagues_cp_cnt -', df_asps_leagues_cp_cnt.shape[0], '-', time)
    
    df_main_asps_cp_time = df_main_asps_cp[df_main_asps_cp.time.isin([time])].copy()
    
    # Calculating leagues aspect sum for game_id and getting top block
    
    cols                 = ['game_id', 'game_utc', 'goals', 'f_team', 's_team', 'city', 'bets_desc']
    df_main_asps_cp_asps = df_main_asps_cp_time[cols].merge(df_asps_leagues_cp_cnt, how='left', left_index=True, right_index=True)
    
    df_main_asps_cp_asps_gb = df_main_asps_cp_asps.groupby('game_id').agg({'win_att_diff':sum, 'drow_att_diff':sum, 'loss_att_diff':sum, 'win_prof':sum, 'drow_prof':sum, 'loss_prof':sum, 
                                                                           'win_surplus':sum, 'drow_surplus':sum, 'loss_surplus':sum})
    df_main_asps_cp_asps_gb.reset_index(inplace=True)
    df_main_asps_cp_asps_gb.rename(columns={'win_att_diff':'win_att_diff_sum', 'drow_att_diff':'drow_att_diff_sum', 'loss_att_diff':'loss_att_diff_sum', 'win_prof':'win_prof_sum', 
                                            'drow_prof':'drow_prof_sum', 'loss_prof':'loss_prof_sum', 'win_surplus':'win_surplus_sum', 'drow_surplus':'drow_surplus_sum', 
                                            'loss_surplus':'loss_surplus_sum'}, inplace=True)
    
    sr_fs_asps_wt_pls_lt = df_main_asps_cp_asps.reset_index().groupby('game_id').fs_asps_wt_pls.apply(tuple)
    
    df_main_asps_cp_asps_gb = df_main_asps_cp_asps_gb.merge(sr_fs_asps_wt_pls_lt.reset_index(), how='left', left_on='game_id', right_on='game_id')
    df_main_asps_cp_asps_gb.rename(columns={'fs_asps_wt_pls':'fs_asps_wt_pls_lt'}, inplace=True)
    
    df_main_asps_cp_asps.drop_duplicates(subset='game_id', inplace=True)
    
    df_main_asps_cp_asps = df_main_asps_cp_asps[cols].merge(df_main_asps_cp_asps_gb, how='left', left_on='game_id', right_on='game_id')
    df_main_asps_cp_asps.set_index(keys='game_id', inplace=True)
    
    df_win_att_diff_top    = df_main_asps_cp_asps.sort_values(by='win_att_diff_sum', ascending=False).head(1000)
    df_win_prof_sum_top    = df_main_asps_cp_asps.sort_values(by='win_prof_sum', ascending=False).head(1000)
    df_win_surplus_sum_top = df_main_asps_cp_asps.sort_values(by='win_surplus_sum', ascending=False).head(1000)
    
    
    
    # Calculating each leagues aspect rank block
#     win_top_inds  = df_asps_leagues_cp_cnt.sort_values(by=win_col_sort, ascending=False).head(win_cnt_top).index 
#     drow_top_inds = df_asps_leagues_cp_cnt.sort_values(by=drow_col_sort, ascending=False).head(drow_cnt_top).index 
#     loss_top_inds = df_asps_leagues_cp_cnt.sort_values(by=loss_col_sort, ascending=False).head(loss_cnt_top).index 
    
#     df_predict_win  = df_main_asps_cp_time[df_main_asps_cp_time.index.isin(win_top_inds)].drop_duplicates(subset='game_id')
#     df_predict_drow = df_main_asps_cp_time[df_main_asps_cp_time.index.isin(drow_top_inds)].drop_duplicates(subset='game_id')
#     df_predict_loss = df_main_asps_cp_time[df_main_asps_cp_time.index.isin(loss_top_inds)].drop_duplicates(subset='game_id')

#     win_cnt_in_win   = df_predict_win.bets_desc.map(lambda x:  1 if x.split('_')[0] == 'win' else 0).sum()
#     drow_cnt_in_drow = df_predict_drow.bets_desc.map(lambda x: 1 if x.split('_')[0] == 'drow' else 0).sum()
#     loss_cnt_in_drow = df_predict_drow.bets_desc.map(lambda x: 1 if x.split('_')[0] == 'loss' else 0).sum()
#     drow_cnt_in_loss = df_predict_loss.bets_desc.map(lambda x: 1 if x.split('_')[0] == 'drow' else 0).sum()
#     loss_cnt_in_loss = df_predict_loss.bets_desc.map(lambda x: 1 if x.split('_')[0] == 'loss' else 0).sum()
    
#     win_prof_in_win   = round(df_predict_win.bets_desc.map(lambda x:  float(x.split('_')[1]) if x.split('_')[0] == 'win' else 0).sum(), 2) - df_predict_win.shape[0]
#     drow_prof_in_drow = round(df_predict_drow.bets_desc.map(lambda x: float(x.split('_')[1]) if x.split('_')[0] == 'drow' else 0).sum(), 2) - df_predict_drow.shape[0]
#     loss_prof_in_drow = round(df_predict_drow.bets_desc.map(lambda x: float(x.split('_')[1]) if x.split('_')[0] == 'loss' else 0).sum(), 2) - df_predict_drow.shape[0]
#     drow_prof_in_loss = round(df_predict_loss.bets_desc.map(lambda x: float(x.split('_')[1]) if x.split('_')[0] == 'drow' else 0).sum(), 2) - df_predict_loss.shape[0]
#     loss_prof_in_loss = round(df_predict_loss.bets_desc.map(lambda x: float(x.split('_')[1]) if x.split('_')[0] == 'loss' else 0).sum(), 2) - df_predict_loss.shape[0]
    
#     pre_fora_half_in_drow = round(df_predict_drow.bets_desc.map(lambda x: sdp.fora_calculate('For_Pre', float(x.split('_')[1]), 0.5) if x.split('_')[0] == 'drow' or x.split('_')[0] == 'loss' else 0).sum(), 2)
#     pre_fora_half_in_loss = round(df_predict_loss.bets_desc.map(lambda x: sdp.fora_calculate('For_Pre', float(x.split('_')[1]), 0.5) if x.split('_')[0] == 'drow' or x.split('_')[0] == 'loss' else 0).sum(), 2)

#     perc_win_in_win   = check_two_values_existing(win_prof_in_win, df_predict_win.shape[0])
#     perc_drow_in_drow = check_two_values_existing(drow_prof_in_drow, df_predict_drow.shape[0])
#     perc_loss_in_drow = check_two_values_existing(loss_prof_in_drow, df_predict_drow.shape[0])
#     perc_drow_in_loss = check_two_values_existing(drow_prof_in_loss, df_predict_loss.shape[0])   
#     perc_loss_in_loss = check_two_values_existing(loss_prof_in_loss, df_predict_loss.shape[0])   
    
#     perc_pre_fora_half_in_drow = check_two_values_existing(pre_fora_half_in_drow, df_predict_drow.shape[0])       
#     perc_pre_fora_half_in_loss = check_two_values_existing(pre_fora_half_in_loss, df_predict_loss.shape[0])                  
            
#     profit_calc_dc = {'time': time, 'win_shp': df_predict_win.shape[0], 'drow_shp': df_predict_drow.shape[0], 'loss_shp': df_predict_loss.shape[0],
#                       'win_in_win': win_cnt_in_win, 'drow_in_drow': drow_cnt_in_drow, 'loss_in_drow': loss_cnt_in_drow, 'drow_in_loss': drow_cnt_in_loss, 'loss_in_loss': loss_cnt_in_loss,
#                       'win_prof_in_win': win_prof_in_win, 'drow_prof_in_drow': drow_prof_in_drow, 'loss_prof_in_drow': loss_prof_in_drow, 'drow_prof_in_loss': drow_prof_in_loss, 'loss_prof_in_loss': loss_prof_in_loss,
#                       'pre_fora_half_in_drow': pre_fora_half_in_drow, 'pre_fora_half_in_loss': pre_fora_half_in_loss,
#                       'perc_win_in_win': perc_win_in_win, 'perc_drow_in_drow': perc_drow_in_drow, 'perc_loss_in_drow': perc_loss_in_drow, 'perc_drow_in_loss': perc_drow_in_loss, 'perc_loss_in_loss': perc_loss_in_loss,
#                       'perc_pre_fora_half_in_drow': perc_pre_fora_half_in_drow, 'perc_pre_fora_half_in_loss': perc_pre_fora_half_in_loss
#                       }
    
#     df_profit_calc = pd.DataFrame.from_dict([profit_calc_dc])
#     df_all_profit_calc = pd.concat([df_all_profit_calc, df_profit_calc])
    
    # Adding time games data to leagues-aspects statistic file
    
    df_asps_leagues_time = df_main_asps_cp_time.groupby(['ligue_header', 'country', 'fs_asps_wt_pls']).result.value_counts().to_frame()
    df_asps_leagues_time.rename(columns={'result':'count'}, inplace=True)
    df_asps_leagues_time.reset_index(inplace=True)
    df_asps_leagues_time = df_asps_leagues_time.pivot(index=['ligue_header', 'country', 'fs_asps_wt_pls'], columns='result', values='count').fillna(0).copy()
    df_asps_leagues_time = df_asps_leagues_time.astype(int)
    
    for col in ['fav_win', 'fav_drow', 'fav_loss']:
        if col not in df_asps_leagues_time.columns:
            df_asps_leagues_time[col] = 0
    
    df_asps_leagues_time['count_games_time'] = df_asps_leagues_time.fav_win + df_asps_leagues_time.fav_drow + df_asps_leagues_time.fav_loss
    df_asps_leagues_time.rename(columns={'fav_win':'fav_win_time', 'fav_drow':'fav_drow_time', 'fav_loss':'fav_loss_time'}, inplace=True)
     
    df_asps_leagues_cp = df_asps_leagues_cp.merge(df_asps_leagues_time, how='left', left_index=True, right_index=True).fillna(0)
    # print('df_asps_leagues_cp shape after merge -', df_asps_leagues_cp.shape[0], ' - ', time)
    
    # Output only four columns
    df_asps_leagues_cp = df_asps_leagues_cp[['fav_win', 'fav_drow', 'fav_loss', 'count_games', 'fav_win_time', 'fav_drow_time', 'fav_loss_time', 'count_games_time']].apply(
                                                                                             lambda x: add_cols_names(x[0] + x[4], x[1] + x[5], x[2] + x[6], x[3] + x[7]), axis='columns', result_type='expand').copy()
    
    # Add new multiindex with data
    new_mindex_tp  = [x for x in df_asps_leagues_time.index if x not in df_asps_leagues_cp.index]
    new_multiindex = pd.MultiIndex.from_tuples(new_mindex_tp, names=['ligue_header', 'country', 'fs_asps_wt_pls'])

    df_new_data = df_asps_leagues_time[df_asps_leagues_time.index.isin(new_multiindex)].copy()
    df_new_data.rename(columns={'fav_win_time': 'fav_win', 'fav_drow_time': 'fav_drow', 'fav_loss_time': 'fav_loss', 'count_games_time': 'count_games'}, inplace=True)
    df_asps_leagues_cp = pd.concat([df_asps_leagues_cp, df_new_data])
    ###
    df_asps_leagues_cp['win_att']  = df_asps_leagues_cp.fav_win / df_asps_leagues_cp.count_games
    df_asps_leagues_cp['drow_att'] = df_asps_leagues_cp.fav_drow / df_asps_leagues_cp.count_games
    df_asps_leagues_cp['loss_att'] = df_asps_leagues_cp.fav_loss / df_asps_leagues_cp.count_games
    
    df_league_stat = df_asps_leagues_cp.groupby(['ligue_header', 'country']).sum()
    
    df_league_stat['win_att_lg']  = df_league_stat.fav_win / df_league_stat.count_games
    df_league_stat['drow_att_lg'] = df_league_stat.fav_drow / df_league_stat.count_games
    df_league_stat['loss_att_lg'] = df_league_stat.fav_loss / df_league_stat.count_games

    df_asps_leagues_cp = df_asps_leagues_cp.merge(df_league_stat[['win_att_lg',	'drow_att_lg', 'loss_att_lg']], how='left', left_index=True, right_index=True)
    
    df_asps_leagues_cp['win_att_diff']  = df_asps_leagues_cp.win_att - df_asps_leagues_cp.win_att_lg
    df_asps_leagues_cp['drow_att_diff'] = df_asps_leagues_cp.drow_att - df_asps_leagues_cp.drow_att_lg
    df_asps_leagues_cp['loss_att_diff'] = df_asps_leagues_cp.loss_att - df_asps_leagues_cp.loss_att_lg
    
    df_asps_leagues_cp = df_asps_leagues_cp.merge(df_bets_mean, how='left', left_index=True, right_index=True)
    
    df_asps_leagues_cp['win_prof']  = round(df_asps_leagues_cp.fav_win * df_asps_leagues_cp.win_bets_mean / df_asps_leagues_cp.count_games, 2)
    df_asps_leagues_cp['drow_prof'] = round(df_asps_leagues_cp.fav_drow * df_asps_leagues_cp.drow_bets_mean / df_asps_leagues_cp.count_games, 2)
    df_asps_leagues_cp['loss_prof'] = round(df_asps_leagues_cp.fav_loss * df_asps_leagues_cp.loss_bets_mean / df_asps_leagues_cp.count_games, 2)
    
    df_asps_leagues_cp['win_surplus']  = round(df_asps_leagues_cp.win_att_diff - (df_asps_leagues_cp.drow_att_diff + df_asps_leagues_cp.loss_att_diff), 3)
    df_asps_leagues_cp['drow_surplus'] = round(df_asps_leagues_cp.drow_att_diff - (df_asps_leagues_cp.win_att_diff + df_asps_leagues_cp.loss_att_diff), 3)
    df_asps_leagues_cp['loss_surplus'] = round(df_asps_leagues_cp.loss_att_diff - (df_asps_leagues_cp.win_att_diff + df_asps_leagues_cp.drow_att_diff), 3)
    
    df_asps_leagues_cp.drop(columns=['win_att_lg', 'drow_att_lg', 'loss_att_lg'], inplace=True) 
    ####
df_all_profit_calc.set_index(keys='time', inplace=True)    

In [None]:
#     Success index block 

# all_success_win_inds  = []
# all_success_drow_inds = []
# all_success_loss_inds = []

#     success_win_inds  = list(df_predict_win.apply(lambda x: x.name if x['bets_desc'].split('_')[0] == 'win' else None, axis=1).dropna().values)
#     success_drow_inds = list(df_predict_drow.apply(lambda x: x.name if x['bets_desc'].split('_')[0] == 'drow' else None, axis=1).dropna().values)
#     success_loss_inds = list(df_predict_loss.apply(lambda x: x.name if x['bets_desc'].split('_')[0] == 'loss' else None, axis=1).dropna().values)
    
#     all_success_win_inds  = all_success_win_inds + success_win_inds
#     all_success_drow_inds = all_success_drow_inds + success_drow_inds
#     all_success_loss_inds = all_success_loss_inds + success_loss_inds
#     print('all_success_win_inds -', len(all_success_win_inds), 'all_success_drow_inds -', len(all_success_drow_inds), 'all_success_loss_inds -', len(all_success_win_inds))

In [None]:
print('count_time_ints   -', df_all_profit_calc.shape[0])
print('count_games       -', df_all_profit_calc.win_shp.sum() + df_all_profit_calc.drow_shp.sum() + df_all_profit_calc.loss_shp.sum())
print('games_in_time_int -', ((df_all_profit_calc.drow_shp.sum() + df_all_profit_calc.loss_shp.sum()) / df_all_profit_calc.shape[0]).round(2), ' without win')
print('perc_win_in_win   -', (df_all_profit_calc.win_prof_in_win.sum() / df_all_profit_calc.win_shp.sum()).round(2), '-', df_all_profit_calc.win_shp.sum(), '-', df_all_profit_calc.win_prof_in_win.sum().round(), '-', 'time_ints -', df_all_profit_calc.perc_win_in_win.count())
print('perc_drow_in_drow -', (df_all_profit_calc.drow_prof_in_drow.sum() / df_all_profit_calc.drow_shp.sum()).round(2), '-', df_all_profit_calc.drow_shp.sum(), '-', df_all_profit_calc.drow_prof_in_drow.sum().round(), '-',  'time_ints -', df_all_profit_calc.perc_drow_in_drow.count())
print('perc_loss_in_drow -', (df_all_profit_calc.loss_prof_in_drow.sum() / df_all_profit_calc.drow_shp.sum()).round(2), '-', df_all_profit_calc.drow_shp.sum(), '-', df_all_profit_calc.loss_prof_in_drow.sum().round(), '-', 'time_ints -', df_all_profit_calc.perc_loss_in_drow.count())
print('perc_drow_in_loss -', (df_all_profit_calc.drow_prof_in_loss.sum() / df_all_profit_calc.loss_shp.sum()).round(2), '-', df_all_profit_calc.loss_shp.sum(), '-', df_all_profit_calc.drow_prof_in_loss.sum().round(), '-', 'time_ints -', df_all_profit_calc.perc_drow_in_loss.count())
print('perc_loss_in_loss -', (df_all_profit_calc.loss_prof_in_loss.sum() / df_all_profit_calc.loss_shp.sum()).round(2), '-', df_all_profit_calc.loss_shp.sum(), '-', df_all_profit_calc.loss_prof_in_loss.sum().round(), '-', 'time_ints -', df_all_profit_calc.perc_loss_in_loss.count())
print('')
print('perc_pre_fora_half_in_drow -', (df_all_profit_calc.pre_fora_half_in_drow.sum() / df_all_profit_calc.drow_shp.sum()).round(2),  '-', df_all_profit_calc.drow_shp.sum(), '-', df_all_profit_calc.pre_fora_half_in_drow.sum().round(), '-', 'time_ints -', df_all_profit_calc.perc_pre_fora_half_in_drow.count())
print('perc_pre_fora_half_in_loss -', (df_all_profit_calc.pre_fora_half_in_loss.sum() / df_all_profit_calc.loss_shp.sum()).round(2),  '-', df_all_profit_calc.loss_shp.sum(), '-', df_all_profit_calc.pre_fora_half_in_loss.sum().round(), '-', 'time_ints -', df_all_profit_calc.perc_pre_fora_half_in_loss.count())

In [None]:
# df_all_profit_calc.head(1000)

In [None]:
# cols = ['perc_loss_in_drow', 'perc_loss_in_loss']# 'perc_drow_in_drow',  'perc_drow_in_loss',
# df_reserch_cols = df_all_profit_calc[cols].fillna(0)
# df_reserch_cols['sum_cols_prof'] = df_reserch_cols[cols].sum(axis=1) - len(cols)
# print('sum_cols_prof -', sum(df_all_profit_calc[cols].sum(axis=1) - len(cols)))
# df_reserch_cols = df_reserch_cols.merge(df_all_profit_calc[['drow_shp', 'loss_shp', 'loss_prof_in_drow', 'loss_prof_in_loss', 'loss_in_drow', 'loss_in_loss']], how='left', left_index=True, right_index=True)
# df_reserch_cols

In [None]:
print('count_all_bets -', df_all_profit_calc[['drow_shp', 'loss_shp']].sum().sum().round())
print('count_access_bets -', df_all_profit_calc[['loss_in_drow', 'loss_in_loss']].sum().sum())
print('sum_profit_bets -', df_all_profit_calc[['loss_prof_in_drow', 'loss_prof_in_loss']].sum().sum())

In [None]:
df_all_profit_calc['sum_profits'] = round(df_all_profit_calc.loss_prof_in_drow + df_all_profit_calc.loss_prof_in_loss, 2)

In [None]:
df_all_profit_calc.shape[0]

In [None]:
df_all_profit_calc[df_all_profit_calc.sum_profits > 0].sum_profits.count()

In [None]:
df_all_profit_calc[df_all_profit_calc.sum_profits == 0].sum_profits.count()

In [None]:
df_all_profit_calc[df_all_profit_calc.sum_profits < 0].sum_profits.count()

In [None]:
df_all_profit_calc.head()

In [None]:
# 04.2021 - 04.2022 - best results
df_all_profit_calc.reset_index().loc[159:].sum_profits.sum()

In [None]:
# 04.2021 - 04.2022 - best results
df_all_profit_calc[['drow_shp', 'loss_shp', 'loss_in_drow', 'loss_in_loss', 'loss_prof_in_drow', 'loss_prof_in_loss', 'sum_profits']]

In [None]:
# Try use only unique time or only not unique time + #############################################################################################################################
# Calculate mean difference between att_diffs in exactly predict games +
# Time step longs equal one day +
# Find more predictable ligues or period or aspects

In [None]:
cols_pred   = ['game_id', 'game_utc', 'goals', 'f_team', 's_team', 'city', 'bets_desc']
cols_league = ['win_att_diff', 'drow_att_diff', 'loss_att_diff', 'fav_win', 'fav_drow', 'fav_loss', 'count_games',]

df_drow = df_predict_drow[cols_pred].merge(df_asps_leagues_cp[cols_league], how='left', left_index=True, right_index=True)
df_drow[['win_att_diff', 'drow_att_diff', 'loss_att_diff']] = df_drow[['win_att_diff', 'drow_att_diff', 'loss_att_diff']].round(2)
# df_drow

In [None]:
# df_asps_leagues_cp[df_asps_leagues_cp.index.isin(drow_top_inds)]

In [None]:
Stop *****************************************************************************************************

In [None]:
with open('pickle_files/aspected_files/df_aspect_stat_03_04_2024', 'rb') as f:
    df_aspect_stat = pickle.load(f)  

In [None]:
print('shape -', df_aspect_stat.shape[0])
df_aspect_stat.head()

In [None]:
df_aspect_stat[df_aspect_stat.count_asps == 4].sort_values(by='mean_diff_loss_home', ascending=False).head()

In [None]:
# df_aspect_stat.fs_asps_wt_pls_lt = df_aspect_stat.fs_asps_wt_pls_lt.map(literal_eval)
df_aspect_stat.fs_asps_wt_pls_lt = df_aspect_stat.fs_asps_wt_pls_lt.map(lambda x: sorted(x))

In [None]:
# df_aspect_stat[df_aspect_stat.count_home > 5].sort_values(by='mean_diff_win_home', ascending=False).tail(1)

In [None]:
print('shape -', df_main_asps.shape[0])
df_main_asps.head(3)

In [None]:
df_main_asps_gb = df_main_asps.groupby('game_id').fs_asps_wt_pls.agg(list)
print('shape -', df_main_asps_gb.shape[0])
df_main_asps_gb.head(2)

In [None]:
cols_merge = ['game_id'] + df_main_asps.columns[-6:].to_list() + ['game_utc', '1', 'X', '2']
df_main_asps_uniq = df_main_asps.drop_duplicates(subset='game_id')
df_main_asps_gb = pd.merge(df_main_asps_gb, df_main_asps_uniq[cols_merge], how='left', left_index=True, right_on='game_id').set_index(keys='game_id')

In [None]:
df_main_asps_gb.head(2)

In [None]:
df_main_asps_gb.fs_asps_wt_pls = df_main_asps_gb.fs_asps_wt_pls.map(lambda x: sorted(x))

In [None]:
matched_asps_lt = list()
_ = df_main_asps_gb.fs_asps_wt_pls.map(lambda x: matched_asps_lt.append(x) if x in df_aspect_stat.fs_asps_wt_pls_lt.to_list() else None)

In [None]:
print('len -', len(matched_asps_lt))
matched_asps_lt[:3]

In [None]:
df_aspect_stat[df_aspect_stat.fs_asps_wt_pls_lt.isin(matched_asps_lt)].head()

In [None]:
# Stop ***********************************

In [None]:
df_aspect_stat.tail(2)

In [None]:
# Devide aspect list to counts
cols = ['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home', 'mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']

for col in cols:
    two_cols = ['count_asps', col]
    df_aspect_stat[col] = df_aspect_stat[two_cols].apply(lambda x: x[1]/x[0], axis=1)

In [None]:
df_aspect_stat.tail(2)

In [None]:
unique_asps_lt = list()

_= df_aspect_stat.fs_asps_wt_pls_lt.map(lambda x: [unique_asps_lt.append(asp) for asp in x])

In [None]:
print('len -', len(unique_asps_lt))
unique_asps_lt = list(set(unique_asps_lt))
print('len -', len(unique_asps_lt))
unique_asps_lt[:3]

In [None]:
full_unique_asps_lt = list()

cols =  ['count_home', 'home_res', 'mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home', 'count_away', 'away_res', 'mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']

for asp in unique_asps_lt: 
    all_unique_asps_dc = {}
    
    for col in cols:
        cols_pair = ['fs_asps_wt_pls_lt', col]
        
        if df_aspect_stat[col].dtype == 'int64':
            unique_asps_dc = {'aspect': asp, col: df_aspect_stat[cols_pair].apply(lambda x: x[1] if asp in x[0] else None, axis=1).sum().astype(int)}      
        
        if df_aspect_stat[col].dtype == 'float64':
            unique_asps_dc = {'aspect': asp, col: round(df_aspect_stat[cols_pair].apply(lambda x: x[1] if asp in x[0] else None, axis=1).sum(), 3)}  
        
        elif df_aspect_stat[col].dtype == 'O':
            count_lt = [df_aspect_stat[cols_pair].apply(lambda x: x[1][0] if asp in x[0] else None, axis=1).sum().astype(int), 
                        df_aspect_stat[cols_pair].apply(lambda x: x[1][1] if asp in x[0] else None, axis=1).sum().astype(int), 
                        df_aspect_stat[cols_pair].apply(lambda x: x[1][2] if asp in x[0] else None, axis=1).sum().astype(int)]
            
            unique_asps_dc = {'aspect': asp, col: count_lt}
    
        all_unique_asps_dc.update(unique_asps_dc)   
    full_unique_asps_lt.append(all_unique_asps_dc)

In [None]:
df_full_unique_asps = pd.DataFrame.from_dict(full_unique_asps_lt)
print('shape -', df_full_unique_asps.shape[0])
df_full_unique_asps.tail(3)

In [None]:
df_all_games_asps_rate = pd.DataFrame()

for ind in range(0, df_main_asps_gb.shape[0]):
    
    df_game_asps = pd.DataFrame()

    for asp in  df_main_asps_gb.fs_asps_wt_pls[ind]:
        # if asp in df_aspect_stat.fs_asps_wt_pls_lt.values.tolist():
        if asp in df_full_unique_asps.aspect.tolist():

            if df_main_asps_gb.iloc[ind].host_role == 'Fav':
                cols_stat = ['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']
            else:
                cols_stat = ['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']

            # df_one_asp = pd.DataFrame(df_aspect_stat[df_aspect_stat.fs_asps_wt_pls_lt == asp][cols_stat])
            df_one_asp = pd.DataFrame(df_full_unique_asps[df_full_unique_asps.aspect == asp][cols_stat])
            df_one_asp['aspect'] = asp

            df_game_asps = pd.concat([df_game_asps, df_one_asp])

        else:
            continue

        df_game_asps_rate = df_game_asps[cols_stat].apply(sum).to_frame().T

        df_game_asps_rate['aspects']  = str(df_game_asps.aspect.to_list())
        df_game_asps_rate['game_id']  = df_main_asps_gb.iloc[ind].name

        df_game_asps_rate.set_index(keys='game_id', inplace=True)
    
    df_all_games_asps_rate = pd.concat([df_all_games_asps_rate, df_game_asps_rate])    

In [None]:
df_main_asps_gb.head(1)

In [None]:
df_main_asps_gb['fav_advantage'] = df_main_asps_gb[['1', '2']].apply(lambda x: x[0]/x[1] if x[0] > x[1] else x[1]/x[0], axis=1).round(2)

In [None]:
cols = ['game_utc', 'host_role', 'result', 'fav_goals',	'1', 'X', '2', 'fav_advantage', 'win_bets',	'drow_bets', 'loss_bets']

df_rate = df_all_games_asps_rate.merge(df_main_asps_gb[cols], how='left', left_index=True, right_index=True)
# df_rate.head(3)

In [None]:
# advantage = 2

df_rate_home = df_rate[~df_rate.mean_diff_win_home.isna()].copy()
df_rate_home.drop(columns=['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away'], inplace=True)
print('shape home -', df_rate_home.shape[0])
# df_rate_home = df_rate_home[df_rate_home.fav_advantage < advantage].copy()
# print('shape home -', df_rate_home.shape[0])

In [None]:
df_rate_away = df_rate[~df_rate.mean_diff_win_away.isna()].copy()
df_rate_away.drop(columns=['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home'], inplace=True)
print('shape away -', df_rate_away.shape[0])
# df_rate_away = df_rate_away[df_rate_away.fav_advantage < advantage].copy()
# print('shape away -', df_rate_away.shape[0])

In [None]:
# cnt_asps = 5

# # df_rate_home = df_rate[~df_rate.mean_diff_win_home.isna()].copy()
# # df_rate_home.drop(columns=['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away'], inplace=True)
# print('shape home -', df_rate_home.shape[0])
# df_rate_home = df_rate_home[df_rate_home.aspects.map(lambda x: len(literal_eval(x))) <= cnt_asps].copy()
# print('shape home -', df_rate_home.shape[0])

In [None]:
# df_rate_away = df_rate[~df_rate.mean_diff_win_away.isna()].copy()
# df_rate_away.drop(columns=['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home'], inplace=True)
# print('shape away -', df_rate_away.shape[0])
# df_rate_away = df_rate_away[df_rate_away.aspects.map(lambda x: len(literal_eval(x))) <= cnt_asps].copy()
# print('shape away -', df_rate_away.shape[0])

In [None]:
# df_rate_away

In [None]:
mult_val = 1.5

def stat_result_twice_win_more_drow_plus_loss(fav_win: int,  fav_drow: int, fav_loss: int):
    if int(fav_win) * mult_val  > (int(fav_drow) + int(fav_loss)):    
        return 'fav_win'
    elif int(fav_drow) > int(fav_loss):
        return 'fav_drow'
    else:
        return 'fav_loss'

def stat_result_twice_win_more_drow_or_loss(fav_win: int,  fav_drow: int, fav_loss: int):
    if int(fav_win) * mult_val > int(fav_drow) and int(fav_win) * mult_val > int(fav_loss):
        return 'fav_win'
    elif int(fav_drow) > int(fav_loss):
        return 'fav_drow'
    else:
        return 'fav_loss'

def stat_result_twice_win_more_drow_or_loss_without_drow(fav_win: int, fav_drow: int, fav_loss: int):
    if int(fav_win) * mult_val > int(fav_drow) and int(fav_win) * mult_val > int(fav_loss):
        return 'fav_win'
    else:
        return 'fav_loss'    


def stat_result_twice_win_more_drow_plus_loss_without_drow(fav_win: int, fav_drow: int, fav_loss: int):
    if int(fav_win) * mult_val > int(fav_drow) + int(fav_loss):
        return 'fav_win'
    else:
        return 'fav_loss'    

def stat_result_win_more_drow_or_loss_without_loss(fav_win: int, fav_drow: int, fav_loss: int):
    if int(fav_win) > int(fav_drow) and int(fav_win) > int(fav_loss):
        return 'fav_win'
    else:
        return 'fav_drow'          
    
def stat_result_twice_win_more_drow_or_loss_without_loss(fav_win: int, fav_drow: int, fav_loss: int):
    if int(fav_win) * mult_val > int(fav_drow) and int(fav_win) * mult_val > int(fav_loss):
        return 'fav_win'
    else:
        return 'fav_drow'      
       
def stat_result_only_drow_or_win_and_drow_more_twice_win_or_loss(fav_win: int, fav_drow: int, fav_loss: int):
    if int(fav_drow) > int(fav_win) * mult_val and int(fav_drow) > int(fav_loss):
        return 'fav_drow'
    else:
        return 'fav_win_loss'

def stat_result_only_drow_or_win_and_drow_more_twice_win_plus_loss(fav_win: int, fav_drow: int, fav_loss: int):
    if int(fav_drow) > int(fav_win) * mult_val + int(fav_loss):
        return 'fav_drow'
    else:
        return 'fav_win_loss'    

In [None]:
# Stat each aspect rate 

df_rate_home['home_w_more_d_plus_l_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: sdp.stat_result_win_more_drow_plus_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_w_more_d_plus_l_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: sdp.stat_result_win_more_drow_plus_loss(x[0], x[1], x[2]), axis=1)

df_rate_home['home_w_more_d_or_l_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: sdp.stat_result_win_more_drow_or_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_w_more_d_or_l_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: sdp.stat_result_win_more_drow_or_loss(x[0], x[1], x[2]), axis=1)

df_rate_home['home_w_more_d_or_l_wt_drow_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: sdp.stat_result_win_more_drow_or_loss_without_drow(x[0], x[1], x[2]), axis=1)
df_rate_away['away_w_more_d_or_l_wt_drow_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: sdp.stat_result_win_more_drow_or_loss_without_drow(x[0], x[1], x[2]), axis=1)

df_rate_home['home_w_more_d_plus_l_wt_drow_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: sdp.stat_result_win_more_drow_plus_loss_without_drow(x[0], x[1], x[2]), axis=1)
df_rate_away['away_w_more_d_plus_l_wt_drow_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: sdp.stat_result_win_more_drow_plus_loss_without_drow(x[0], x[1], x[2]), axis=1)

df_rate_home['home_w_more_d_or_l_wt_loss_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: stat_result_win_more_drow_or_loss_without_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_w_more_d_or_l_wt_loss_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: stat_result_win_more_drow_or_loss_without_loss(x[0], x[1], x[2]), axis=1)

df_rate_home['home_only_d_or_w_and_d_more_w_or_l_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: sdp.stat_result_only_drow_or_win_and_drow_more_win_or_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_only_d_or_w_and_d_more_w_or_l_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: sdp.stat_result_only_drow_or_win_and_drow_more_win_or_loss(x[0], x[1], x[2]), axis=1)

df_rate_home['home_only_d_or_w_and_d_more_w_plus_l_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: sdp.stat_result_only_drow_or_win_and_drow_more_win_plus_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_only_d_or_w_and_d_more_w_plus_l_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: sdp.stat_result_only_drow_or_win_and_drow_more_win_plus_loss(x[0], x[1], x[2]), axis=1)

res_cols_lt_fst = ['home_w_more_d_plus_l_res', 'away_w_more_d_plus_l_res', 'home_w_more_d_or_l_res', 'away_w_more_d_or_l_res', 'home_w_more_d_or_l_wt_drow_res',
                   'away_w_more_d_or_l_wt_drow_res', 'home_w_more_d_plus_l_wt_drow_res', 'away_w_more_d_plus_l_wt_drow_res', 'home_w_more_d_or_l_wt_loss_res', 'away_w_more_d_or_l_wt_loss_res']

df_rate_home['home_tw_more_d_plus_l_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: stat_result_twice_win_more_drow_plus_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_tw_more_d_plus_l_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: stat_result_twice_win_more_drow_plus_loss(x[0], x[1], x[2]), axis=1)

df_rate_home['home_tw_more_d_or_l_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: stat_result_twice_win_more_drow_or_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_tw_more_d_or_l_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: stat_result_twice_win_more_drow_or_loss(x[0], x[1], x[2]), axis=1)

df_rate_home['home_tw_more_d_or_l_wt_drow_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: stat_result_twice_win_more_drow_or_loss_without_drow(x[0], x[1], x[2]), axis=1)
df_rate_away['away_tw_more_d_or_l_wt_drow_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: stat_result_twice_win_more_drow_or_loss_without_drow(x[0], x[1], x[2]), axis=1)

df_rate_home['home_tw_more_d_plus_l_wt_drow_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: stat_result_twice_win_more_drow_plus_loss_without_drow(x[0], x[1], x[2]), axis=1)
df_rate_away['away_tw_more_d_plus_l_wt_drow_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: stat_result_twice_win_more_drow_plus_loss_without_drow(x[0], x[1], x[2]), axis=1)

df_rate_home['home_tw_more_d_or_l_wt_loss_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: stat_result_twice_win_more_drow_or_loss_without_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_tw_more_d_or_l_wt_loss_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: stat_result_twice_win_more_drow_or_loss_without_loss(x[0], x[1], x[2]), axis=1)

df_rate_home['home_only_d_or_w_and_d_more_tw_or_l_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: stat_result_only_drow_or_win_and_drow_more_twice_win_or_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_only_d_or_w_and_d_more_tw_or_l_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: stat_result_only_drow_or_win_and_drow_more_twice_win_or_loss(x[0], x[1], x[2]), axis=1)

df_rate_home['home_only_d_or_w_and_d_more_tw_plus_l_res'] = df_rate_home[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: stat_result_only_drow_or_win_and_drow_more_twice_win_plus_loss(x[0], x[1], x[2]), axis=1)
df_rate_away['away_only_d_or_w_and_d_more_tw_plus_l_res'] = df_rate_away[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: stat_result_only_drow_or_win_and_drow_more_twice_win_plus_loss(x[0], x[1], x[2]), axis=1)

res_cols_lt = ['home_tw_more_d_plus_l_res', 'away_tw_more_d_plus_l_res', 'home_tw_more_d_or_l_res', 'away_tw_more_d_or_l_res','home_tw_more_d_or_l_wt_drow_res', 'away_tw_more_d_or_l_wt_drow_res', 
               'home_tw_more_d_plus_l_wt_drow_res', 'away_tw_more_d_plus_l_wt_drow_res', 'home_tw_more_d_or_l_wt_loss_res', 'away_tw_more_d_or_l_wt_loss_res'] + res_cols_lt_fst

for fora in [-1.5, -1.25, -1, -0.75, None, -0.25, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5]:
    for res_col in res_cols_lt:
        col_name_bet = re.sub('res', 'bet', str(res_col))
        col_name = col_name_bet + '_' + str(fora)
        if res_col in df_rate_home.columns:
            df_rate_home[col_name] = df_rate_home[['host_role', 'result', res_col, '1', 'X', '2', 'fav_goals']].apply(lambda x: sdp.bets_fora_result(x[0], x[1], x[2], x[3], x[4], x[5], x[6], fora), axis =1)
        elif res_col in df_rate_away.columns:
            df_rate_away[col_name] = df_rate_away[['host_role', 'result', res_col, '1', 'X', '2', 'fav_goals']].apply(lambda x: sdp.bets_fora_result(x[0], x[1], x[2], x[3], x[4], x[5], x[6], fora), axis =1)    

df_rate_home['home_only_d_or_w_and_d_more_w_or_l_bets']   = df_rate_home[['result', 'home_only_d_or_w_and_d_more_w_or_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
df_rate_away['away_only_d_or_w_and_d_more_w_or_l_bets']   = df_rate_away[['result', 'away_only_d_or_w_and_d_more_w_or_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
df_rate_home['home_only_d_or_w_and_d_more_w_plus_l_bets'] = df_rate_home[['result', 'home_only_d_or_w_and_d_more_w_plus_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
df_rate_away['away_only_d_or_w_and_d_more_w_plus_l_bets'] = df_rate_away[['result', 'away_only_d_or_w_and_d_more_w_plus_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)

df_rate_home['home_only_d_or_w_and_d_more_tw_or_l_bets']   = df_rate_home[['result', 'home_only_d_or_w_and_d_more_tw_or_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
df_rate_away['away_only_d_or_w_and_d_more_tw_or_l_bets']   = df_rate_away[['result', 'away_only_d_or_w_and_d_more_tw_or_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
df_rate_home['home_only_d_or_w_and_d_more_tw_plus_l_bets'] = df_rate_home[['result', 'home_only_d_or_w_and_d_more_tw_plus_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)
df_rate_away['away_only_d_or_w_and_d_more_tw_plus_l_bets'] = df_rate_away[['result', 'away_only_d_or_w_and_d_more_tw_plus_l_res', 'X']].apply(lambda x: sdp.only_win_or_drow_bets_result(x[0], x[1], x[2]), axis =1)

In [None]:
print('len columns -', len(df_rate_away.columns))

In [None]:
bet_cols_home          = [x for x in df_rate_home.columns if 'bet' in x]
df_rate_home[bet_cols_home] = df_rate_home[bet_cols_home].apply(lambda x: pd.to_numeric(x, errors='coerce'), axis=1)

bet_cols_away          = [x for x in df_rate_away.columns if 'bet' in x]
df_rate_away[bet_cols_away] = df_rate_away[bet_cols_away].apply(lambda x: pd.to_numeric(x, errors='coerce'), axis=1)

In [None]:
dict_results = {}

bets_cols = bet_cols_home + bet_cols_away

for bet_col in bets_cols:
    if 'home' in str(bet_col):
        bet_result = round(df_rate_home[df_rate_home.host_role == 'Fav'][bet_col].mean(), 3)
        dict_results.update({bet_col: bet_result})  
    elif 'away' in str(bet_col):
        bet_result = round(df_rate_away[df_rate_away.host_role == 'Pre'][bet_col].mean(), 3)
        dict_results.update({bet_col: bet_result})    
    else:
        None

In [None]:
df_result = pd.DataFrame.from_dict(dict_results.items())
df_result.columns=['stat_name', 'bets_result']

In [None]:
df_result.sort_values(by='bets_result', ascending=False).head()

In [None]:
df_result.sort_values(by='bets_result', ascending=False).tail()

In [None]:
# shape away - 20834, away_w_more_d_or_l_wt_drow_bet_-0.25 - 1.155

# shape home - 21950 - home_w_more_d_or_l_wt_drow_bet_1 - advantage < 2, mult_val =	1.5 - 1.130 
# shape away - 62 - df_rate_home.fav_advantage < 1.3, mult_val = 1.5 - away_tw_more_d_or_l_wt_drow_bet_-1 - 1.340
# all games - away_tw_more_d_or_l_wt_drow_bet_1, mult_val = 1.5 - 1.093

In [None]:
Stop ********************************************************

In [None]:
# Choose home or away research aspekts and statistic columns   1 - #########################################################################################################

df_rate = df_rate_home.copy() # Home
# df_rate = df_rate_away.copy() # Away

In [None]:
df_rate.head(1)

In [None]:
research_cols = ['game_utc', 'aspects', 'result',  '1', 'X', '2', 'fav_goals', 'win_bets', 'drow_bets', 'loss_bets']

# Home   2 - #####################################################################################################################################################################
res_cols_home = ['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home', 'home_w_more_d_or_l_wt_drow_res', 'home_w_more_d_or_l_wt_drow_bet_None']
research_cols = research_cols + res_cols_home

# Away
# res_cols_away = [ 'mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away', 'away_w_more_d_or_l_wt_drow_res', 'away_w_more_d_or_l_wt_drow_bet_None']
# research_cols = research_cols + res_cols_away

# df_rate[research_cols].head(2)

In [None]:
df_rate.shape[0]

In [None]:
# 3 - ###################################################################################################################################
col_res = 'home_w_more_d_or_l_wt_drow_res' # 'home_w_more_d_or_l_wt_drow_res' , 'away_w_more_d_or_l_wt_drow_res'

In [None]:
error_asps_lt = []

_= df_rate[df_rate.result != df_rate[col_res]].aspects.map(lambda x: [error_asps_lt.append(var) for var in literal_eval(x)])
df_error_asps = pd.DataFrame.from_dict(Counter(error_asps_lt), orient='index').reset_index()
df_error_asps.columns = ['aspects', 'count_error']
df_error_asps.sort_values(by='count_error', inplace=True, ascending=False)
df_error_asps.reset_index(drop=True, inplace=True)
df_error_asps.reset_index(drop=False, inplace=True)
df_error_asps.rename(columns={'index':'rate_asp_error'}, inplace=True)
print('shape -', df_error_asps.shape[0])
df_error_asps.head(3)

In [None]:
right_asps_lt = []

_= df_rate[df_rate.result == df_rate[col_res]].aspects.map(lambda x: [right_asps_lt.append(var) for var in literal_eval(x)])
df_right_asps = pd.DataFrame.from_dict(Counter(right_asps_lt), orient='index').reset_index()
df_right_asps.columns = ['aspects', 'count_right']
df_right_asps.sort_values(by='count_right', inplace=True, ascending=False)
df_right_asps.reset_index(drop=True, inplace=True)
df_right_asps.reset_index(drop=False, inplace=True)
df_right_asps.rename(columns={'index':'rate_asp_right'}, inplace=True)
print('shape -', df_error_asps.shape[0])
df_right_asps.head(3)

In [None]:
df_count_asps = df_error_asps.merge(df_right_asps, how='left', left_on='aspects', right_on='aspects')
df_count_asps.set_index(keys='aspects', inplace=True)

In [None]:
df_count_asps = df_count_asps.fillna(0)

In [None]:
df_count_asps = df_count_asps.astype({col: int for col in df_count_asps.columns[1:]})

In [None]:
df_count_asps['diff_rates'] = df_count_asps.rate_asp_error - df_count_asps.rate_asp_right

In [None]:
df_count_asps.sort_values(by='rate_asp_right', ascending=True).head(2)

In [None]:
df_error_asps.sort_values(by='rate_asp_error', ascending=True).head(2)

In [None]:
df_right_asps.sort_values(by='count_right', ascending=False).head(2)

In [None]:
len(df_right_asps)

In [None]:
asp = 'PF_o_Ch' # 97 diff_rate - 141 	236 	44 	140 	
game_ids_asp_lt = []

_= df_rate[['aspects', 'result']].apply(lambda x: game_ids_asp_lt.append(x.name) if asp in x[0] else None, axis=1)

df_game_ids_asp = df_rate[df_rate.index.isin(game_ids_asp_lt)][research_cols].copy()

In [None]:
df_game_ids_asp.result.value_counts()

In [None]:
df_game_ids_asp.head(2)

In [None]:
df_rate.shape[0]

In [None]:
research_cols[12]

In [None]:
df_rate[research_cols].sort_values(by=research_cols[9], ascending=False).head(200).result.value_counts()

In [None]:
df_rate[research_cols].sort_values(by=research_cols[9], ascending=False).head(200).win_bets.sum()

In [None]:
df_rate[research_cols].sort_values(by=research_cols[10], ascending=False).head(4000).result.value_counts()

In [None]:
df_rate[research_cols].sort_values(by=research_cols[10], ascending=False).head(10000).drow_bets.sum()

In [None]:
df_rate[research_cols].sort_values(by=research_cols[11], ascending=False).head(5000).result.value_counts()

In [None]:
df_rate[research_cols].sort_values(by=research_cols[11], ascending=False).head(5000).loss_bets.sum() 

In [None]:
df_rate.shape[0]

In [None]:
df_rate.loss_bets.sum()

In [None]:
df_rate.loss_bets.sum() / df_rate.shape[0] 

In [None]:
len(df_main_asps.game_utc.map(lambda x: str(x)[:10]).unique())

In [None]:
df_rate['month_year'] = df_rate.game_utc.map(lambda x: str(x)[:7])

In [None]:
research_cols.remove('game_utc')
research_cols.append('month_year')

In [None]:
# col_bet_add = 'home_w_more_d_plus_l_wt_drow_res'  #'away_w_more_d_or_l_res' 
# research_cols.append(col_bet_add)

In [None]:
df_rate_reserch = df_rate[research_cols].copy()

In [None]:
df_rate_reserch.head(1) #[(df_rate_away_reserch.result == df_rate_away_reserch.away_w_more_d_or_l_wt_drow_res)].head(2)

In [None]:
# research_cols.remove('home_w_more_d_plus_l_wt_drow_res')
# research_cols.remove('home_w_more_d_or_l_wt_drow_bet_None')

In [None]:
print('win mean  -', df_rate_reserch[research_cols[9]].mean())
print('drow mean -', df_rate_reserch[research_cols[10]].mean())
print('loss mean -', df_rate_reserch[research_cols[11]].mean())

In [None]:
print('win-win mean  -', df_rate_reserch[df_rate_reserch.result == 'fav_win'][research_cols[9]].mean())
print('win-drow mean -', df_rate_reserch[df_rate_reserch.result == 'fav_win'][research_cols[10]].mean())
print('win-loss mean -', df_rate_reserch[df_rate_reserch.result == 'fav_win'][research_cols[11]].mean())

In [None]:
cond_win = (df_rate_reserch[research_cols[9]] > 6.82) & (df_rate_reserch[research_cols[10]] < 14.87) & (df_rate_reserch[research_cols[11]] < 12.08) 
print(df_rate_reserch[cond_win].shape[0])
print(df_rate_reserch[cond_win].win_bets.sum())

In [None]:
print('drow-win mean  -', df_rate_reserch[df_rate_reserch.result == 'fav_drow'][research_cols[9]].mean())
print('drow-drow mean -', df_rate_reserch[df_rate_reserch.result == 'fav_drow'][research_cols[10]].mean())
print('drow-loss mean -', df_rate_reserch[df_rate_reserch.result == 'fav_drow'][research_cols[11]].mean())

In [None]:
cond_drow = (df_rate_reserch[research_cols[9]] < 6.21) & (df_rate_reserch[research_cols[10]] > 16.36) & (df_rate_reserch[research_cols[11]] < 11.93) 
print(df_rate_reserch[cond_drow].shape[0])
print(df_rate_reserch[cond_drow].drow_bets.sum())

In [None]:
print('loss-win mean  -', df_rate_reserch[df_rate_reserch.result == 'fav_loss'][research_cols[9]].mean())
print('loss-drow mean -', df_rate_reserch[df_rate_reserch.result == 'fav_loss'][research_cols[10]].mean())
print('loss-loss mean -', df_rate_reserch[df_rate_reserch.result == 'fav_loss'][research_cols[11]].mean())

In [None]:
cond_loss = (df_rate_reserch[research_cols[9]] < 6) & (df_rate_reserch[research_cols[10]] < 15) & (df_rate_reserch[research_cols[11]] > 14.46) 
print(df_rate_reserch[cond_loss].shape[0])
print(df_rate_reserch[cond_loss].loss_bets.sum())

In [None]:
def bets_choose(result: str, pred_res: str, bet_win: float, bet_drow: float, bet_loss: float):
    if result == pred_res == 'fav_win':
        return bet_win
    elif result == pred_res == 'fav_drow':
        return bet_drow
    elif result == pred_res == 'fav_loss':
        return bet_loss
    else:
        return 0

In [None]:
Stop ***********************************8

In [None]:
# df_rate_reserch = df_rate[research_cols].copy()

In [None]:
# cols = ['result', 'win_bets', 'drow_bets', 'loss_bets']
# cols.insert(1, col_bet_add)
# df_rate_reserch['profit_bets'] = df_rate_reserch[cols].apply(lambda x: bets_choose(x[0], x[1], x[2], x[3], x[4]), axis=1)

In [None]:
# loss_numb = [45, 45]
# drow_numb = [35, 50]
# win_numb  = [0, 5]


# condition =   (df_rate_reserch[research_cols[11]] > df_rate_reserch[research_cols[9]]  + loss_numb[0]) & (df_rate_reserch[research_cols[11]] > df_rate_reserch[research_cols[10]] + loss_numb[1]) \
#             | (df_rate_reserch[research_cols[10]] > df_rate_reserch[research_cols[9]]  + drow_numb[0]) & (df_rate_reserch[research_cols[10]] > df_rate_reserch[research_cols[11]] + drow_numb[1]) \
#             | (df_rate_reserch[research_cols[9]]  > df_rate_reserch[research_cols[10]] + win_numb[0])  & (df_rate_reserch[research_cols[9]]  > df_rate_reserch[research_cols[11]] + win_numb[1]) 

# numbs = literal_eval(df_conditions_games_id.sort_values(by='profit_monthes', ascending=False).head(10).condition[1429508])

# condition =   (df_rate_reserch[research_cols[11]] > df_rate_reserch[research_cols[9]] + numbs[0]) & (df_rate_reserch[research_cols[11]] > df_rate_reserch[research_cols[10]] + numbs[1]) \
#             | (df_rate_reserch[research_cols[10]] > df_rate_reserch[research_cols[9]] + numbs[2]) & (df_rate_reserch[research_cols[10]] > df_rate_reserch[research_cols[11]] + numbs[3]) \
#             | (df_rate_reserch[research_cols[9]]  > df_rate_reserch[research_cols[10]] + numbs[4]) & (df_rate_reserch[research_cols[9]] > df_rate_reserch[research_cols[11]] + numbs[5]) 

# df_rate_reserch = df_rate_reserch[condition].copy()

In [None]:
# df_rate_reserch_gb = df_rate_reserch.groupby('month_year').agg({'profit_bets':'sum', 'result':'count'})
# df_rate_reserch_gb.rename(columns={'profit_bets': 'profit_bets_mth', 'result':'result_count_mth'}, inplace=True)
# df_rate_reserch_gb['diff_prof_res'] = df_rate_reserch_gb.profit_bets_mth - df_rate_reserch_gb.result_count_mth
# df_rate_reserch_gb['percent_prof']  = round(100 / (df_rate_reserch_gb.profit_bets_mth / df_rate_reserch_gb.diff_prof_res), 2)

In [None]:
# print('percent profit          -', round(df_rate_reserch_gb.profit_bets_mth.sum() / df_rate_reserch_gb.result_count_mth.sum(), 3))
# print('count monthes           -', df_rate_reserch_gb.shape[0])
# print('monthes with profit     -', df_rate_reserch_gb[df_rate_reserch_gb.diff_prof_res > 0].shape[0])
# print('attitude monthes-profit -', df_rate_reserch_gb.shape[0] / df_rate_reserch_gb[df_rate_reserch_gb.diff_prof_res > 0].shape[0])
# print('count games             -', df_rate_reserch.shape[0])
# print('count all days          -', len(df_main_asps.game_utc.map(lambda x: str(x)[:10]).unique()))
# print('count games per week    -', round(df_rate_reserch_gb.result_count_mth.mean() / 4))

In [None]:
Stop **********************************************************************

In [None]:
# research_cols_home = ['aspects', 'result', '1', 'X', '2', 'fav_goals', 'win_bets', 'drow_bets', 'loss_bets', 'mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home', 'month_year']

# df_rate               = df_rate_home.copy()
# df_rate['month_year'] = df_rate.game_utc.map(lambda x: str(x)[:7])
# df_rate_reserch       = df_rate[research_cols_home].copy()

# col_bet_add_home = ['home_w_more_d_plus_l_res', 'home_w_more_d_or_l_res', 'home_w_more_d_or_l_wt_drow_res', 'home_w_more_d_plus_l_wt_drow_res', 'home_w_more_d_or_l_wt_loss_res'][3]
# cols_home        = ['result', 'win_bets', 'drow_bets', 'loss_bets']
# cols_home.insert(1, col_bet_add_home)

# df_rate_reserch['profit_bets'] = df_rate[cols_home].apply(lambda x: bets_choose(x[0], x[1], x[2], x[3], x[4]), axis=1)

# numbs_home = literal_eval(df_conditions_home.sort_values(by='profit_monthes', ascending=False).condition[0])
# print(col_bet_add_home, ' - ', numbs_home)

# condition_home =  (df_rate_reserch[research_cols_home[11]] > df_rate_reserch[research_cols_home[9]] + numbs_home[0]) & (df_rate_reserch[research_cols_home[11]] > df_rate_reserch[research_cols_home[10]] + numbs_home[1]) \
#                 | (df_rate_reserch[research_cols_home[10]] > df_rate_reserch[research_cols_home[9]] + numbs_home[2]) & (df_rate_reserch[research_cols_home[10]] > df_rate_reserch[research_cols_home[11]] + numbs_home[3]) \
#                 | (df_rate_reserch[research_cols_home[9]]  > df_rate_reserch[research_cols_home[10]] + numbs_home[4]) & (df_rate_reserch[research_cols_home[9]] > df_rate_reserch[research_cols_home[11]] + numbs_home[5]) 

# df_rate_reserch_home = df_rate_reserch[condition_home].copy()
# df_rate_reserch_home_gb = df_rate_reserch_home.groupby('month_year').agg({'profit_bets':'sum', 'result':'count'})
# df_rate_reserch_home_gb.rename(columns={'profit_bets': 'profit_bets_mth_hm', 'result':'result_count_mth_hm'}, inplace=True)
# df_rate_reserch_home_gb['diff_prof_res_hm'] = df_rate_reserch_home_gb.profit_bets_mth_hm - df_rate_reserch_home_gb.result_count_mth_hm
# df_rate_reserch_home_gb['percent_prof_hm']  = round(100 / (df_rate_reserch_home_gb.profit_bets_mth_hm / df_rate_reserch_home_gb.diff_prof_res_hm), 2)

# print('percent profit          -', round(df_rate_reserch_home_gb.profit_bets_mth_hm.sum() / df_rate_reserch_home_gb.result_count_mth_hm.sum(), 3))
# print('count monthes           -', df_rate_reserch_home_gb.shape[0])
# print('monthes with profit     -', df_rate_reserch_home_gb[df_rate_reserch_home_gb.diff_prof_res_hm > 0].shape[0])
# print('attitude monthes-profit -', df_rate_reserch_home_gb.shape[0] / df_rate_reserch_home_gb[df_rate_reserch_home_gb.diff_prof_res_hm > 0].shape[0])
# print('count games             -', df_rate_reserch_home.shape[0])
# print('count all days          -', len(df_main_asps.game_utc.map(lambda x: str(x)[:10]).unique()))
# print('count games per week    -', round(df_rate_reserch_home_gb.result_count_mth_hm.mean() / 4))

In [None]:
# Choose csv file for research and add this file to 'df_conditions' and 'col_bet_add' 4 - #########################################################################################################

In [None]:
# # Home - ['home_w_more_d_plus_l_res', 'home_w_more_d_or_l_res', 'home_w_more_d_or_l_wt_drow_res', 'home_w_more_d_plus_l_wt_drow_res', 'home_w_more_d_or_l_wt_loss_res']
df_conditions_home = pd.read_csv('csv_files/home_w_more_d_or_l_wt_loss_res_monthes_profit_dc.csv', names=['condition', 'profit_monthes'])

df_conditions_home.sort_values(by='profit_monthes', ascending=False, inplace=True)
df_conditions_home.reset_index(drop=True, inplace=True)
df_conditions_home.head()

In [None]:
# Away - ['away_w_more_d_plus_l_res', 'away_w_more_d_or_l_res', 'away_w_more_d_or_l_wt_drow_res', 'away_w_more_d_plus_l_wt_drow_res', 'away_w_more_d_or_l_wt_loss_res']
# df_conditions_away = pd.read_csv('csv_files/away_w_more_d_or_l_wt_loss_res_monthes_profit_dc.csv', names=['condition', 'profit_monthes'])

# df_conditions_away.sort_values(by='profit_monthes', ascending=False, inplace=True)
# df_conditions_away.reset_index(drop=True, inplace=True)
# df_conditions_away.head()

In [None]:
df_all_condition_res = pd.DataFrame()

df_conditions = df_conditions_home.copy() # Home  5 - #########################################################################################################
# df_conditions = df_conditions_away.copy()  # Away


for i in tqdm(range(0, 5000)):
    # # Home   6 - #####################################################################################################################
    df_rate       = df_rate_home.copy()
    research_cols = ['aspects', 'result', '1', 'X', '2', 'fav_goals', 'win_bets', 'drow_bets', 'loss_bets', 'mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home', 'month_year']
    col_bet_add   = ['home_w_more_d_plus_l_res', 'home_w_more_d_or_l_res', 'home_w_more_d_or_l_wt_drow_res', 'home_w_more_d_plus_l_wt_drow_res', 'home_w_more_d_or_l_wt_loss_res'][4]
    
    # Away
    # df_rate       = df_rate_away.copy()
    # research_cols = ['aspects', 'result', '1', 'X', '2', 'fav_goals', 'win_bets', 'drow_bets', 'loss_bets', 'mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away', 'month_year']
    # col_bet_add   = ['away_w_more_d_plus_l_res', 'away_w_more_d_or_l_res', 'away_w_more_d_or_l_wt_drow_res', 'away_w_more_d_plus_l_wt_drow_res', 'away_w_more_d_or_l_wt_loss_res'][4]

    df_rate['month_year'] = df_rate.game_utc.map(lambda x: str(x)[:7])
    df_rate_reserch       = df_rate[research_cols].copy()

    cols = ['result', 'win_bets', 'drow_bets', 'loss_bets']
    cols.insert(1, col_bet_add)

    df_rate_reserch['profit_bets'] = df_rate[cols].apply(lambda x: bets_choose(x[0], x[1], x[2], x[3], x[4]), axis=1)

    numbs = literal_eval(df_conditions.sort_values(by='profit_monthes', ascending=False).condition[i])

    # print(col_bet_add, ' - ', numbs)

    condition =  (df_rate_reserch[research_cols[11]] > df_rate_reserch[research_cols[9]] + numbs[0]) & (df_rate_reserch[research_cols[11]] > df_rate_reserch[research_cols[10]] + numbs[1]) \
                 | (df_rate_reserch[research_cols[10]] > df_rate_reserch[research_cols[9]] + numbs[2]) & (df_rate_reserch[research_cols[10]] > df_rate_reserch[research_cols[11]] + numbs[3]) \
                 | (df_rate_reserch[research_cols[9]]  > df_rate_reserch[research_cols[10]] + numbs[4]) & (df_rate_reserch[research_cols[9]] > df_rate_reserch[research_cols[11]] + numbs[5]) 

    df_rate_reserch = df_rate_reserch[condition].copy()
    df_rate_reserch_gb = df_rate_reserch.groupby('month_year').agg({'profit_bets':'sum', 'result':'count'})
    df_rate_reserch_gb.rename(columns={'profit_bets': 'profit_bets_mth', 'result':'result_count_mth'}, inplace=True)
    df_rate_reserch_gb['diff_prof_res'] = df_rate_reserch_gb.profit_bets_mth - df_rate_reserch_gb.result_count_mth
    df_rate_reserch_gb['percent_prof']  = round(100 / (df_rate_reserch_gb.profit_bets_mth / df_rate_reserch_gb.diff_prof_res), 2)

    condition_res = {'percent_profit': round(df_rate_reserch_gb.profit_bets_mth.sum() / df_rate_reserch_gb.result_count_mth.sum(), 3),
                    'count_monthes': df_rate_reserch_gb.shape[0],
                    'monthes_with_profit': df_rate_reserch_gb[df_rate_reserch_gb.diff_prof_res > 0].shape[0],
                    'attitude_monthes_profit': df_rate_reserch_gb.shape[0] / df_rate_reserch_gb[df_rate_reserch_gb.diff_prof_res > 0].shape[0],
                    'count_games': df_rate_reserch.shape[0],
                    'count_all_days': len(df_main_asps.game_utc.map(lambda x: str(x)[:10]).unique()),
                    'count_games_per_week': round(df_rate_reserch_gb.result_count_mth.mean() / 4)}
    
    df_condition_res = pd.DataFrame(condition_res, index=[str(numbs)])

    df_all_condition_res = pd.concat([df_condition_res, df_all_condition_res])

In [None]:
print(col_bet_add)

In [None]:
df_all_condition_res.sort_values(by='percent_profit', ascending=False)

In [None]:
# file = open('pickle_files/home_w_more_d_or_l_wt_loss_res_top_5t', 'wb')
# pickle.dump(df_all_condition_res, file) 
# file.close()

In [221]:
with open('pickle_files/df_all_condition_away_res_top_10t', 'rb') as f:
    df_condition_away_res_top_10t = pickle.load(f)
    
with open('pickle_files/df_all_condition_away_res_top_10t', 'rb') as f:
    df_condition_away_res_top_10t = pickle.load(f)
    
with open('pickle_files/df_all_condition_away_res_top_10t', 'rb') as f:
    df_condition_away_res_top_10t = pickle.load(f)
    
with open('pickle_files/df_all_condition_away_res_top_10t', 'rb') as f:
    df_condition_away_res_top_10t = pickle.load(f)    

FileNotFoundError: [Errno 2] No such file or directory: 'pickle_files/df_all_condition_away_res_top_10t'

In [None]:
df_condition_away_res_top_10t.head()

In [None]:
df_condition_away_res_top_10t.sort_values(by='monthes with profit', ascending=False)

In [None]:
df_rate_reserch_away_gb.head()

In [None]:
df_rate_reserch_gb = pd.merge(df_rate_reserch_home_gb, df_rate_reserch_away_gb, how='left', left_index=True, right_index=True)

In [None]:
df_rate_reserch_gb.head()

In [None]:
df_rate_reserch_gb['profit_bets_mth']  = df_rate_reserch_gb.profit_bets_mth_hm + df_rate_reserch_gb.profit_bets_mth_aw
df_rate_reserch_gb['result_count_mth'] = df_rate_reserch_gb.result_count_mth_hm + df_rate_reserch_gb.result_count_mth_aw
df_rate_reserch_gb['diff_prof_res']    = df_rate_reserch_gb.diff_prof_res_hm + df_rate_reserch_gb.diff_prof_res_aw
df_rate_reserch_gb['percent_prof']     = df_rate_reserch_gb.percent_prof_hm + df_rate_reserch_gb.percent_prof_aw

In [None]:
print('percent profit          -', round(df_rate_reserch_gb.profit_bets_mth.sum() / df_rate_reserch_gb.result_count_mth.sum(), 3))
print('count monthes           -', df_rate_reserch_gb.shape[0])
print('monthes with profit     -', df_rate_reserch_gb[df_rate_reserch_gb.diff_prof_res > 0].shape[0])
print('attitude monthes-profit -', df_rate_reserch_gb.shape[0] / df_rate_reserch_gb[df_rate_reserch_gb.diff_prof_res > 0].shape[0])
print('count games             -', df_rate_reserch_home.shape[0] + df_rate_reserch_away.shape[0])
print('count all days          -', len(df_main_asps.game_utc.map(lambda x: str(x)[:10]).unique()))
print('count games per week    -', round(df_rate_reserch_gb.result_count_mth.mean() / 4))

In [None]:
Stop ****************************************

In [None]:
df_research = df_rate.copy()

In [None]:
combo_lt = []
for i in product(range(0, 55, 5), repeat = 6):
    combo_lt.append(i)

combo_lt[:3]    

In [None]:
conditions_dc = dict()

cols    = ['result', 'win_bets', 'drow_bets', 'loss_bets']

# Change cols list for hom or away df
# col_res = ['away_w_more_d_plus_l_res', 'away_w_more_d_or_l_res', 'away_w_more_d_or_l_wt_drow_res', 'away_w_more_d_plus_l_wt_drow_res', 'away_w_more_d_or_l_wt_loss_res'][4]
col_res = ['home_w_more_d_plus_l_res', 'home_w_more_d_or_l_res', 'home_w_more_d_or_l_wt_drow_res', 'home_w_more_d_plus_l_wt_drow_res', 'home_w_more_d_or_l_wt_loss_res'][4]


cols.insert(1, col_res)
print(col_res)
    
df_research['profit_bets'] = df_research[cols].apply(lambda x: bets_choose(x[0], x[1], x[2], x[3], x[4]), axis=1)

cols.remove(col_res)

count = 0
for numbs in tqdm(combo_lt): 
    condition =   (df_research[research_cols[11]] > df_research[research_cols[9]] + numbs[0]) & (df_research[research_cols[11]] > df_research[research_cols[10]] + numbs[1]) \
                | (df_research[research_cols[10]] > df_research[research_cols[9]] + numbs[2]) & (df_research[research_cols[10]] > df_research[research_cols[11]] + numbs[3]) \
                | (df_research[research_cols[9]]  > df_research[research_cols[10]] + numbs[4]) & (df_research[research_cols[9]] > df_research[research_cols[11]] + numbs[5]) 
    
    df_research_cond   = df_research[condition].copy()
    df_rate_reserch_gb = df_research_cond.groupby('month_year').agg({'profit_bets':'sum', 'result':'count'})
    df_rate_reserch_gb['diff_prof_res'] = df_rate_reserch_gb.profit_bets - df_rate_reserch_gb.result
    
    conditions_dc.update({numbs : df_rate_reserch_gb[df_rate_reserch_gb.diff_prof_res > 0].shape[0]})
    count = count + 1
    
    file_name = col_res + '_monthes_profit_dc.csv'
    if ((count % 5000 == 0) & (count != 0)) | (numbs == combo_lt[-1]):
        with open('csv_files/' + file_name, 'a') as f:
            w = csv.writer(f)
            w.writerows(conditions_dc.items())

        conditions_dc.clear()

In [None]:
# df_conditions_games_id = pd.read_csv('csv_files/home_w_more_d_plus_l_wt_drow_res_monthes_profit_dc.csv', names=['condition', 'profit_monthes'])
# df_conditions_games_id.sort_values(by='profit_monthes', ascending=False).head(20)

In [None]:
df_conditions_away.sort_values(by='profit_monthes', ascending=False).head()

In [None]:
Stop ***********************************

In [None]:
game_ind = 19

In [None]:
aspects_lt = list()

for asp in df_main_asps_gb.fs_asps_wt_pls[game_ind]:
    _= df_aspect_stat.fs_asps_wt_pls_lt.map(lambda x: aspects_lt.append(x) if asp in x else None)    

In [None]:
all_asp_dc = {}

for asp in df_main_asps_gb.fs_asps_wt_pls[game_ind]:
    one_asp_lt = list()
    for find_lt in aspects_lt:
        one_asp_lt.append(find_lt.count(asp))

    all_asp_dc[asp] = one_asp_lt

In [None]:
df_one_asp_st = pd.DataFrame.from_dict(all_asp_dc)
df_one_asp_st['index_col'] = aspects_lt #
df_one_asp_st.set_index(keys='index_col', inplace=True)
df_one_asp_st['count_entries'] = df_one_asp_st.sum(axis=1)

In [None]:
df_one_asp_st.index              = df_one_asp_st.index.map(str)
df_aspect_stat.fs_asps_wt_pls_lt = df_aspect_stat.fs_asps_wt_pls_lt.map(str)

In [None]:
if df_main_asps_gb.iloc[game_ind].host_role == 'Fav':
    cols          = ['fs_asps_wt_pls_lt', 'count_asps', 'count_home', 'home_res', 'mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']
    df_one_asp_st = df_one_asp_st.merge(df_aspect_stat[cols], how='left', left_on='index_col', right_on='fs_asps_wt_pls_lt')
    df_one_asp_st = df_one_asp_st[df_one_asp_st.count_home != 0].copy()
        
    df_one_asp_st.drop_duplicates(subset='fs_asps_wt_pls_lt', inplace=True)
    
    sum_win     = df_one_asp_st.home_res.map(lambda x: x[0]).sum()
    sum_drow    = df_one_asp_st.home_res.map(lambda x: x[1]).sum()
    sum_loss    = df_one_asp_st.home_res.map(lambda x: x[2]).sum()
    sum_results = sum_win + sum_drow + sum_loss
    
    print('sum  win -', sum_win, ' - diff mean win  =', round(fav_win_home_mean - sum_results/sum_win, 3))
    print('sum drow -', sum_drow, ' - diff mean drow =', round(fav_drow_home_mean - sum_results/sum_drow, 3))
    print('sum loss -', sum_loss, ' - diff mean loss =', round(fav_loss_home_mean - sum_results/sum_loss, 3))
    print('')    
    print(df_one_asp_st[['mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(sum))
    print('')
    
    df_one_asp_st['mean_multi_count'] = df_one_asp_st[['count_home', 'mean_diff_win_home', 'mean_diff_drow_home', 'mean_diff_loss_home']].apply(lambda x: (x[0]*x[1], x[0]*x[2], x[0]*x[3]), axis=1)
    print('mean multi count sum  win  =', round(df_one_asp_st.mean_multi_count.map(lambda x: x[0]).sum(), 3))
    print('mean multi count sum  drow =', round(df_one_asp_st.mean_multi_count.map(lambda x: x[1]).sum(), 3))
    print('mean multi count sum  loss =', round(df_one_asp_st.mean_multi_count.map(lambda x: x[2]).sum(), 3))      
          
else:
    cols          = ['fs_asps_wt_pls_lt', 'count_asps', 'count_away', 'away_res', 'mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away'] 
    df_one_asp_st = df_one_asp_st.merge(df_aspect_stat[cols], how='left', left_on='index_col', right_on='fs_asps_wt_pls_lt')
    df_one_asp_st = df_one_asp_st[df_one_asp_st.count_away != 0].copy()
    
    df_one_asp_st.drop_duplicates(subset='fs_asps_wt_pls_lt', inplace=True)
    
    sum_win     = df_one_asp_st.away_res.map(lambda x: x[0]).sum()
    sum_drow    = df_one_asp_st.away_res.map(lambda x: x[1]).sum()
    sum_loss    = df_one_asp_st.away_res.map(lambda x: x[2]).sum()
    sum_results = sum_win + sum_drow + sum_loss
    
    print('sum  win -', sum_win, ' - diff mean win  =', round(fav_win_away_mean - sum_results/sum_win, 3))
    print('sum drow -', sum_drow, ' - diff mean drow =', round(fav_drow_away_mean - sum_results/sum_drow, 3))
    print('sum loss -', sum_loss, ' - diff mean loss =', round(fav_loss_away_mean - sum_results/sum_loss, 3))
    print('')
    print(df_one_asp_st[['mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(sum))
    print('')
    
    df_one_asp_st['mean_multi_count'] = df_one_asp_st[['count_away', 'mean_diff_win_away', 'mean_diff_drow_away', 'mean_diff_loss_away']].apply(lambda x: (x[0]*x[1], x[0]*x[2], x[0]*x[3]), axis=1)
    print('mean multi count sum  win  =', round(df_one_asp_st.mean_multi_count.map(lambda x: x[0]).sum(), 3))
    print('mean multi count sum  drow =', round(df_one_asp_st.mean_multi_count.map(lambda x: x[1]).sum(), 3))
    print('mean multi count sum  loss =', round(df_one_asp_st.mean_multi_count.map(lambda x: x[2]).sum(), 3))

In [None]:
df_main_asps_gb.iloc[game_ind].to_frame().T

In [None]:
# df_one_asp_st.head()

In [None]:
# More popular aspects for win, drow or loss

In [None]:
# Count and determinate Fav and Pre aspects

In [None]:
# 1. Calculate mean diff for each aspect and 2. Calculate with division by count_games

In [None]:
# ML only games with 5 and less aspects

In [None]:
Stop ********************************************************

In [None]:
# All days 'top' leages

# if len_asps > 1 and len(count_asps[count_asps > len_asps - 1]) - 3167 games count
# 0 	bets_total 	1.009009
# 1 	bets_total_f0 	1.016969
# 2 	bets_total_f05 	1.023448
# 3 	mean_bets_total 	1.051272
# 4 	mean_bets_total_f0 	1.054787
# 5 	mean_bets_total_f05 	1.055971

# if len_asps > 2 and len(count_asps[count_asps > len_asps - 1]) - 1990 games count

# 0 	bets_total 	1.034784
# 1 	bets_total_f0 	1.028526
# 2 	bets_total_f05 	1.040695
# 3 	mean_bets_total 	1.061995
# 4 	mean_bets_total_f0 	1.068247
# 5 	mean_bets_total_f05 	1.080168

In [None]:
# if len_asps > 2 and len(count_asps[count_asps > len_asps - 1])
# Last 60 days	
# 0 	bets_total 	2.002222
# 1 	bets_total_f0 	1.097407
# 2 	bets_total_f05 	1.262963
# 3 	mean_bets_total 	1.134444
# 4 	mean_bets_total_f0 	1.134444
# 5 	mean_bets_total_f05 	1.344444

# Last 120 days
	
# 0 	bets_total 	1.648636
# 1 	bets_total_f0 	0.980455
# 2 	bets_total_f05 	1.062955
# 3 	mean_bets_total 	1.427955
# 4 	mean_bets_total_f0 	1.427955
# 5 	mean_bets_total_f05 	1.387500



# 	720 days
# 0 	bets_total 	1.230305
# 1 	bets_total_f0 	1.158286
# 2 	bets_total_f05 	1.112066
# 3 	mean_bets_total 	1.279366
# 4 	mean_bets_total_f0 	1.279366
# 5 	mean_bets_total_f05 	1.200587

# 1440 days
# 0 	bets_total 	1.105030
# 1 	bets_total_f0 	1.084671
# 2 	bets_total_f05 	1.054026
# 3 	mean_bets_total 	1.171673
# 4 	mean_bets_total_f0 	1.171673
# 5 	mean_bets_total_f05 	1.138542

# if len_asps > 3 and len(count_asps[count_asps > len_asps - 1]) > 0:
# 	720 days

# 0 	bets_total 	1.342489
# 1 	bets_total_f0 	1.174936
# 2 	bets_total_f05 	1.140215
# 3 	mean_bets_total 	1.240601
# 4 	mean_bets_total_f0 	1.240601
# 5 	mean_bets_total_f05 	1.211803


# 	1080 days
# 0 	bets_total 	1.293865
# 1 	bets_total_f0 	1.199018
# 2 	bets_total_f05 	1.122239
# 3 	mean_bets_total 	1.233098
# 4 	mean_bets_total_f0 	1.233098
# 5 	mean_bets_total_f05 	1.164448


# 1440 days
# 0 	bets_total 	1.182235
# 1 	bets_total_f0 	1.116998
# 2 	bets_total_f05 	1.082370
# 3 	mean_bets_total 	1.129932
# 4 	mean_bets_total_f0 	1.129932
# 5 	mean_bets_total_f05 	1.119097

# if len_asps > 5 and len(count_asps[count_asps > len_asps - 1]) > 0:
# 360 days 
# 0 	bets_total 	1.2272
# 1 	bets_total_f0 	0.8480
# 2 	bets_total_f05 	0.8900
# 3 	mean_bets_total 	1.0668
# 4 	mean_bets_total_f0 	1.0668
# 5 	mean_bets_total_f05 	1.0300


# 720 days
# 0 	bets_total 	1.103019
# 1 	bets_total_f0 	0.832075
# 2 	bets_total_f05 	0.864528
# 3 	mean_bets_total 	0.870189
# 4 	mean_bets_total_f0 	0.870189
# 5 	mean_bets_total_f05 	0.881509

# 1080 days
# 0 	bets_total 	1.312281
# 1 	bets_total_f0 	0.981053
# 2 	bets_total_f05 	0.976842
# 3 	mean_bets_total 	1.016491
# 4 	mean_bets_total_f0 	1.016491
# 5 	mean_bets_total_f05 	0.992632

# 1440 days
# 0 	bets_total 	1.091327
# 1 	bets_total_f0 	0.900408
# 2 	bets_total_f05 	0.969898
# 3 	mean_bets_total 	0.937959
# 4 	mean_bets_total_f0 	0.937959
# 5 	mean_bets_total_f05 	0.994286

# if len_asps > 6 and len(count_asps[count_asps > len_asps - 1]) > 0:
# 360 days
# 0 	bets_total 	1.14625
# 1 	bets_total_f0 	1.16375
# 2 	bets_total_f05 	1.08250
# 3 	mean_bets_total 	1.16375
# 4 	mean_bets_total_f0 	1.16375
# 5 	mean_bets_total_f05 	1.08250


# if len_asps > 7 and len(count_asps[count_asps > len_asps - 1]) > 0:
# 720 days

# 0 	bets_total 	1.287273
# 1 	bets_total_f0 	1.020000
# 2 	bets_total_f05 	0.947273
# 3 	mean_bets_total 	1.020000
# 4 	mean_bets_total_f0 	1.020000
# 5 	mean_bets_total_f05 	0.947273

# 1080 days
# 0 	bets_total 	1.287273
# 1 	bets_total_f0 	1.020000
# 2 	bets_total_f05 	0.947273
# 3 	mean_bets_total 	1.020000
# 4 	mean_bets_total_f0 	1.020000
# 5 	mean_bets_total_f05 	0.947273

In [None]:
# df_profit = pd.DataFrame(date_profit_lt, columns=['date', 'sum_bets', 'count_games', 'profit'])
# df_profit

In [None]:
# df_all_count_asps.head(10)

In [None]:
# Add more biggest different before mean statistic and statistic with some aspects gor win, drow and loss
# Add column with bets coefs for all equal aspects 

In [None]:
# df_main_asps[df_main_asps.game_id == '1735528'].fs_asps_wt_pls

In [None]:
# df_main_asps[df_main_asps.game_id.isin(df_count_asps.loc['1901627'].game_id_eq_asps)].fs_asps_wt_pls.value_counts().head()

In [None]:
# df_main_asps[df_main_asps.game_id == '1951686'].fs_asps_wt_pls

In [None]:
# df_main_asps[df_main_asps.game_id.isin(df_count_asps.loc['1951686'].game_id_eq_asps)].fs_asps_wt_pls.value_counts().head()

In [None]:
# ind = 5

# asps = df_main_asps[df_main_asps.game_id == df_unique_game_id.iloc[ind].game_id].fs_asps_wt_pls.to_list()
# asps

In [None]:
# df_main_asps[df_main_asps.game_id == '10934817']

In [None]:
# df_unique_game_id.iloc[ind]

In [None]:
# asps = [ 'H1_c_Pl', 'Mn_o_PF', 'H10_c_APl']# 'V10_o_ANN', 'H4_c_ADPF', 'Ur_o_V7', 'Mn_o_AV10',] 

# sr_game_ids     = df_main_asps[df_main_asps.fs_asps_wt_pls.isin(asps)].game_id.value_counts()
# ind_sr_game_ids = sr_game_ids[sr_game_ids > len(asps) - 4].index

# df_unique_game_id[df_unique_game_id.game_id.isin(ind_sr_game_ids)].result.value_counts()

In [None]:
# df_unique_game_id.iloc[ind].result

In [None]:
#    res     - st_res       / fora
# 1. fav_win - fav_win    - fav_win_bets
# 2. fav_win - fav_drow   - 0
# 3. fav_win - fav_loss   - 0

# 4. fav_drow - fav_drow  - fav_drow_bets / 1
# 5. fav_drow - fav_win   - 0
# 6. fav_drow - fav_loss  - 0 / 1

# 7. fav_loss - fav_loss  - fav_loss_bets / fav_loss_bets:0 / fav_loss_bets:0.5 
# 8. fav_loss - fav_win   - 0
# 9. fav_loss - fav_drow  - 0 / fav_loss_bets:0 / fav_loss_bets:0.5

In [None]:
# # Sample list
# numeric_list = [1, 3, 11.3, 12, 15, 16.7, 25]

# # Given number
# given_number = 14

# # Define the range
# range_limit = 3

# # Find the three closest numeric values within the given range
# closest_values = sorted((x for x in numeric_list if given_number - range_limit <= x <= given_number + range_limit), key=lambda x: abs(x - given_number))[:4]

# print("Three closest numeric values within range:", closest_values)

In [None]:
Stop

In [None]:
col_names   = ['fs_asps', 'cnt_asp_for_day', 'fs_asps_wt_pls', 'month_yr', 'cnt_asp_wt_pls_for_day'][2:4] # 
print('col_names -', col_names)
asps_cnt_lt = ['fs_asps_count', 'fs_asps_count_wt_pls'][1]

In [None]:
fav_win  = StatDataPrepare.fav_win_count
fav_drow = StatDataPrepare.fav_drow_count
fav_loss = StatDataPrepare.fav_loss_count

In [None]:
df_stat   = df_main_asps.groupby(col_names).agg({'day_month_yr':'nunique', 'f_point':'nunique', 's_point':'nunique',
                                     'result':['count', fav_win, fav_drow, fav_loss], # lambda x: x.tolist() - rename: '<lambda_0>':'list_results'
                                     'win_bets':'sum', 'drow_bets':'sum', 'loss_bets':'sum',            
                                    }).rename(columns={'count':'games', 'fav_win_count':'fav_win', 'fav_drow_count':'fav_drow', 'fav_loss_count':'fav_loss', })

df_stat['win_prof']  = round(df_stat[('win_bets', 'sum')] / df_stat[('result', 'games')], 2)
df_stat['drow_prof'] = round(df_stat[('drow_bets', 'sum')] / df_stat[('result', 'games')], 2)
df_stat['loss_prof'] = round(df_stat[('loss_bets', 'sum')] / df_stat[('result', 'games')], 2)

In [None]:
count_greater_than_1 = lambda x: (x > 1).sum()

In [None]:
df_fs_asps_stat = df_stat.groupby(level=col_names[0]).agg({('loss_prof',''): count_greater_than_1, ('day_month_yr', 'nunique'):'sum', ('result', 'games'):'sum', ('win_bets', 'sum'):'sum', ('drow_bets', 'sum'):'sum', 
                                                           ('loss_bets', 'sum'):'sum', ('result', 'fav_loss'):'sum'}).rename(
                                                               columns={'win_bets': 'fs_asps_win_bets', 'drow_bets': 'fs_asps_drow_bets', 'loss_bets': 'fs_asps_loss_bets'})   

df_stat = df_stat.merge(df_fs_asps_stat, how='left', left_index=True, right_index=True).rename(columns={'loss_prof_y':'month_profit_ov_1','loss_prof_x':'loss_prof', 'day_month_yr_x':'day_month_yr', 'day_month_yr_y':'fs_asps_day_month_yr', 
                                                                                                        'result_x':'result', 'result_y':'fs_asps_result'}).rename_axis(index={'cnt_asp_wt_pls_for_day':'cawpfd'})

df_stat['fs_win_prof']  = round(df_stat[('fs_asps_win_bets', 'sum')] / df_stat[('fs_asps_result', 'games')], 2)
df_stat['fs_drow_prof'] = round(df_stat[('fs_asps_drow_bets', 'sum')] / df_stat[('fs_asps_result', 'games')], 2)
df_stat['fs_loss_prof'] = round(df_stat[('fs_asps_loss_bets', 'sum')] / df_stat[('fs_asps_result', 'games')], 2)

In [222]:
df_fs_asps_stat

NameError: name 'df_fs_asps_stat' is not defined

In [None]:
df_month_yr = df_stat.index.get_level_values('fs_asps_wt_pls').value_counts().to_frame()
df_month_yr.reset_index(inplace=True)
df_month_yr.rename(columns={'index':'fawp', 'fs_asps_wt_pls':'month_yr_count'}, inplace=True)
df_month_yr.tail()

In [None]:
fs_asps_wt_pls_lt = df_stat.reset_index().fs_asps_wt_pls.to_list()

df_fawp = pd.DataFrame()
df_fawp['fawp'] = fs_asps_wt_pls_lt
df_fawp = df_fawp.merge(df_month_yr, how='left', left_on = 'fawp', right_on = 'fawp')

df_stat['month_yr_count'] = df_fawp.month_yr_count.to_list()
df_stat.insert(0, 'month_yr_count', df_stat.pop('month_yr_count'))

In [None]:
df_stat.insert(1, 'month_profit_ov_1', df_stat.pop('month_profit_ov_1'))
df_stat.insert(15, ('fs_asps_result', 'fav_loss'), df_stat.pop(('fs_asps_result', 'fav_loss')))

In [None]:
# Relationship resuts by games count for Fav Pre roles only in full DF
df_exp = df_unique_game_id[df_unique_game_id.host_role.isin(['Fav', 'Pre'])].copy()
df_exp.reset_index(drop=True, inplace=True)

print('shape - df_exp       - ', df_exp.shape[0])
coef_fav_win = round(df_exp.shape[0] / df_exp[df_exp.result == 'fav_win'].shape[0], 2)
print('df_exp fav_win       - ', coef_fav_win, ' - count -', df_exp[df_exp.result == 'fav_win'].shape[0])
coef_fav_drow = round(df_exp.shape[0] / df_exp[df_exp.result=='fav_drow'].shape[0], 2)
print('df_exp fav_drow      - ', coef_fav_drow, ' - count -', df_exp[df_exp.result == 'fav_drow'].shape[0])
coef_fav_loss = round(df_exp.shape[0] / df_exp[df_exp.result=='fav_loss'].shape[0], 2)
print('df_exp fav_loss      - ', coef_fav_loss, ' - count -', df_exp[df_exp.result == 'fav_loss'].shape[0])

In [None]:
print('Uniq days -', len(df_main_asps.day_month_yr.unique()), '                 ***********************************************************')
print('Uniq month -', len(df_main_asps.month_yr.unique()))

In [None]:
df_asps_w_profit = df_stat[((df_stat.fs_win_prof > 1.01) | (df_stat.fs_drow_prof > 1.01) | (df_stat.fs_loss_prof > 1.01)) & (df_stat[('fs_asps_day_month_yr', 'nunique')] > 200)].sort_values(by='fs_loss_prof', ascending=False)
df_asps_w_profit.head(1000)

In [None]:
# 10 H1_c_Pl        	1 	22 	1 	1 	22 	17 	3 	2 	20.39 	20.5 	40.5 	0.93 	0.93 1.84 	730 	1128 	1053.94 	873.12 	     1668.0 	0.93 	0.77 	1.48

# 9 - H1_c_Pl 	      1 	24 	1 	1 	24 	19 	3 	2 	22.92 	20.5 	40.5 	0.96 	0.85 1.69 	832 	1376 	1294.42 	1069.05 	1896.0 	   0.94 	0.78 	1.38

# 8.5 - H1_c_Pl 	  1 	26 	1 	1 	26 	20 	3 	3 	24.22 	20.5 	50.5 	0.93 	0.79 1.94 	885 	1547 	1463.2 	    1202.04 	2035.71 	0.95 	0.78 	1.32
# 8.5 - Mn_o_PF     	1 	49 	1 	1 	50 	45 	2 	3 	54.06 	10.45 	44.0 	1.08 	0.21 0.88 	912 	1133 	1100.23 	  760.64 	1313.04 	0.97 	0.67 	1.16

# 8 - H1_c_Pl   	1 	32 	1 	1 	32 	25 	4 	3 	30.86 	24.62 	50.5 	0.96 	0.77 	1.58 	954 	1740 	1640.06 	1407.81 	2216.46 	0.94 	0.81 	1.27

# 7.5 - H1_c_Pl   	1 	38 	1 	1 	38 	29 	6 	3 	36.31 	33.61 	50.5 	0.96 	0.88 	1.33 	1014 	1918 	1811.76 	1559.81 	2367.02 	0.94 	0.81 	1.23
# 7 - H1_c_Pl 	      1 	41 	1 	1 	41 	30 	7 	4 	37.63 	39.31 	58.9 	0.92 	0.96 1.44   1082 	2166 	2039.8 	    1783.98 	2641.22 	0.94 	0.82 	1.22
# 6.5 - H1_c_Pl     	1 	48 	1 	1 	48 	33 	8 	7 	41.96 	44.16 	83.2 	0.87 	0.92 1.73 	1169 	2490 	2340.83 	2043.96 	3037.5 	    0.94 	0.82 	1.22
# 6 - H1_c_Pl 	      1 	56 	1 	1 	56 	40 	8 	8 	51.72 	44.16 	91.0 	0.92 	0.79 1.62 	1260 	2849 	2680.44 	2381.58 	3359.0  	0.94 	0.84 	1.18
# 5.5 - Mn_t_PF 136 	2 	1 	1 	4 	3 	0 	1 	4.29 	0.0 	13.5 	1.07 	0.0 	3.38 	1292 	2069 	1992.13 	1516.42 	2394.45 	0.96 	0.73 	1.16


# 2.5-H4_c_APF 	31 	    2 	1 	1 	6 	3 	1 	2 	4.37 	3.96 	10.4 	0.73 	0.66 	1.73 	1279 	2223 	2018.19 	2075.79 	2534.74 	0.91 	0.93 	1.14
# 3 - H4_c_APF 	100 	1 	1 	1 	5 	2 	1 	2 	3.12 	4.25 	10.6 	0.62 	0.85 	2.12 	1146 	1870 	1710.25 	1743.39 	2140.59 	0.91 	0.93 	1.14
# 3.5-H4_c_APF 	1   150 1 	1 	151 	100 	34 	17 	138.15 	152.81 	126.2 	0.91  1.01 0.84 	1013 	1571 	1441.17 	1481.96 	1797.15 	0.92 	0.94 	1.14
# 2 - H4_c_APF 	107 	1 	1 	1 	7 	4 	1 	2 	7.2 	4.0 	10.0 	1.03 	0.57 	1.43 	1422 	2656 	2414.49 	2488.79 	2961.65 	0.91 	0.94 	1.12
# 1 - H4_c_APF 	11 	37 	1 	1 	135 	77 	29 	29 	129.78 	119.81 	150.61 	0.96 	0.89 	1.12 	1709 	3929 	3616.66 	3683.83 	4158.32 	0.92 	0.94 	1.06


# 4.5 - H1_c_Pl 	1 	83 	1 	1 	83 	57 	13 	13 	77.15 	64.94 	125.23 	0.93 	0.78 	1.51 	1518 	4357 	4068.25 	3798.68 	4927.04 	0.93 	0.87 	1.13
# 5 -   H1_c_Pl 	1 	75 	1 	1 	75 	51 	12 	12 	68.01 	60.99 	119.08 	0.91 	0.81 	1.59 	1433 	3753 	3517.09 	3246.56 	4257.64 	0.94 	0.87 	1.13


# 4 - Mn_q_APF 	119 	1 	1 	1 	5 	5 	0 	0 	7.38 	0.0 	0.0 	1.48 	0.0 	0.0 	1405 	2577 	2417.34 	2172.62 	2887.22 	0.94 	0.84 	1.12
# 1.5 - PF_c_Pl 	22 	11 	1 	1 	58 	34 	13 	11 	56.69 	52.43 	51.23 	0.98 	0.9 	0.88 	1180 	3312 	3092.89 	2989.15 	3560.81 	0.93 	0.9 	1.08


# 0.5 - H7_c_ANN 	106 	1 	1 	1 	51 	27 	15 	9 	53.72 	54.44 	32.78 	1.05 	1.07 0.64 	1407 	5395 	5061.64 	4810.1 	     5597.0 	0.94 	0.89 	1.04


In [None]:
# string = 'V1_c_APS'

# for key, val in asps_cnt_lt.items():
#     if string in key[1]:
#         print(key[1], val)

In [None]:
# asp     = string
# cnt_asp = 4
# col_res = '1'
# df_main_asps[(df_main_asps[col_names[0]] == asp)].sort_values(by='game_utc')[['game_utc', 'fs_asps', 'cnt_asp_for_day', 'result', col_res]]  #& (df_main_asps[col_names[1]] == cnt_asp)]#

In [None]:
# df_asps_w_profit.columns

In [None]:
Stop *********************************************************************************************

In [None]:
# with open('pickle_files/aspected_files/df_asps_w_profit_06_09__06_08_2023', 'rb') as f:
#     df_sept = pickle.load(f)

# with open('pickle_files/aspected_files/df_asps_w_profit_06_05__06_04_2023', 'rb') as f:
#     df_aprl = pickle.load(f)    
    
# with open('pickle_files/aspected_files/df_asps_w_profit_15_02__15_01_2023', 'rb') as f:
#     df_febr = pickle.load(f)    

In [None]:
# df_sept[df_sept.index.get_level_values(0).isin(df_aprl.index.get_level_values(0))]

In [None]:
for x in df_asps_w_profit.index.get_level_values(0).unique():
    if x in df_sept.index.get_level_values(0).unique() and x in df_aprl.index.get_level_values(0).unique() and x in df_febr.index.get_level_values(0).unique():
        print(x)

In [None]:
len(df_sept.index.get_level_values(0).unique())

In [None]:
res_ind = ['H1_c_ADPS']
res_ind

In [None]:
df_sept[df_sept.index.get_level_values(0).isin(res_ind)]

In [223]:
df_aprl[df_aprl.index.get_level_values(0).isin(res_ind)]

NameError: name 'df_aprl' is not defined

In [None]:
df_febr[df_febr.index.get_level_values(0).isin(res_ind)]

In [None]:
df_asps_w_profit[df_asps_w_profit.index.get_level_values(0).isin(res_ind)]

In [None]:
# file = open('pickle_files/aspected_files/df_asps_w_profit_15_02__15_01_2023', 'wb')
# pickle.dump(df_asps_w_profit, file) 
# file.close()

In [None]:
Stop ********************************************************************************************

In [None]:
main_asps_lt = df_main_asps.fs_asps.value_counts(ascending=True).keys().to_list()

In [None]:
reserch_asps = ['Mn_c_ADPS', 'PS_c_DPS', 'V7-DPS_c_APS', 'H4_c_Ur', 'Mn_i_APS', 'H1_c_Pl', 'Mn_c_ASn-AV7', 'V10-DPS_c_ANp', 'H10_c_Pl', 'Mn_q_APS', 'Mn_t_PF', 'Mn_o_PF',
               'Mn_o_PS']

str(df_main_asps[df_main_asps.fs_asps.isin(reserch_asps)].fs_asps_count.unique())

In [None]:
# 'Mn_c_ADPS' - f, PS_c_DPS - p, V7-DPS_c_APS - p
# 'H4_c_Ur' - (16-6 drow-25.21, 18-6 drow-19.57, 5-1 drow-4.3, 43-10 drow-.., 4-2 loss-9.5), 
# 'Mn_i_APS'- (7-5 drow-17.56, 6-2 drow-6.3, 7-2 drow-7.65, 15-8 drow-29.6, 8-4 drow-15)
# 'H1_c_Pl' - (31-8 drow-33.5, 35-11 drow-37.59, 13-4 drow-13.9, 24-5 drow-20.7)
# 'Mn_c_ASn-AV7' - (2-2 drow-7.7, 32-11 drow-37.94)
# 'V10-DPS_c_ANp' - (4- 2 drow-7.1,)
# 'H10_c_Pl' - (3-2 drow-7.17, 6-3 loss-13.12, 2-1 drow-7.1[1 loss-3.25], 2-1 drow-7.1)
# 'Mn_q_APS' - (6-2 loss-9.3, 29-21 win-33.8)
# 'Mn_t_PF' - (23-21 win-33.7, 27-23 win-5 bets)

In [None]:
# Find some aspects
seek_asp =  main_asps_lt[0] + '_1'
print(seek_asp)
seek_inds = list(map(lambda x: [x[0] for val in x[1] if seek_asp == val], enumerate(df.fs_asps_count)))
seek_inds = [x[0] for x in seek_inds if x != []]
seek_gid = list(df[df.index.isin(seek_inds)].game_id.values)
df[df.index.isin(seek_inds)]

In [None]:
df_main_asps.head(1)

In [None]:
df_main_asps[df_main_asps.game_id.isin(seek_gid)][['f_point', 's_point', 'type', 'approach', 'sing', 'tr_orb', 'bp_asp', 'den_point', 'fmain_ch', 'smain_ch', 'f_role', 's_role', 'fs_asps', 'fs_asps_count']]

In [None]:
# Find some aspects
seek_asp_2 = 'Mn_o_PS_8'
seek_inds_2 = list(map(lambda x: [x[0] for val in x[1] if seek_asp_2 == val], enumerate(df.fs_asps_count)))
seek_inds_2 = [x[0] for x in seek_inds_2 if x != []]
print('shape      ', df[df.index.isin(seek_inds_2)].shape[0])
df[df.index.isin(seek_inds_2)].result.value_counts(dropna=False)

In [None]:
df_exp = df[df.index.isin(seek_inds_2)].copy()
print('shape -', df_exp.shape[0])

In [None]:
count_win = df_exp[df_exp[['host_role', 'result', '1', '2']].apply(lambda x: StatDataPrepare.fav_win_bets_statistic_coeff(x[0], x[1], x[2], x[3]), axis=1).values > 0].result.count()
print('win - ', df_exp[['host_role', 'result', '1', '2']].apply(lambda x: StatDataPrepare.fav_win_bets_statistic_coeff(x[0], x[1], x[2], x[3]), axis=1).sum(), ' - ', count_win)

In [None]:
count_loss = df_exp[df_exp[['host_role', 'result', '1', '2']].apply(lambda x: StatDataPrepare.fav_loss_bets_statistic_coeff(x[0], x[1], x[2], x[3]), axis=1).values > 0].result.count()
print('loss -', df_exp[['host_role', 'result', '1', '2']].apply(lambda x: StatDataPrepare.fav_loss_bets_statistic_coeff(x[0], x[1], x[2], x[3]), axis=1).sum(), ' - ', count_loss)

In [224]:
count_drow = df_exp[df_exp[['result', 'X']].apply(lambda x: StatDataPrepare.fav_drow_bets_statistic_coeff(x[0], x[1]), axis=1).values > 0].result.count()
print('drow -', df_exp[['result', 'X']].apply(lambda x: StatDataPrepare.fav_drow_bets_statistic_coeff(x[0], x[1]), axis=1).sum(), ' - ', count_drow)

NameError: name 'df_exp' is not defined

In [None]:
seek_gid_2 = list(df[df.index.isin(seek_inds_2) & (df.result == 'fav_loss')].game_id)
seek_gid_2

In [None]:
df_main_asps[df_main_asps.game_id.isin(seek_gid_2)][['f_point', 's_point', 'type', 'approach', 'sing', 'tr_orb', 'bp_asp', 'den_point', 'fmain_ch', 'smain_ch', 'f_role', 's_role', 'fs_asps', 'fs_asps_count']]

In [None]:
# # Mean statistic
df.result.value_counts(dropna=False)

In [None]:
Stop *****************************************************************************************************************************

### OLD RESEARCH

In [None]:
## Load this file if need to check data about game
# with open('pickle_files/aspected_files/df_games_desc_cr_10_08_2022', 'rb') as f:
#     df_games_desc = pickle.load(f)
# print('shape - df_games_desc  -', df_games_desc.shape)    

In [None]:
# df_games_desc.tail(2)

In [None]:
# Create table with best aspects for Pre and Fav, which view more changes about mean count relation between ('fav_win'+'fav_drow'/'fav_loss') and ('fav_win'/'fav_drow'+'fav_loss')
# Add all aspect's changes for each game and create tabel with best game's rating changes.

In [None]:
with open('pickle_files/aspected_files/df_asps_table_cr_16_09_2022', 'rb') as f:
    df_asps_tb = pickle.load(f)
print('shape - df_asps_table  -', df_asps_tb.shape)    

In [None]:
df_asps_tb.tail(2)

#### Fix errors in aspects orbs values

In [None]:
df_orbs = df_asps_tb.tr_orb.value_counts(dropna=False, ascending=True).reset_index()

In [None]:
# 'index' col it's tr_orb values
df_orbs.head()

In [None]:
df_asps_tb.tr_orb = df_asps_tb.tr_orb.map(lambda x: StatDataPrepare.fix_erorr_aspects_values(x))

In [None]:
df_asps_tb.tr_orb.max()

#### Restructurisation aspects by rules

In [None]:
# Check some possible errors:
# Haven't aspects between 'Pars Fortuna' - 'Antes Pluto' - it aspects exist as 'Antes Pars Fortuna' - 'Pluto'
# Haven't aspects between 'Asc' - 'Pars Fortuna' - ?

In [None]:
# Keep one position from Rules in 'fmain_ch' and 'smain_ch'.
# Concatinate f_point with fmain_ch, and s_point with smain_ch in two new columns.

In [None]:
# Main rules:
# Rules for correct work manifestation of aspects in 'fmain_ch' and 'smain_ch' columns.
# 
# 'Moon' can't be any ruler or his antes:'ruler_asc', 'ruler_pars_fortuna', 'antes_ruler_mc' .
# 'Saturn' can be only: 'ruler_asc', 'ruler_desc', 'antes_ruler_asc', 'antes_ruler_desc', if 'Saturn' has multiple rulers position as (ruler_asc, ruler_pars_fortuna) - 
#          keep only 'ruler_asc'. 

# For each planet besides ('Moon', 'Saturn') if it has ruler position as 'ruler_asc', 'ruler_desc' and their 'antes' - keep only it and in multiple position too. -----
# For each planet besides ('Moon', 'Saturn') if it has ruler position as 'ruler_pars_fortuna' and other position as 'ruler_mc', 'ruler_ic' and their antes 
#          kepp 'ruler_pars_fortuna' and in multiple position too. ---------

# Remove planets without description besides 'Moon', 'Saturn', 'Sun' and higher planets. ---------

In [None]:
# Secondary rules:
# Add first 'fsec_ch' or 'ssec_ch' characteristic for each septener's planet besides ('Moon', 'Saturn') if 'fmain_chg_chr' or 'smain_chg_chr' are empty.

#### Change aspects columns values by Main rules.

In [None]:
df_asps_tb['fmain_chg_chr'] = df_asps_tb[['f_point', 'fmain_ch']].apply(lambda x: StatDataPrepare.change_main_characteristic_by_rules(x[0], x[1]), axis=1)
df_asps_tb['smain_chg_chr'] = df_asps_tb[['s_point', 'smain_ch']].apply(lambda x: StatDataPrepare.change_main_characteristic_by_rules(x[0], x[1]), axis=1)

In [None]:
df_asps_tb.fmain_chg_chr.value_counts(dropna=False)

In [None]:
df_asps_tb.smain_chg_chr.value_counts(dropna=False)

In [None]:
df_asps_tb[df_asps_tb.f_point == 'Moon'].fmain_chg_chr.value_counts(dropna=False)

In [None]:
df_asps_tb[df_asps_tb.s_point == 'Saturn'].smain_chg_chr.value_counts(dropna=False)

In [None]:
df_asps_tb[df_asps_tb.f_point == 'Sun'].fmain_chg_chr.value_counts(dropna=False)

In [225]:
df_asps_tb['fpoint_chr'] = df_asps_tb[['f_point', 'fmain_chg_chr']].apply(lambda x: x[0] if x[1] == None else x[1], axis=1)
df_asps_tb['spoint_chr'] = df_asps_tb[['s_point', 'smain_chg_chr']].apply(lambda x: x[0] if x[1] == None else x[1], axis=1)

NameError: name 'df_asps_tb' is not defined

In [None]:
print('len - ', len(df_asps_tb.fpoint_chr.value_counts(dropna=False).keys().tolist()))
str(df_asps_tb.fpoint_chr.value_counts(dropna=False).keys().tolist())

In [None]:
print('len - ', len(df_asps_tb.spoint_chr.value_counts(dropna=False).keys().tolist()))
str(df_asps_tb.spoint_chr.value_counts(dropna=False).keys().tolist())

#### Add values by Secondary rules.

In [None]:
df_asps_tb['fsec_chg_chr'] = df_asps_tb[['f_point', 'fsec_ch']].apply(lambda x: StatDataPrepare.change_sec_characteristic_by_rules(x[0], x[1]), axis=1)
df_asps_tb['ssec_chg_chr'] = df_asps_tb[['s_point', 'ssec_ch']].apply(lambda x: StatDataPrepare.change_sec_characteristic_by_rules(x[0], x[1]), axis=1)

In [None]:
df_asps_tb.fsec_chg_chr.value_counts(dropna=False)

In [None]:
df_asps_tb.ssec_chg_chr.value_counts(dropna=False)

In [None]:
points_for_sec_chr_lt = ['Sun', 'Mercury', 'Venus', 'Mars', 'Jupiter']

#### Chek points without both characteristic

In [None]:
df_asps_tb[(df_asps_tb.f_point == 'Mercury') & (df_asps_tb.fmain_ch == ()) & (df_asps_tb.fsec_ch == ())].head()
# Only 'Sun' haven't

In [None]:
df_asps_tb[(df_asps_tb.f_point == 'Mars') & df_asps_tb.fmain_chg_chr.isna() & df_asps_tb.fsec_chg_chr.isna()]

In [None]:
df_asps_tb.fpoint_chr = df_asps_tb[['fpoint_chr', 'fsec_chg_chr']].apply(lambda x: x[1] if x[0] in points_for_sec_chr_lt and x[1] != None else x[0], axis=1)
df_asps_tb.spoint_chr = df_asps_tb[['spoint_chr', 'ssec_chg_chr']].apply(lambda x: x[1] if x[0] in points_for_sec_chr_lt and x[1] != None else x[0], axis=1)

In [None]:
print('len - ', len(df_asps_tb.fpoint_chr.value_counts(dropna=False).keys().tolist()))
str(df_asps_tb.fpoint_chr.value_counts(dropna=False).keys().tolist())

In [None]:
print('len - ', len(df_asps_tb.spoint_chr.value_counts(dropna=False).keys().tolist()))
str(df_asps_tb.spoint_chr.value_counts(dropna=False).keys().tolist())

##### ****************************************************************************

In [None]:
df_asps_tb['points_chr'] = df_asps_tb[['fpoint_chr', 'spoint_chr']].apply(lambda x: (x[0], x[1]), axis=1)

In [None]:
df_asps_tb.points_chr.value_counts(dropna=False).count()

In [None]:
# df_asps_tb.points_chr.value_counts(dropna=False)

In [None]:
df_asps_tb.head(1)

In [None]:
df_asps_table = df_asps_tb.copy()

In [None]:
# Stop

#### Clear and restructurisation some description of aspects.

##### Create df all 'diff' - 'sing' aspects besides aspects of houses.

In [None]:
df_planets_sing_diff = df_asps_table[(~df_asps_table.f_point.isin(['Asc, Dess', 'MC', 'IC'])) & (df_asps_table.sing == 'diff')].copy()
print('shape - ', df_planets_sing_diff.shape)
df_planets_sing_diff.tail(3)

In [None]:
# df_asps_table = df_asps_table[~df_asps_table.index.isin(df_planets_sing_diff.index)]
print('shape - ', df_asps_table.shape)
df_asps_table.tail(3)

In [None]:
df_planets_sing_diff.reset_index(drop=True, inplace=True)
df_asps_table.reset_index(drop=True, inplace=True)

#### Create df 'Moon' divergent, with more 5 orbs and 'moon_conv' aspects.

In [None]:
df_moon_diver = df_asps_table[df_asps_table.approach.isin(['moon_diver', 'moon_diver_weak'])].copy()
print('shape - ', df_moon_diver.shape)
df_moon_diver.tail(3)

In [None]:
df_moon_more_orbs = df_asps_table[(df_asps_table.f_point == 'Moon') & (df_asps_table.tr_orb > 5)].copy()
print('shape - ', df_moon_more_orbs.shape)
df_moon_more_orbs.tail(3)

In [None]:
df_moon_conv_asps = df_asps_table[df_asps_table.approach.isin(['moon_conv'])].copy()
print('shape - ', df_moon_conv_asps.shape)
df_moon_conv_asps.tail(3)

In [226]:
# df_asps_table = df_asps_table[(~df_asps_table.index.isin(df_moon_diver.index)) & (~df_asps_table.index.isin(df_moon_more_orbs.index)) & (~df_asps_table.index.isin(df_moon_conv_asps.index))].copy()
print('shape - ', df_asps_table.shape)
df_asps_table.tail(3)

NameError: name 'df_asps_table' is not defined

In [None]:
df_asps_table[df_asps_table.f_point == 'Moon'].approach.value_counts(dropna=False)

In [None]:
df_moon_diver.reset_index(drop=True, inplace=True)
df_moon_more_orbs.reset_index(drop=True, inplace=True)
df_moon_conv_asps.reset_index(drop=True, inplace=True)
df_asps_table.reset_index(drop=True, inplace=True)

#### Create df aspect's points without characteristic

In [None]:
str(df_asps_table.fpoint_chr.value_counts(dropna=False).keys().tolist())

In [None]:
str(df_asps_table.spoint_chr.value_counts(dropna=False).keys().tolist())

In [None]:
point_chr_lt = ['Moon', 'Antes Moon', 'Pars Fortuna', 'Antes Pars Fortuna', 'Asc', 'Desc', 'MC', 'IC', 'Sun', 'Saturn', 'Uranus',  'Neptune', 'Pluto', 'ruler_desc', 'ruler_asc', 'ruler_ic', 'ruler_mc', 
                'ruler_pars_fortuna', 'North Node', 'South Node',  'antes_ruler_asc', 'antes_ruler_desc', 'antes_ruler_mc', 'antes_ruler_ic', 'Antes Saturn',  'Antes Uranus', 'Antes Pluto', 'Antes Neptune']

In [None]:
df_wt_charact = df_asps_table[(~df_asps_table.fpoint_chr.isin(point_chr_lt)) | (~df_asps_table.spoint_chr.isin(point_chr_lt))].copy()
print('shape - ', df_wt_charact.shape)
df_wt_charact.tail(3)

In [None]:
# df_asps_table = df_asps_table[~df_asps_table.index.isin(df_wt_charact.index)].copy()
print('shape - ', df_asps_table.shape)
df_asps_table.tail(3)

In [None]:
df_wt_charact.reset_index(drop=True, inplace=True)
df_asps_table.reset_index(drop=True, inplace=True)

#### Create df divergent aspects for rulers and parses.

In [None]:
rulers_lt = ['ruler_desc', 'ruler_asc', 'ruler_ic', 'ruler_mc', 'ruler_pars_fortuna']
pars_lt   = ['Pars Fortuna', 'Antes Pars Fortuna']

In [None]:
df_asps_table.approach.value_counts()

In [None]:
mask = (df_asps_table.approach.isin(['diver', 'diver_weak', 'diver_denide'])
        & ((df_asps_table.fpoint_chr.isin(rulers_lt) & df_asps_table.spoint_chr.isin(pars_lt)) | (df_asps_table.spoint_chr.isin(rulers_lt) & df_asps_table.fpoint_chr.isin(pars_lt))))

In [None]:
df_rulers_diver_asps = df_asps_table[mask].copy()
print('shape - ', df_rulers_diver_asps.shape)
df_rulers_diver_asps.tail(3)

In [None]:
# df_asps_table = df_asps_table[~df_asps_table.index.isin(df_rulers_diver_asps.index)].copy()
print('shape - ', df_asps_table.shape)
df_asps_table.tail(3)

In [None]:
df_rulers_diver_asps.reset_index(drop=True, inplace=True)
df_asps_table.reset_index(drop=True, inplace=True)

#### Create df not determinate aspects of hight planets.

In [None]:
str(point_chr_lt)

In [None]:
hight_planets        = ['Uranus', 'Neptune', 'Pluto', 'Antes Uranus', 'Antes Pluto', 'Antes Neptune']
aspected_points_hpls = ['Asc', 'Desc', 'MC', 'IC',  'Pars Fortuna', 'Antes Pars Fortuna', 'ruler_pars_fortuna']

In [None]:
mask = (((df_asps_table.fpoint_chr.isin(hight_planets)) & (~df_asps_table.spoint_chr.isin(aspected_points_hpls))) | 
        ((df_asps_table.spoint_chr.isin(hight_planets)) & (~df_asps_table.fpoint_chr.isin(aspected_points_hpls))))

df_not_determinate_hpls_asps = df_asps_table[mask]
print('shape - ', df_not_determinate_hpls_asps.shape)
df_not_determinate_hpls_asps.tail(3)

In [None]:
# df_asps_table = df_asps_table[~df_asps_table.index.isin(df_not_determinate_hpls_asps.index)].copy()
print('shape - ', df_asps_table.shape)
df_asps_table.tail(3)

In [227]:
df_not_determinate_hpls_asps.reset_index(drop=True, inplace=True)
df_asps_table.reset_index(drop=True, inplace=True)

NameError: name 'df_not_determinate_hpls_asps' is not defined

#### Statistic data win, drow, loss.

In [None]:
df_uniq = df_asps_table.drop_duplicates(subset='game_id').reset_index(drop=True)
print('shape - ', df_uniq.shape)
coef_fav_win = round(df_uniq.shape[0] / df_uniq[df_uniq.result == 'fav_win'].shape[0], 2)
print('df_coef fav_win       - ', coef_fav_win)
coef_fav_drow_loss = round(df_uniq.shape[0]/(df_uniq[df_uniq.result=='fav_drow'].shape[0] + df_uniq[df_uniq.result=='fav_loss'].shape[0]),2)
print('df_coef fav_drow_loss - ', coef_fav_drow_loss)
coef_fav_drow = round(df_uniq.shape[0] / df_uniq[df_uniq.result=='fav_drow'].shape[0], 2)
print('df_coef fav_drow      - ', coef_fav_drow)
coef_fav_loss = round(df_uniq.shape[0] / df_uniq[df_uniq.result=='fav_loss'].shape[0], 2)
print('df_coef fav_loss      - ', coef_fav_loss)

#### Aspects research

##### Orbs of Moon characteristic: ______ 0.3 <= 'max',  0.3 <= 1 - 'mid_max',  1 <= 2.5 - 'middle',  2.5 <= 4 - 'mid_min',  4 > 'min'
##### All orbs of planets characteristic: __ 0.2 <= 'max',  0.2 <= 1 - 'mid_max',  1 <= 1.3 - 'middle',  1.3 <= 2 - 'mid_min',  2 > 'min'

##### ****************************************************************************************************************************************

##### All planets and Moon have aspects with different sings - ('diff' in 'sings' column ) and ('diff_sing' in 'den_point' column).
##### Moon's orbs has (value > 5 deg, with 'den_point' - 'more_orb') and (bp_asp == 'yes' < 1 deg )- with 'diver' description. 

#### 'Moon' aspects research

In [None]:
# df_asps_moon = df_asps_table[df_asps_table.f_point == 'Moon'].copy()
# print('shape - ', df_asps_moon.shape)
# df_asps_moon.tail(2)

In [None]:
# df_asps_moon.approach.value_counts(dropna=False)

#### Hauses aspects research

In [None]:
# house_names    = ['Asc', 'Desc', 'MC', 'IC']
# df_asps_houses = df_asps_table[df_asps_table.f_point.isin(house_names)].copy()
# print('shape - ', df_asps_houses.shape)
# df_asps_houses.tail(2)

In [None]:
# df_asps_houses.approach.value_counts(dropna=False)

In [None]:
# df_asps_houses['chg_approach'] = df_asps_houses.approach.map(lambda x: 'in' if re.findall('in', x) else 'out')

In [None]:
# df_asps_houses.chg_approach.value_counts(dropna=False)

#### 'Saturn' and 'Antes Saturn' aspects research

In [None]:
# mask = ((df_asps_table.f_point == 'Saturn') | (df_asps_table.f_point == 'Antes Saturn') | (df_asps_table.s_point == 'Saturn') | (df_asps_table.s_point == 'Antes Saturn'))
# df_asps_saturn = df_asps_table[mask].copy()
# print('shape - ', df_asps_saturn.shape)
# df_asps_saturn.tail(2)

#### 'Sun' and 'Antes Sun' aspects research

In [None]:
# mask = ((df_asps_table.f_point == 'Sun') | (df_asps_table.f_point == 'Antes Sun') | (df_asps_table.s_point == 'Sun') | (df_asps_table.s_point == 'Antes Sun'))
# df_asps_sun = df_asps_table[mask].copy()
# print('shape - ', df_asps_sun.shape)
# df_asps_sun.tail(2)

#### Aspects without Moon, Sun and Saturn

In [None]:
# game_ids_lt = df_asps_moon.index.tolist() + df_asps_saturn.index.tolist() + df_asps_sun.index.tolist() + df_asps_houses.index.tolist()
# df_asps_usual_pls = df_asps_table[~df_asps_table.index.isin(game_ids_lt)].copy()
# print('shape - ', df_asps_usual_pls.shape)
# df_asps_usual_pls.tail(2)

#### Research statistic with experiments:

In [None]:
df_asps_table.head(1)

In [None]:
df_experiments = df_asps_table # df_asps_usual_pls , df_asps_moon, df_asps_saturn, df_asps_sun, df_asps_houses
col_names = ['points_chr', 'type'] # 'orb_char', 'ligue', 'approach', 'type'   for houses: 'chg_approach'

gb_result_loss      = df_experiments.groupby(col_names).agg({'result': ['count', StatDataPrepare.fav_loss_count]}).reset_index()   
gb_result_drow      = df_experiments.groupby(col_names).agg({'result': ['count', StatDataPrepare.fav_drow_count]}).reset_index()
gb_result_drow_loss = df_experiments.groupby(col_names).agg({'result': ['count', StatDataPrepare.fav_drow_loss_count]}).reset_index() 

In [None]:
print('coef_fav_drow_loss - ', coef_fav_drow_loss)
print('coef_fav_drow      - ', coef_fav_drow)
print('coef_fav_loss      - ', coef_fav_loss)

In [None]:
gb_result_loss['exp_loss']           = round(gb_result_loss[('result', 'count')] / coef_fav_loss) 
gb_result_loss['loss_coef_perc']     = round(gb_result_loss[('result', 'count')] / gb_result_loss[('result', 'fav_loss_count')], 2)
gb_result_loss['loss_coef_perc_dev'] = round(100 * (gb_result_loss.loss_coef_perc / coef_fav_loss - 1))

In [None]:
gb_result_drow['exp_drow']           = round(gb_result_drow[('result', 'count')] / coef_fav_drow) 
gb_result_drow['drow_coef_perc']     = round(gb_result_drow[('result', 'count')] / gb_result_drow[('result', 'fav_drow_count')], 2)
gb_result_drow['drow_coef_perc_dev'] = round(100 * (gb_result_drow.drow_coef_perc / coef_fav_drow - 1))

In [None]:
gb_result_drow_loss['exp_drow_loss']           = round(gb_result_drow_loss[('result', 'count')] / coef_fav_drow_loss) 
gb_result_drow_loss['drow_loss_coef_perc']     = round(gb_result_drow_loss[('result', 'count')] / gb_result_drow_loss[('result', 'fav_drow_loss_count')], 2)
gb_result_drow_loss['drow_loss_coef_perc_dev'] = round(100 * (gb_result_drow_loss.drow_loss_coef_perc / coef_fav_drow_loss - 1))

In [None]:
col_names = [('result', 'fav_drow_count'), ('exp_drow',''), ('drow_coef_perc', ''), ('drow_coef_perc_dev', '')]
gb_result = pd.merge(gb_result_loss, gb_result_drow[col_names], how='left', left_index=True, right_index=True)

In [None]:
col_names = [('result', 'fav_drow_loss_count'), ('exp_drow_loss',''), ('drow_loss_coef_perc', ''), ('drow_loss_coef_perc_dev', '')]
gb_result = pd.merge(gb_result, gb_result_drow_loss[col_names], how='left', left_index=True, right_index=True)

In [228]:
# Stop - aspects count 500

In [229]:
gb_result = gb_result[gb_result[('result', 'count')] > 500].sort_values(by='loss_coef_perc_dev', ascending=False)

NameError: name 'gb_result' is not defined

In [None]:
# gb_result[gb_result.points_chr == ('Moon', 'Pars Fortuna')] #'Antes Pluto'

In [None]:
gb_result['houses_features'] = gb_result[['points_chr', 'type']].apply(lambda x: (x[0][0] +'-'+ x[0][1] +'-'+ x[1]), axis=1) # chg_approach

In [None]:
df_hauses_features = gb_result[['points_chr', 'type', 'loss_coef_perc_dev', 'drow_coef_perc_dev', 'drow_loss_coef_perc_dev', 'houses_features']].copy()
df_hauses_features = df_hauses_features.droplevel(1, axis=1)
print('shape - ', df_hauses_features.shape[0])
df_hauses_features.sort_values('loss_coef_perc_dev', ascending=True).head(10)

In [None]:
# Stop

In [None]:
# file = open('pickle_files/df_aspects_loss_drow_stat_count_more_500', 'wb')
# pickle.dump(df_hauses_features, file)  
# file.close()

In [None]:
df_gr_asps_tb = pd.merge(df_asps_table, df_hauses_features, how='left', left_on=['points_chr', 'type'], right_on=['points_chr', 'type'])
print('shape - ', df_gr_asps_tb.shape)
df_gr_asps_tb.tail()

In [None]:
# file = open('pickle_files/df_aspects_tb_with_features_cr_10_08_2022', 'wb')
# pickle.dump(df_gr_asps_tb, file)  
# file.close()

In [None]:
df_gr_asps_tb.houses_features = df_gr_asps_tb[['tr_orb', 'houses_features']].apply(lambda x: (x[1], x[0]), axis=1)

In [None]:
df_gr_asps_tb.houses_features[0]

In [None]:
df_gr_asps_tb[df_gr_asps_tb.game_id == '1708335'].loss_coef_perc_dev.sum()

In [None]:
print('shape - df_game_asps_loss_drow_stat - ', df_uniq.shape[0])

In [None]:
df_loss_drow_stat_sum = df_gr_asps_tb.groupby('game_id').agg({'loss_coef_perc_dev': 'sum', 'drow_coef_perc_dev': 'sum', 'drow_loss_coef_perc_dev': 'sum'}).reset_index() 
print('shape - ', df_loss_drow_stat_sum.shape)
df_loss_drow_stat_sum

In [None]:
# file = open('pickle_files/df_games_loss_drow_aspects_stat_cr_10_08_2022', 'wb')
# pickle.dump(df_loss_drow_stat_sum, file)  
# file.close()

In [None]:
df_game_asps_loss_drow_stat = pd.merge(df_uniq, df_loss_drow_stat_sum, how='left', left_on='game_id', right_on='game_id')
print('shape - ', df_game_asps_loss_drow_stat.shape)
df_game_asps_loss_drow_stat.tail()

In [None]:
df_game_asps_loss_drow_stat.ligue.value_counts()

In [None]:
df_exp = df_game_asps_loss_drow_stat[(df_game_asps_loss_drow_stat.loss_coef_perc_dev < -245)]#| (df_game_asps_loss_drow_stat.drow_loss_coef_perc_dev < -180)]
print('shape - df_exp - ', df_exp.shape)
coef_fav_win_exp = round(df_exp.shape[0] / df_exp[df_exp.result == 'fav_win'].shape[0], 2)
print('df__exp fav_win_exp       - ', coef_fav_win_exp)
coef_fav_drow_loss_exp = round(df_exp.shape[0]/(df_exp[df_exp.result=='fav_drow'].shape[0] + df_exp[df_exp.result=='fav_loss'].shape[0]),2)
print('df_exp fav_drow_loss - ', coef_fav_drow_loss_exp)
coef_fav_drow_exp = round(df_exp.shape[0] / df_exp[df_exp.result=='fav_drow'].shape[0], 2)
print('df_exp fav_drow      - ', coef_fav_drow_exp)
coef_fav_loss_exp = round(df_exp.shape[0] / df_exp[df_exp.result=='fav_loss'].shape[0], 2)
print('df_exp fav_loss      - ', coef_fav_loss_exp)

#### Change df coef with statistic data`

In [None]:
coef_fav_loss / coef_fav_loss_exp

#### One day count of games with statistic

In [None]:
df_exp.shape[0] / 16 / 12 / 4 # 16 - years, 12 - monthes, 4 - weeks

#### Create features:

In [None]:
gb_game_features = df_gr_asps_tb.groupby('game_id').houses_features.apply(list).reset_index()
print('shape - ', gb_game_features.shape)
gb_game_features.tail()

In [None]:
gb_game_features['houses_dicts'] = gb_game_features.houses_features.map(lambda x: {var[0]:var[1] for var in x})

In [None]:
gb_game_features.houses_dicts

In [None]:
all_houses_dicts_lt = gb_game_features.houses_dicts.tolist()

In [None]:
all_houses_dicts_lt[0]

In [None]:
df_games_features = pd.DataFrame.from_dict(all_houses_dicts_lt)

In [None]:
len(df_games_features.columns)

In [230]:
df_games_features.head(1)

NameError: name 'df_games_features' is not defined

In [None]:
# df_games_features.fillna(0, inplace=True)

In [None]:
# df_games_features.columns = map(str.lower, df_games_features.columns)
df_games_features.columns = df_games_features.columns.map(lambda x: re.sub(' ', '_', x))

In [None]:
print('shape - ', df_games_features.shape)
df_games_features.tail(3)

In [None]:
df_games_features = pd.merge(gb_game_features.game_id, df_games_features, how='left', left_index=True, right_index=True)

In [None]:
print('shape - ', df_games_features.shape)
df_games_features.tail(3)

In [None]:
df_uniq.result.value_counts(dropna=False)

In [None]:
df_uniq['fav_loss']      = df_uniq.result.map(lambda x: 1 if x == 'fav_loss' else 0)
df_uniq['fav_drow']      = df_uniq.result.map(lambda x: 1 if x == 'fav_drow' else 0)
df_uniq['fav_loss_drow'] = df_uniq.result.map(lambda x: 1 if x == 'fav_loss' or x == 'fav_drow' else 0)

In [None]:
df_uniq.fav_loss.value_counts(dropna=False)

In [None]:
df_uniq.fav_drow.value_counts(dropna=False)

In [None]:
df_uniq.fav_loss_drow.value_counts(dropna=False)

In [None]:
col_names = ['game_id', 'fav_loss', 'fav_drow', 'fav_loss_drow']
df_games_features = pd.merge(df_games_features, df_uniq[col_names], how='left', left_on='game_id', right_on='game_id')

In [None]:
print('shape - ', df_games_features.shape)
df_games_features.tail(3)

In [None]:
# Stop

In [None]:
# file = open('pickle_files/aspected_files/df_games_features_cr_22_09_2022', 'wb')
# pickle.dump(df_games_features, file)  
# file.close()

#### Dataframes for reserch and additional special aspects.

#### df_planets_sing_diff, df_moon_diver , df_moon_more_orbs, df_moon_conv_asps, df_wt_charact, df_rulers_diver_asps, df_not_determinate_hpls_asps.

#### Features importance

In [None]:
df_hauses_features.tail()

In [None]:
loss_coef_plus_lt = df_hauses_features[df_hauses_features.loss_coef_perc_dev > 5].houses_features.values.tolist()
print('len - loss_coef_plus_lt - ', len(loss_coef_plus_lt))
str(loss_coef_plus_lt[:3])

In [None]:
loss_coef_minus_lt = df_hauses_features[df_hauses_features.loss_coef_perc_dev < -5].houses_features.values.tolist()
print('len - loss_coef_minus_lt - ', len(loss_coef_minus_lt))
str(loss_coef_minus_lt[:3])

In [231]:
features_imp_lt = loss_coef_plus_lt + loss_coef_minus_lt
print('len - features_imp_lt - ', len(features_imp_lt))
str(features_imp_lt[:3])

NameError: name 'loss_coef_plus_lt' is not defined

In [None]:
# file = open('pickle_files/aspected_files/lt_features_imp_pl_5_mn_5_cr_17_08_2022', 'wb')
# pickle.dump(features_imp_lt, file)  
# file.close()

In [None]:
# Stop

#### Find all not zero features from target == 1

In [None]:
target_col = 'fav_loss'                    # *****************************************************************************************************
target_inds_lt = df_games_features[df_games_features[target_col] == 1].index.tolist()
print('len - ', len(target_inds_lt))
target_inds_lt[:3]

In [None]:
sr_target_features_dicts = gb_game_features[gb_game_features.index.isin(target_inds_lt)].houses_dicts
sr_target_features_dicts.head(2)

In [None]:
target_features_lt = []
_ = sr_target_features_dicts.map(lambda x: [target_features_lt.append(key) for key in x.keys()])

In [None]:
len(set(target_features_lt))

In [None]:
# Nearly all target features is in features.

#### Save csv files for PaperSpace

In [None]:
with open('/home/cryptobrahman/Own/football_competitions_research/pickle_files/aspected_files/df_games_features_cr_22_09_2022', 'rb') as f:
    df_games_features = pickle.load(f)
print('shape - ', df_games_features.shape)    

In [None]:
df_games_features.head()

In [None]:
# For getting col_names with num index in csv file:
df_col_inds_names = pd.DataFrame(data=df_games_features.columns, columns=['col_names'])
df_col_inds_names

## df_col_inds_names.to_csv('files_paperspace/df_col_inds_names_cr_22_09_2022.csv', header=True, index=False)

In [None]:
# with open('/home/cryptobrahman/Own/football_competitions_research/pickle_files/aspected_files/lt_features_imp_pl_5_mn_5_cr_17_08_2022', 'rb') as f:
#     lt_features_imp = pickle.load(f)
# print('len - ', len(lt_features_imp))  
# str(lt_features_imp[:3])

In [None]:
# lt_features_imp = [x.lower() for x in lt_features_imp]
# lt_features_imp = [re.sub(' ', '_', x) for x in lt_features_imp] 
# str(lt_features_imp[:3])

In [None]:
# inds_future_imp_cols = []
# for ind, col in enumerate(df_games_features.columns):
#     if col in lt_features_imp:
#            inds_future_imp_cols.append(ind)

In [None]:
# print('len - ', len(inds_future_imp_cols))
# inds_future_imp_cols[:3]

In [None]:
# df_games_features.columns[5] in lt_features_imp

In [None]:
# with open('files_paperspace/lt_indexes_fut_imp_pl_5_mn_5_cr_17_08_2022.csv', 'w', newline='') as f:
#     wr = csv.writer(f, quoting=csv.QUOTE_ALL)
#     wr.writerow(inds_future_imp_cols)

In [None]:
df_games_features.columns = range(len(df_games_features.columns))

In [None]:
# df_games_features.head(1)

In [232]:
# df_games_features.rename(columns={0: 'id', 1462: 'l', 1463: 'd', 1464: 'ld'}, inplace=True)

In [233]:
df_games_features.head(1)

NameError: name 'df_games_features' is not defined

In [None]:
# df_games_features.to_csv('files_paperspace/df_paperspace_cr_22_09_2022.csv', header=True, index=False)

In [None]:
# df = pd.read_csv('files_paperspace/df_paperspace_cr_22_09_2022.csv')

In [None]:
# df

In [None]:
# with open('files_paperspace/lt_indexes_fut_imp_pl_5_mn_5_cr_17_08_2022.csv', newline='') as f:
#     reader = csv.reader(f)
#     col_inds_future_imp = list(reader)[0]
#     col_inds_future_imp = list(map(int, col_inds_future_imp))

# col_inds_future_imp[:3]    