In [1]:
from datetime import datetime,  timedelta
import os
import re
import time
import pandas as pd
import numpy as np
from multiprocessing import Process, Manager, current_process

# Length, in days, of the default date window selected by filter_from_tuesday();
# the notebook also uses it as the offset between the "work" and "target" windows.
CATCH_DAYS = 14

In [25]:
#funcs
def generate_dates(
                    time_target: str
    ) -> list[datetime]:
    """
    Build a sorted list of the dates encoded in the file names of ``csv/<time_target>/``.

    File names are expected to start with ``DD_MM_YY_`` (two-digit year, 20YY).
    Directory entries that do not follow this pattern (hidden files,
    ``.ipynb_checkpoints`` and similar) are skipped instead of crashing the parse.

    Args:
        time_target: sub-folder of ``csv/`` to scan - "past" or "future".

    Returns:
        The parsed dates in ascending order.

    Raises:
        FileNotFoundError: if the folder does not exist (from ``os.listdir``).
        ValueError: if a matching name encodes an impossible date (e.g. 31_02_23).
    """
    name_pattern = re.compile(r'(\d{1,2})_(\d{1,2})_(\d{2})_')
    dates = []
    for file_name in os.listdir(f'csv/{time_target}/'):
        found = name_pattern.match(file_name)
        if found is None:
            # Not a data file of this project - ignore it.
            continue
        day, month, year = (int(part) for part in found.groups())
        dates.append(datetime(year=2000 + year, month=month, day=day))
    dates.sort()
    return dates


def filter_from_tuesday(
                        target_day: datetime, 
                        list_days: list[datetime],
                        target_df: bool  = False,
    ) -> list[datetime] :
    """
    Keep the dates that fall inside the window starting at ``target_day``.

    The window is half-open: ``target_day <= date < target_day + N days``,
    where N is 7 when ``target_df`` is True and CATCH_DAYS otherwise.
    The function name suggests ``target_day`` is meant to be a Tuesday; the
    code itself does not check the weekday.

    Args:
        target_day: first day of the window (inclusive).
        list_days: candidate dates; their relative order is preserved.
        target_df: True selects the 7-day window, anything else CATCH_DAYS days.

    Returns:
        The dates from ``list_days`` that lie inside the window.
    """
    if target_df is True:
        window_days = 7
    else:
        window_days = CATCH_DAYS
    return [
        day
        for day in list_days
        if target_day <= day < target_day + timedelta(days=window_days)
    ]


def create_df_w_date(
                csv_name: str, 
                time_target: str
    ) -> pd.DataFrame :
    """
    Load ``csv/<time_target>/<csv_name>`` and tag every row with the file's date.

    The ``date`` column holds the first 8 characters of ``csv_name`` as a plain
    string (for a name such as ``18_05_23_p.csv`` that is ``18_05_23``).

    Args:
        csv_name: file name inside the folder.
        time_target: sub-folder of ``csv/`` - "past" or "future".

    Returns:
        The loaded frame with the extra ``date`` column.
    """
    csv_path = f'csv/{time_target}/{csv_name}'
    date_label = csv_name[:8]
    return pd.read_csv(csv_path).assign(date=date_label)


def get_final_df(
                time_target: str,
                day_target: datetime,
                target_df: bool  = False,
    ) -> pd.DataFrame :
    """
    Load and concatenate all daily CSV files of one date window.

    The available dates are read from ``csv/<time_target>/``, narrowed to the
    window that starts at ``day_target`` and every matching file is loaded with
    its ``date`` column, then stacked into one frame with a fresh 0..n-1 index.

    Args:
        time_target: "past" selects files ending in ``_p.csv``; any other value
            selects files ending in ``_f.csv``.
        day_target: first day of the window (inclusive).
        target_df: forwarded to filter_from_tuesday to choose the window length.

    Returns:
        One DataFrame containing the rows of every file in the window.

    Raises:
        ValueError: if no file falls inside the window (previously surfaced as
            pandas' generic "No objects to concatenate").
    """
    list_days = generate_dates(time_target)
    filtred_list_days = filter_from_tuesday(day_target, list_days, target_df)
    if not filtred_list_days:
        raise ValueError(
            f"no csv files in 'csv/{time_target}/' for the window "
            f"starting at {day_target:%d_%m_%y}"
        )

    file_ends = "_p.csv" if time_target == 'past' else "_f.csv"

    frames = [
        create_df_w_date(f"{days.strftime('%d_%m_%y')}{file_ends}", time_target)
        for days in filtred_list_days
    ]
    return pd.concat(frames, ignore_index=True)


def create_np_from_df(
                    df: pd.DataFrame,
                    time_target: str='past',
    ) -> np.ndarray:
    """
    Add the derived result columns to ``df`` and return them as a 2-D array.

    NOTE: ``df`` is modified in place - the new columns are added to it and
    rows containing a missing value in ANY column are dropped from it. Later
    notebook cells read these columns from the same frame, so this side effect
    is part of the contract.

    Columns of the returned array, in order:
        0 rez_h_f_half - first-half goal difference of the home team's previous match
        1 rez_h_match  - full-time goal difference of that match
        2 rez_a_f_half - first-half goal difference of the away team's previous match
        3 rez_a_match  - full-time goal difference of that match
        only when time_target == 'past':
            4 rez_c_f_half - 1 if the current match's first half was level, else 0
            5 rez_c_match  - 1 if the current match ended level, else 0
        last: index - the row label of ``df`` at the time of the call

    Goal differences are "first number minus second number" of the score
    string when the current team is listed as home team of the previous match,
    and the reverse otherwise. A score that cannot be parsed (wrong format or
    a missing cell) yields -999; an unparseable current-match score yields a
    missing value, so that row is dropped.

    Args:
        df: frame with the ``cur_match_*``, ``h_match_*`` and ``a_match_*`` columns.
        time_target: 'past' additionally derives the current-match outcome columns.

    Returns:
        ``df[<columns above>].to_numpy()`` after the rows with missing values
        have been dropped.
    """
    def parse_score(raw, same_side):
        """Return the two numbers of an 'x-y' score, swapped unless same_side; None if unparseable."""
        try:
            first, second = raw.split('-')
            first, second = int(first), int(second)
        except (ValueError, AttributeError):
            # ValueError: wrong number of parts or non-numeric text.
            # AttributeError: the cell is not a string at all (e.g. NaN).
            return None
        return (first, second) if same_side else (second, first)

    def add_column(
                    data: pd.Series, 
                    cur_team: pd.Series, 
                    target_team: pd.Series,
        ) -> list[int]:
        """Goal difference per row, -999 where the score cannot be parsed."""
        rez = []
        for raw, cur, target in zip(data, cur_team, target_team):
            parsed = parse_score(raw, cur == target)
            rez.append(-999 if parsed is None else parsed[0] - parsed[1])
        return rez

    def add_rez_column(data: pd.Series) -> list:
        """1 for a level score, 0 otherwise, None where the score cannot be parsed."""
        rez = []
        for raw in data:
            # Orientation is irrelevant for an equality check.
            parsed = parse_score(raw, True)
            rez.append(None if parsed is None else int(parsed[0] == parsed[1]))
        return rez

    # (new column, score column, current team column, home team of the previous match)
    history_columns = (
        ('rez_h_f_half', 'h_match_f_half', 'cur_match_h_team', 'h_match_h_team'),
        ('rez_h_match', 'h_match_score', 'cur_match_h_team', 'h_match_h_team'),
        ('rez_a_f_half', 'a_match_f_half', 'cur_match_a_team', 'a_match_h_team'),
        ('rez_a_match', 'a_match_score', 'cur_match_a_team', 'a_match_h_team'),
    )
    for new_column, score_column, cur_column, home_column in history_columns:
        df[new_column] = add_column(
                                    data=df[score_column],
                                    cur_team=df[cur_column],
                                    target_team=df[home_column]
                                )
    df['index'] = df.index

    result_columns = [new_column for new_column, *_ in history_columns]
    if time_target == 'past':
        df['rez_c_f_half'] = add_rez_column(df['cur_match_f_half'])
        df['rez_c_match'] = add_rez_column(df['cur_match_score'])
        result_columns += ['rez_c_f_half', 'rez_c_match']
    result_columns.append('index')

    df.dropna(inplace=True)
    return df[result_columns].to_numpy()


def predict_past(
            df_np: np.ndarray,
            proc: float,
            count_games: int,
            verbose=True
    ) -> tuple[list[list[int]], int]:
    """
    Find the history patterns whose matches were mostly level at half time.

    A pattern is the 4-tuple stored in columns 0-3 of ``df_np``. Only patterns
    whose four values all lie in -5..5 are considered. For each pattern the
    rows are split by column 4 (1 = level, 0 = not level); the pattern is kept
    when the share of level rows is at least ``proc`` and the pattern has at
    least ``count_games`` such rows.

    Args:
        df_np: 2-D array with the pattern in columns 0-3 and the 1/0 outcome
            in column 4.
        proc: minimum share of level outcomes, between 0 and 1.
        count_games: minimum number of rows a pattern must have.
        verbose: when truthy, print one line per kept pattern.

    Returns:
        ``(patterns, total_matches)``: the kept patterns as lists of four ints,
        in ascending lexicographic order, and the total number of rows that
        belong to them.
    """
    lists = range(-5,5+1)
    total_rez = []
    total_matches = 0

    # Visit only the patterns that actually occur instead of scanning the
    # whole 11**4 grid; sorting the tuples reproduces the grid's order.
    patterns = sorted({
        tuple(row)
        for row in df_np[:, :4].tolist()
        if all(value in lists for value in row)
    })

    for pattern in patterns:
        h_f, h_score, a_f, a_score = (int(value) for value in pattern)

        rez = df_np[
            (df_np[:,0] == h_f) &
            (df_np[:,1] == h_score) &
            (df_np[:,2] == a_f) &
            (df_np[:,3] == a_score)
        ]

        draw = rez[rez[:, 4] == 1].shape[0]
        win_1_2 = rez[rez[:, 4] == 0].shape[0]
        # The fallback only matters when column 4 holds neither 0 nor 1.
        proc_draw = draw / (draw + win_1_2) if (draw + win_1_2) > 0 else 1.0

        if proc_draw >= proc and (draw + win_1_2) >= count_games:
            total_rez.append([h_f, h_score, a_f, a_score])
            total_matches += rez.shape[0]

            if verbose:
                print(
                    f'{h_f:2d} {h_score:2d} {a_f:2d} {a_score:2d}'
                    f' ||| f_h =>{draw:3d} {win_1_2:3d} = {proc_draw:.3f}'
                )

    return total_rez, total_matches



def check_predict(
                df_np: np.ndarray,
                list_rez: list[list[int]],
                time_target: str='past'
    ) -> dict:
    """
    Count how the rows matching the given patterns actually turned out.

    For every pattern in ``list_rez`` the rows of ``df_np`` whose columns 0-3
    equal the pattern are selected.

    * time_target == 'future': the selection and the pattern are printed and
      the values of column 4 (the row index in the 5-column "future" array)
      are collected; no outcome is counted.
    * otherwise: rows are counted by column 4 (1 -> 'draw', 0 -> 'win_1_2')
      and the values of column 6 (the row index in the 7-column "past" array)
      are collected.

    Args:
        df_np: array produced for the chosen ``time_target``.
        list_rez: patterns as ``[h_f, h_score, a_f, a_score]`` lists.
        time_target: 'future' switches to the collect-only mode.

    Returns:
        Dict with integer counters 'draw', 'win_1_2', 'total_games' and the
        list 'indexes' of collected row indexes.
    """
    total_rez = {
                'draw': 0,
                'win_1_2': 0,
                'total_games': 0,
                'indexes': []
    }
    for h_f, h_score, a_f, a_score in list_rez:
        r = df_np[
                (df_np[:,0] == h_f) &
                (df_np[:,1] == h_score) &
                (df_np[:,2] == a_f) &
                (df_np[:,3] == a_score) 
            ]
        if time_target == 'future':
            print(f'{r[:, 4]}',h_f, h_score, a_f, a_score)
            total_rez['indexes'].extend(r[:, 4])
            continue

        total_rez['draw'] += r[r[:,4] == 1].shape[0]
        total_rez['win_1_2'] += r[r[:,4] == 0].shape[0]
        total_rez['total_games'] += r.shape[0]
        # Extend in place instead of rebuilding the whole list per pattern.
        total_rez['indexes'].extend(r[:, 6])

    return total_rez     

In [55]:
## Load the two DataFrames used by the experiment:
## df_work   - the CATCH_DAYS-day window starting at day_target, used to derive predictions,
## df_target - the 7-day window that follows it, used to verify them.

time_target = 'past'
# time_target = 'future'
day_target = datetime(2023, 5, 2)
# day_target = datetime(2023, 4, 25)

df_work = get_final_df(time_target, day_target)
df_target = get_final_df(
    time_target,
    day_target + timedelta(days=CATCH_DAYS),
    target_df=True,
)

In [56]:
## Convert both frames to numpy arrays ready for processing:
## df_np_work   - used to derive predictions,
## df_np_target - used to verify them.
## Both calls also add the derived columns to the frames themselves.

df_np_work = create_np_from_df(df=df_work, time_target='past')
df_np_target = create_np_from_df(df=df_target, time_target='past')

In [28]:
# Grid search for the best thresholds: for every (proc_work, tot_work) pair,
# derive the patterns on the work window, check them on the target window and
# print the pairs whose draw share on the target window is at least 0.55.
#
# Thresholds are built from integer percentages (0.51 .. 0.89). A float-step
# np.arange accumulates rounding error, so a threshold meant to be 0.60 could
# end up a hair above 0.6 and reject patterns whose draw share is exactly 3/5.
# Loop-local names (grid_rez, grid_check) keep this cell from overwriting the
# notebook-level `rez` and `a` used by the other cells.
for proc_pct in range(51, 90):
    proc_work = proc_pct / 100
    for tot_work in range(2, 30):

        grid_rez, tot_matches = predict_past(df_np_work, proc_work, tot_work, verbose=False)
        grid_check = check_predict(df_np_target, grid_rez)

        if grid_check['total_games'] == 0:
            continue

        draw = grid_check['draw'] / grid_check['total_games']
        if draw >= 0.55:
            print(
                f'matches = {tot_matches} '
                f'proc_work={proc_work:.3f}, tot_work={tot_work} => '
                f'draw {draw:.3f} ',
                f'tot_target= {grid_check["total_games"]} \n',
            )

matches = 52 proc_work=0.600, tot_work=14 => draw 0.556  tot_target= 27 

matches = 52 proc_work=0.600, tot_work=15 => draw 0.556  tot_target= 27 

matches = 52 proc_work=0.600, tot_work=16 => draw 0.556  tot_target= 27 

matches = 36 proc_work=0.600, tot_work=17 => draw 0.632  tot_target= 19 

matches = 19 proc_work=0.600, tot_work=18 => draw 0.636  tot_target= 11 

matches = 19 proc_work=0.600, tot_work=19 => draw 0.636  tot_target= 11 

matches = 52 proc_work=0.610, tot_work=14 => draw 0.556  tot_target= 27 

matches = 52 proc_work=0.610, tot_work=15 => draw 0.556  tot_target= 27 

matches = 52 proc_work=0.610, tot_work=16 => draw 0.556  tot_target= 27 

matches = 36 proc_work=0.610, tot_work=17 => draw 0.632  tot_target= 19 

matches = 19 proc_work=0.610, tot_work=18 => draw 0.636  tot_target= 11 

matches = 19 proc_work=0.610, tot_work=19 => draw 0.636  tot_target= 11 

matches = 52 proc_work=0.620, tot_work=14 => draw 0.556  tot_target= 27 

matches = 52 proc_work=0.620, tot_work

In [57]:
## Derive the prediction patterns from the work window.
## Thresholds 0.83 / 3 were marked by the author both as "verified" and as the current test values.
rez, tot_matches = predict_past(df_np_work, proc=0.83, count_games=3)
print(f'загальна кількість матчів = {tot_matches}')
rez

-1 -3 -2 -2 ||| f_h =>  3   0 = 1.000
-1 -3 -1  1 ||| f_h =>  3   0 = 1.000
-1  1 -2 -3 ||| f_h =>  3   0 = 1.000
 0  2  2  2 ||| f_h =>  5   1 = 0.833
 1 -1  0  2 ||| f_h =>  4   0 = 1.000
 1  2  0  2 ||| f_h =>  5   1 = 0.833
 1  3  1  0 ||| f_h =>  5   0 = 1.000
загальна кількість матчів = 30


[[-1, -3, -2, -2],
 [-1, -3, -1, 1],
 [-1, 1, -2, -3],
 [0, 2, 2, 2],
 [1, -1, 0, 2],
 [1, 2, 0, 2],
 [1, 3, 1, 0]]

In [58]:
# Verify the derived patterns on the target window and print the outcome shares.
a = check_predict(df_np_target, rez)
if a['total_games'] > 0:
    draw, win_1_2 = (a[key] / a['total_games'] for key in ('draw', 'win_1_2'))
    print(a, '\n', f'draw {draw:.3f}  || win_1_2 {win_1_2:.3f} \n')

{'draw': 4, 'win_1_2': 2, 'total_games': 6, 'indexes': [293, 477, 2170, 2436, 727, 1256]} 
 draw 0.667  || win_1_2 0.333 



In [59]:
# Show the target-window matches selected by the prediction.
# (Use df_target.loc[a['indexes'], 'rez_c_f_half'].value_counts() for a quick tally.)
df_target.loc[a['indexes'], :]

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,...,a_match_f_half,a_match_s_half,date,rez_h_f_half,rez_h_match,rez_a_f_half,rez_a_match,index,rez_c_f_half,rez_c_match
293,Нойштадт,Драссбург,0-1,0 - 0,0 - 1,Нойштадт,Виенер Спортклуб,0-3,0 - 1,0 - 2,...,0 - 1,2 - 0,18_05_23,-1,-3,-1,1,293,1,0
477,ЦСКА Ереван 2,Вест Армения,1-1,1 - 1,0 - 0,Мика,ЦСКА Ереван 2,0-2,0 - 0,0 - 2,...,2 - 0,1 - 1,19_05_23,0,2,2,2,477,1,1
2170,Каса Пиа,Эшторил,2-2,0 - 0,2 - 2,Порту,Каса Пиа,2-1,0 - 1,2 - 0,...,0 - 0,2 - 0,21_05_23,1,-1,0,2,2170,1,1
2436,Славия Градец,Яромерж,7-1,3 - 0,4 - 1,Хлумец-над-Цидлиноу (Б),Славия Градец,2-4,1 - 2,1 - 2,...,0 - 0,2 - 0,21_05_23,1,2,0,2,2436,0,0
727,Зноймо,Фридек-Мистек,0-0,0 - 0,0 - 0,Границе,Зноймо,0-3,0 - 1,0 - 2,...,1 - 0,0 - 1,19_05_23,1,3,1,0,727,1,1
1256,Ахмат,Химки,3-0,1 - 0,2 - 0,Динамо Москва,Ахмат,0-3,0 - 1,0 - 2,...,1 - 0,0 - 1,20_05_23,1,3,1,0,1256,0,0


In [31]:
# Show the work-window matches behind the pattern (-1, -3, -2, -2).
# Replace df_work with df_target to look at the same pattern in the target window.
df_work.loc[
    (df_work.rez_h_f_half == -1)
    & (df_work.rez_h_match == -3)
    & (df_work.rez_a_f_half == -2)
    & (df_work.rez_a_match == -2)
]

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,...,a_match_f_half,a_match_s_half,date,rez_h_f_half,rez_h_match,rez_a_f_half,rez_a_match,index,rez_c_f_half,rez_c_match
783,Шибеник,Вараждин,0-2,0 - 0,0 - 2,Истра 1961,Шибеник,3-0,1 - 0,2 - 0,...,0 - 2,1 - 1,05_05_23,-1,-3,-2,-2,783,1,0
851,Буллс Академи,Сазерланд Шаркс,1-2,1 - 1,0 - 1,Блэктаун Сити,Буллс Академи,4-1,2 - 1,2 - 0,...,0 - 2,0 - 0,06_05_23,-1,-3,-2,-2,851,1,0
1367,Вислока Дебица,Уния Тарнув,0-0,0 - 0,0 - 0,Вязовница,Вислока Дебица,3-0,1 - 0,2 - 0,...,0 - 2,1 - 1,06_05_23,-1,-3,-2,-2,1367,1,1


In [18]:
## Apply the derived patterns to upcoming matches.
time_target = 'future'

df_future = pd.read_csv('csv/future/17_05_23_f.csv')
df_np_future = create_np_from_df(df_future, time_target=time_target)
a = check_predict(df_np=df_np_future, list_rez=rez, time_target=time_target)
a

[] -2 -3 -1 -1
[] -2 -2 2 3
[112] -2 -1 0 -1
[] -1 -2 1 -1
[] 0 -2 2 3
[] 0 0 -1 -4
[] 0 0 1 -2
[] 0 1 -2 -3
[] 0 1 -2 -1
[] 1 4 -1 -1


{'tb': 0, 'tm': 0, 'total_games': 0, 'indexes': [112]}

In [19]:
# Show the upcoming matches that hit one of the patterns.
df_future.loc[a['indexes'], :]

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,a_match_h_team,a_match_a_team,a_match_score,a_match_f_half,a_match_s_half,rez_h_f_half,rez_h_match,rez_a_f_half,rez_a_match,index
112,Тайпауэр,Taipei Deva Dragons,without_score,without_score,without_score,Тайпауэр,Хан Юэнь,1-2,0 - 2,1 - 0,Taipei Deva Dragons,Тайвань Шиху,1-2,1 - 1,0 - 1,-2,-1,0,-1,112


In [15]:
# Scratch check of the window arithmetic: 18 April 2023 plus 15 days.
datetime(year=2023, month=4, day=18) + timedelta(days=15)

datetime.datetime(2023, 5, 3, 0, 0)