In [285]:
from datetime import datetime,  timedelta
import os
import re
import time
import pandas as pd
import numpy as np
from multiprocessing import Process, Manager, current_process

# Length, in days, of each date window selected by filter_from_tuesday();
# the notebook also uses it as the offset between the work and target windows.
CATCH_DAYS = 14

In [286]:
# Helper functions
def generate_dates(
                    time_target: str
    ) -> list[datetime]:
    """
    Build a sorted list of the dates encoded in the entry names of ``csv/<time_target>/``.

    Names are expected to start with ``DD_MM_YY_`` (for example
    ``04_04_23_p.csv``); the two-digit year is read as ``20YY``.
    Entries whose name does not follow that pattern (hidden files,
    checkpoint folders, stray notes, ...) are skipped instead of aborting
    the whole listing.

    Args:
        time_target: name of the sub-folder of ``csv/`` to scan
            (the notebook uses ``"past"`` and ``"future"``).

    Returns:
        The parsed dates in ascending order.

    Raises:
        OSError: if ``csv/<time_target>/`` cannot be listed.
    """
    dates = []
    for file_name in os.listdir(f'csv/{time_target}/'):
        parts = file_name.split('_')
        try:
            dates.append(
                datetime(
                    year=int(f"20{parts[2]}"),
                    month=int(parts[1]),
                    day=int(parts[0]),
                )
            )
        except (IndexError, ValueError):
            # Too few "_"-separated fields, non-numeric fields or an
            # impossible calendar date: not a daily data file, ignore it.
            continue
    dates.sort()
    return dates


def filter_from_tuesday(
                        target_day: datetime,
                        list_days: list[datetime],
                        days: "int | None" = None
    ) -> list[datetime] :
    """
    Select the dates that fall inside the window starting at ``target_day``.

    The window is half-open: a date ``d`` is kept when
    ``target_day <= d < target_day + days``. The function does not check
    the weekday of ``target_day``; choosing the start day is up to the caller.

    Args:
        target_day: first day of the window (inclusive).
        list_days: candidate dates; their relative order is preserved.
        days: window length in days. When omitted, the module-level
            ``CATCH_DAYS`` constant is used.

    Returns:
        The dates from ``list_days`` that lie inside the window.
    """
    window_days = CATCH_DAYS if days is None else days
    window_end = target_day + timedelta(days=window_days)
    return [day for day in list_days if target_day <= day < window_end]


def create_df_w_date(
                csv_name: str,
                time_target: str
    ) -> pd.DataFrame :
    """
    Read ``csv/<time_target>/<csv_name>`` and tag every row with the file's date.

    The ``date`` column holds the first eight characters of ``csv_name``
    (the ``DD_MM_YY`` prefix of the daily files used by this notebook).
    """
    csv_path = f'csv/{time_target}/{csv_name}'
    date_label = csv_name[:8]
    return pd.read_csv(csv_path).assign(date=date_label)


def get_final_df(
                time_target: str,
                day_target: datetime
    ) -> pd.DataFrame :
    """
    Load every daily CSV of the window starting at ``day_target`` into one frame.

    The available dates come from ``generate_dates(time_target)`` and are
    narrowed with ``filter_from_tuesday``. Each selected date is turned back
    into a file name (``DD_MM_YY`` plus ``_p.csv`` for ``'past'`` and
    ``_f.csv`` for any other ``time_target``), loaded with
    ``create_df_w_date`` and the frames are stacked with a fresh 0..n-1 index.
    """
    if time_target == 'past':
        file_ends = "_p.csv"
    else:
        file_ends = "_f.csv"

    window_days = filter_from_tuesday(day_target, generate_dates(time_target))

    frames = []
    for day in window_days:
        csv_name = day.strftime('%d_%m_%y') + file_ends
        frames.append(create_df_w_date(csv_name, time_target))

    return pd.concat(frames, ignore_index=True)


def create_np_from_df(
                    df: pd.DataFrame,
                    time_target: str='past',
    ) -> np.ndarray:
    """
    Add the derived result columns to ``df`` and return them as an array.

    NOTE: ``df`` is modified in place - the derived columns are added and
    every row containing a missing value (in any column) is dropped. Other
    notebook cells read the derived columns from the frame they passed in,
    so this side effect is part of the contract.

    Derived columns (score cells are ``"X-Y"`` strings, spaces allowed):
      - ``rez_h_f_half`` / ``rez_h_match``: from ``h_match_f_half`` /
        ``h_match_score``; ``X - Y`` when ``cur_match_h_team`` equals
        ``h_match_h_team``, otherwise ``Y - X``.
      - ``rez_a_f_half`` / ``rez_a_match``: the same for the ``a_match_*``
        columns, compared through ``cur_match_a_team`` / ``a_match_h_team``.
      - ``index``: copy of the frame's index.
      - ``rez_c_f_half`` / ``rez_c_match`` (only when ``time_target`` is
        ``'past'``): 1 when ``X + Y`` of ``cur_match_f_half`` /
        ``cur_match_score`` is above 1.5, else 0.

    A cell that cannot be parsed (no score yet, missing value, wrong format)
    yields ``-999`` in the difference columns and ``None`` in the ``rez_c_*``
    columns; ``None`` rows are then removed by the final ``dropna``.

    Args:
        df: frame with the ``cur_match_*``, ``h_match_*`` and ``a_match_*``
            columns listed above.
        time_target: ``'past'`` adds the two ``rez_c_*`` columns; any other
            value skips them.

    Returns:
        ``'past'``: columns ``[rez_h_f_half, rez_h_match, rez_a_f_half,
        rez_a_match, rez_c_f_half, rez_c_match, index]``;
        otherwise ``[rez_h_f_half, rez_h_match, rez_a_f_half, rez_a_match, index]``.
    """
    unparsed_diff = -999  # sentinel kept in the difference columns

    def parse_score(raw):
        """Return ``(X, Y)`` as ints for an ``"X-Y"`` string, or None if unparsable."""
        if not isinstance(raw, str):
            # Missing cells are not strings (NaN is a float) and cannot be split.
            return None
        try:
            first, second = raw.split('-')
            return int(first), int(second)
        except ValueError:
            # Wrong number of "-"-separated parts or non-numeric parts.
            return None

    def add_column(
                    data: pd.Series,
                    cur_team: pd.Series,
                    target_team: pd.Series,
        ) -> list[int]:
        """Signed score difference per row, oriented by the team comparison."""
        rez = []
        for raw, cur, target in zip(data, cur_team, target_team):
            score = parse_score(raw)
            if score is None:
                rez.append(unparsed_diff)
                continue
            first, second = score
            rez.append(first - second if cur == target else second - first)
        return rez

    def add_rez_column(data: pd.Series) -> list:
        """1 / 0 flag for "total above 1.5" per row, None when unparsable."""
        rez = []
        for raw in data:
            score = parse_score(raw)
            # The total does not depend on which side is listed first.
            rez.append(None if score is None else int(abs(score[0] + score[1]) > 1.5))
        return rez

    diff_specs = (
        ('rez_h_f_half', 'h_match_f_half', 'cur_match_h_team', 'h_match_h_team'),
        ('rez_h_match', 'h_match_score', 'cur_match_h_team', 'h_match_h_team'),
        ('rez_a_f_half', 'a_match_f_half', 'cur_match_a_team', 'a_match_h_team'),
        ('rez_a_match', 'a_match_score', 'cur_match_a_team', 'a_match_h_team'),
    )
    for new_col, score_col, cur_col, home_col in diff_specs:
        df[new_col] = add_column(
                                data=df[score_col],
                                cur_team=df[cur_col],
                                target_team=df[home_col]
                            )
    df['index'] = df.index

    out_columns = [spec[0] for spec in diff_specs]
    if time_target == 'past':
        df['rez_c_f_half'] = add_rez_column(df['cur_match_f_half'])
        df['rez_c_match'] = add_rez_column(df['cur_match_score'])
        out_columns += ['rez_c_f_half', 'rez_c_match']
    out_columns.append('index')

    # In-place on purpose: callers keep using the same frame afterwards.
    df.dropna(inplace=True)
    return df[out_columns].to_numpy()


def predict_past(
            df_np: np.ndarray,
            proc: float,
            count_games: int,
            verbose=True,
            max_diff: int = 5
    ) -> list[list[int]]:
    """
    Find the difference patterns whose share of "1" outcomes is high enough.

    A pattern is a combination ``(h_f, h_score, a_f, a_score)`` of integer
    values in ``[-max_diff, max_diff]`` matched against columns 0-3 of
    ``df_np``. For the rows matching a pattern, column 4 is counted: ``tb``
    rows equal to 1 and ``tm`` rows equal to 0 (in the arrays built by
    ``create_np_from_df`` for ``'past'`` data column 4 is ``rez_c_f_half``).
    A pattern is kept when ``tb / (tb + tm) >= proc`` (the share counts as 1
    when ``tm`` is 0) and ``tb + tm >= count_games``.

    The rows are narrowed one column at a time, so combinations without any
    matching row are never scanned; patterns are still visited - and
    returned - in ascending lexicographic order.

    Args:
        df_np: 2-D array with at least five columns.
        proc: minimum share of rows with column 4 equal to 1.
        count_games: minimum number of counted rows (``tb + tm``).
        verbose: print one line per accepted pattern.
        max_diff: largest absolute value tried in each of the four columns.

    Returns:
        The accepted patterns as ``[h_f, h_score, a_f, a_score]`` lists.
    """
    diffs = range(-max_diff, max_diff + 1)
    total_rez = []

    for h_f in diffs:
        rows_h_f = df_np[df_np[:, 0] == h_f]
        if rows_h_f.shape[0] == 0:
            continue
        for h_score in diffs:
            rows_h = rows_h_f[rows_h_f[:, 1] == h_score]
            if rows_h.shape[0] == 0:
                continue
            for a_f in diffs:
                rows_a_f = rows_h[rows_h[:, 2] == a_f]
                if rows_a_f.shape[0] == 0:
                    continue
                for a_score in diffs:
                    rez = rows_a_f[rows_a_f[:, 3] == a_score]
                    if rez.shape[0] == 0:
                        continue

                    r_h_f_tb = int(np.count_nonzero(rez[:, 4] == 1))
                    r_h_f_tm = int(np.count_nonzero(rez[:, 4] == 0))
                    # With no "0" rows the share is 1 by definition; this also
                    # avoids dividing by zero when nothing was counted.
                    proc_f_tb = r_h_f_tb / (r_h_f_tb + r_h_f_tm) if r_h_f_tm > 0 else 1

                    if proc_f_tb >= proc and (r_h_f_tb + r_h_f_tm) >= count_games:
                        total_rez.append([h_f, h_score, a_f, a_score])
                        if verbose:
                            print(
                                f'{h_f:2d} {h_score:2d} {a_f:2d} {a_score:2d}'
                                f' ||| f_h =>{r_h_f_tb:3d} {r_h_f_tm:3d} = {proc_f_tb:.3f}'
                            )

    return total_rez



def check_predict(
                df_np: np.ndarray,
                list_rez: list[list[int]],
                time_target: str='past'
    ) -> dict:
    """
    Look up the given patterns in ``df_np`` and summarise the matching rows.

    Each pattern ``[h_f, h_score, a_f, a_score]`` is matched against columns
    0-3 of ``df_np``.

    - ``time_target == 'future'``: column 4 is taken as the row label; the
      labels found for each pattern are printed and collected, and no
      outcome is counted.
    - otherwise: column 4 is the outcome flag and column 6 the row label;
      rows with flag 1 are added to ``tb``, rows with flag 0 to ``tm`` and
      all matching rows to ``total_games``.

    Args:
        df_np: 2-D array; at least 5 columns for ``'future'``, 7 otherwise.
        list_rez: patterns to look up (e.g. the result of ``predict_past``).
        time_target: ``'future'`` selects the label-only mode.

    Returns:
        ``{'tb': int, 'tm': int, 'total_games': int, 'indexes': list[int]}``.
        ``indexes`` holds plain Python ints, in pattern order, so it prints
        cleanly and can be used directly as row labels.
    """
    total_rez = {
                'tb': 0,
                'tm': 0,
                'total_games': 0,
                'indexes': []
    }
    is_future = time_target == 'future'

    for h_f, h_score, a_f, a_score in list_rez:
        r = df_np[
                (df_np[:,0] == h_f) &
                (df_np[:,1] == h_score) &
                (df_np[:,2] == a_f) &
                (df_np[:,3] == a_score)
            ]
        if is_future:
            print(f'{r[:, 4]}',h_f, h_score, a_f, a_score)
            total_rez['indexes'].extend(r[:, 4].astype(int).tolist())
            continue

        outcome = r[:, 4]
        total_rez['tb'] += int(np.count_nonzero(outcome == 1))
        total_rez['tm'] += int(np.count_nonzero(outcome == 0))
        total_rez['total_games'] += r.shape[0]
        total_rez['indexes'].extend(r[:, 6].astype(int).tolist())

    return total_rez

In [290]:
## Load the data frames to process (each one covers a window of CATCH_DAYS days):
## df_work   - the window used to derive the prediction patterns,
## df_target - the following window, used to check them.

time_target = 'past' 
# time_target = 'future' 
day_target = datetime(year=2023, month=4, day=4)

df_work = get_final_df(time_target=time_target, day_target=day_target)
df_target = get_final_df(time_target=time_target, day_target=day_target+timedelta(days=CATCH_DAYS))
# df_work
# df_target

In [291]:
## Build the numpy.ndarray inputs for the search:
## df_np_work   - used to derive the prediction patterns,
## df_np_target - used to check them.
## NOTE: create_np_from_df also adds the derived rez_* columns to the frame it is given.

df_np_work = create_np_from_df(df_work)
df_np_target = create_np_from_df(df_target)

In [307]:
# Search for the best thresholds: for every (proc_work, tot_work) pair, derive
# patterns on the work window and score them on the target window.
# NOTE: the loop overwrites the notebook-level names `rez` and `a`.
for proc_work in np.arange(0.4, 0.9, 0.01):
    for tot_work in np.arange(3, 50, 2):
        rez = predict_past(df_np_work, proc_work, tot_work, verbose=False)   
        a = check_predict(df_np_target, rez)  
        # No row of the target window matches these patterns - nothing to score.
        if a['total_games'] == 0: continue
        tb = a['tb'] / a['total_games']
        tm = a['tm'] / a['total_games']
        # Report only the settings whose tb share on the target window is at least 0.5.
        if tb >= 0.5:
            
            print(
                f'proc_work={proc_work:.3f}, tot_work={tot_work} => '
#                 f'tb {tb:.3f}  || '
                f'tb {tb:.3f} ',
                f'tot_target= {a["total_games"]} \n',

            )

proc_work=0.800, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.810, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.820, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.830, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.840, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.850, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.860, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.870, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.880, tot_work=4 => tb 1.000  tot_target= 1 

proc_work=0.890, tot_work=4 => tb 1.000  tot_target= 1 



In [317]:
## Derive the prediction patterns from the work window
# rez = predict_past(df_np_work, 0.83, 3)  # verified
rez = predict_past(df_np_work, 0.83, 3)  # test

rez

 0 -1  0 -3 ||| f_h =>  3   0 = 1.000
 0  0 -3 -3 ||| f_h =>  3   0 = 1.000
 1 -2  0 -1 ||| f_h =>  3   0 = 1.000
 2  2  1  3 ||| f_h =>  4   0 = 1.000


[[0, -1, 0, -3], [0, 0, -3, -3], [1, -2, 0, -1], [2, 2, 1, 3]]

In [313]:
# Score the patterns in `rez` on the target window and print the tb / tm shares.
a = check_predict(df_np_target, rez)  
# Skip the report when no target row matched (avoids dividing by zero).
if a['total_games'] > 0:
    tb = a['tb'] / a['total_games']
    tm = a['tm'] / a['total_games']
    print(
        a, '\n', 
        f'tb {tb:.3f}  || '
        f'tm {tm:.3f} \n',

    )

{'tb': 4, 'tm': 1, 'total_games': 5, 'indexes': [131, 769, 2155, 3147, 971]} 
 tb 0.800  || tm 0.200 



In [304]:
# df_target.loc[a['indexes']]['rez_c_f_half'].value_counts()
# Show the target-window rows whose labels were collected by check_predict.
df_target.loc[a['indexes']] 

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,...,a_match_f_half,a_match_s_half,date,rez_h_f_half,rez_h_match,rez_a_f_half,rez_a_match,index,rez_c_f_half,rez_c_match
131,Аль-Дафра,Хаур-Факкан,1-3,0 - 2,1 - 1,Аль-Шаржа,Аль-Дафра,1-0,0 - 0,1 - 0,...,0 - 0,0 - 3,18_04_23,0,-1,0,-3,131,1,1
769,Русенгорд (Ж),Виттсье (Ж),4-0,3 - 0,1 - 0,Юргорден (Ж),Русенгорд (Ж),2-1,0 - 0,2 - 1,...,0 - 0,0 - 3,21_04_23,0,-1,0,-3,769,1,1
2155,Сан-Паулу,Америка МГ,3-0,1 - 0,2 - 0,Ботафого,Сан-Паулу,2-1,1 - 1,1 - 0,...,0 - 0,0 - 3,23_04_23,0,-1,0,-3,2155,0,1
3147,Гамба Осака,Йокогама ФК,1-1,1 - 1,0 - 0,Киото,Гамба Осака,2-1,1 - 1,1 - 0,...,0 - 0,0 - 3,23_04_23,0,-1,0,-3,3147,1,1
971,Манчестер Сити U18,Сандерленд U18,4-2,4 - 1,0 - 1,Манчестер Юнайтед U18,Манчестер Сити U18,1-3,0 - 2,1 - 1,...,1 - 0,3 - 1,22_04_23,2,2,1,3,971,1,1


In [232]:
# Inspect the rows that carry one specific pattern (-1, -3, -1, -1);
# the commented-out variant runs the same filter on the work window.
# df_work[
#         (df_work['rez_h_f_half'] == -1) &
#         (df_work['rez_h_match']  == -3) &
#         (df_work['rez_a_f_half'] == -1) &
#         (df_work['rez_a_match']  == -1) 
#     ]

df_target[
        (df_target['rez_h_f_half'] == -1) &
        (df_target['rez_h_match']  == -3) &
        (df_target['rez_a_f_half'] == -1) &
        (df_target['rez_a_match']  == -1) 
    ]

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,...,a_match_f_half,a_match_s_half,date,rez_h_f_half,rez_h_match,rez_a_f_half,rez_a_match,index,rez_c_f_half,rez_c_match
533,Витоша Бистрица,Спартак Плевен,2-2,2 - 2,0 - 0,Струмска Слава,Витоша Бистрица,3-0,1 - 0,2 - 0,...,0 - 1,0 - 0,14_04_23,-1,-3,-1,-1,533,1,1
883,АСМ Оран,КР Темушен,0-2,0 - 0,0 - 2,Бен Акнун,АСМ Оран,4-1,1 - 0,3 - 1,...,0 - 1,1 - 1,15_04_23,-1,-3,-1,-1,883,0,1
1689,Денизлиспор,Кечиоренгюджю,0-4,0 - 2,0 - 2,Тузласпор,Денизлиспор,3-0,1 - 0,2 - 0,...,0 - 1,2 - 2,15_04_23,-1,-3,-1,-1,1689,1,1
1882,Блэк Старз,Кёниц,2-1,2 - 0,0 - 1,Тун II,Блэк Старз,5-2,1 - 0,4 - 2,...,0 - 1,0 - 0,15_04_23,-1,-3,-1,-1,1882,1,1
2475,Giugliano,Потенца,3-2,2 - 2,1 - 0,Фоджа,Giugliano,3-0,1 - 0,2 - 0,...,0 - 1,1 - 1,16_04_23,-1,-3,-1,-1,2475,1,1
2945,Мутенице,Ратишковице,2-1,1 - 0,1 - 1,Kunstat,Мутенице,4-1,2 - 1,2 - 0,...,0 - 1,1 - 1,16_04_23,-1,-3,-1,-1,2945,0,1
2949,Bolatice,Долни Датыне,1-1,1 - 1,0 - 0,Вратимов,Bolatice,5-2,3 - 2,2 - 0,...,1 - 2,0 - 0,16_04_23,-1,-3,-1,-1,2949,1,1


In [315]:
## Apply the patterns in `rez` to upcoming matches
time_target = 'future' 
# day_target = datetime(year=2023, month=4, day=9)

# NOTE: reads one hard-coded daily file instead of a date window.
df_future = pd.read_csv('csv/future/25_04_23_f.csv')
df_np_future = create_np_from_df(df_future ,time_target='future')
# In 'future' mode check_predict only collects (and prints) the matching row labels.
a = check_predict(
                df_np=df_np_future,
                list_rez=rez,
                time_target='future'
)
a

[] 0 -1 0 -3
[] 0 0 -3 -3
[166] 1 -2 0 -1
[45] 2 2 1 3


{'tb': 0, 'tm': 0, 'total_games': 0, 'indexes': [166, 45]}

In [316]:
# Show the upcoming matches whose row labels were collected by check_predict.
df_future.loc[a['indexes']]

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,a_match_h_team,a_match_a_team,a_match_score,a_match_f_half,a_match_s_half,rez_h_f_half,rez_h_match,rez_a_f_half,rez_a_match,index
166,Нубленсе,Аудакс Итальяно,without_score,without_score,without_score,Нубленсе,Кокьюимбо,1-3,1 - 0,0 - 3,Аудакс Итальяно,Универсидад де Чили,1-2,1 - 1,0 - 1,1,-2,0,-1,166
45,Манчестер Юнайтед U18,Вулверхэмптон U18,without_score,without_score,without_score,Мидлсбро U18,Манчестер Юнайтед U18,0-2,0 - 2,0 - 0,Вулверхэмптон U18,Лидс U18,5-2,3 - 2,2 - 0,2,2,1,3,45


In [243]:
# NOTE(review): the saved output of this cell comes from an earlier execution (In [243])
# and differs from the patterns shown by the prediction cell - re-run to refresh it.
rez

[[-1, -3, -1, -1], [0, -2, -1, -1], [2, 2, 0, 0]]