In [67]:
from datetime import datetime as dt, timedelta
import os
import re
import time
import pandas as pd
import numpy as np
from multiprocessing import Process, Manager, current_process

In [211]:
# Helper functions: loading the match CSVs, building features, selecting patterns
def generate_dates(
                    time_target: str
    ) -> list[dt]:
    """
    Build a sorted list of dates from the CSV file names in csv/<time_target>/.

    File names are expected to start with ``DD_MM_YY_`` (for example
    ``21_03_23_p.csv``); the two-digit year is read as 20YY.  Directory
    entries that are not ``.csv`` files or whose name does not parse as a
    date (checkpoint folders, notes, ...) are skipped instead of crashing
    the whole listing.

    time_target: sub-folder of csv/ to scan - "past" or "future".
    Returns the parsed dates in ascending order.
    """
    dates = []
    for file_name in os.listdir(f'csv/{time_target}/'):
        if not file_name.endswith('.csv'):
            continue
        parts = file_name.split('_')
        try:
            dates.append(
                dt(
                    year=int(f"20{parts[2]}"),
                    month=int(parts[1]),
                    day=int(parts[0]),
                )
            )
        except (IndexError, ValueError):
            # too few "_"-separated parts, non-numeric parts or an impossible date
            continue
    dates.sort()
    return dates


def filter_from_tuesday(
                        target_day: dt, 
                        list_days: list[dt]
    ) -> list[dt] :
    """
    Keep the dates that fall inside the 7-day window starting at target_day.

    The window is half-open: target_day <= date < target_day + 7 days, so a
    window that starts on a Tuesday runs through the following Monday.  The
    weekday of target_day is not checked, and the result holds at most one
    entry per date present in list_days (fewer than 7 if days are missing).

    target_day: first day of the window.
    list_days: candidate dates; their order is preserved in the result.
    """
    window_end = target_day + timedelta(days=7)
    return [day for day in list_days if target_day <= day < window_end]


def create_df_w_date(
                csv_name: str, 
                time_target: str
    ) -> pd.DataFrame :
    """
    Read csv/<time_target>/<csv_name> and tag every row with the file's date.

    The 'date' column holds the first eight characters of csv_name
    (the ``DD_MM_YY`` prefix of the file name) as a string.
    """
    date_label = csv_name[:8]
    return pd.read_csv(f'csv/{time_target}/{csv_name}').assign(date=date_label)


def get_final_df(
                time_target: str,
                day_target: dt
    ) -> pd.DataFrame :
    """
    Load one 7-day batch of match files into a single dataframe.

    The dates available in csv/<time_target>/ are listed, narrowed to the
    7-day window starting at day_target, and each date's file
    (``DD_MM_YY_p.csv`` for 'past', ``DD_MM_YY_f.csv`` otherwise) is read
    with a 'date' column added.  The frames are concatenated with a fresh
    0..n-1 index.

    time_target: sub-folder of csv/ - "past" or "future".
    day_target: first day of the window.
    Raises ValueError when no file falls inside the window.
    """
    file_ends = "_p.csv" if time_target == 'past' else "_f.csv"
    week_days = filter_from_tuesday(day_target, generate_dates(time_target))

    if not week_days:
        # pd.concat([]) would raise the same exception type with a message
        # that does not say which folder or week is empty
        raise ValueError(
            f"no csv files found in csv/{time_target}/ "
            f"for the 7 days starting {day_target:%d_%m_%y}"
        )

    return pd.concat(
        [create_df_w_date(f"{day.strftime('%d_%m_%y')}{file_ends}", time_target) for day in week_days],
        ignore_index=True
    )


def create_np_from_df(
                    df: pd.DataFrame,
                    time_target: str='past',
    ) -> np.ndarray:
    """
    Add goal-difference columns to df and return them as a 2-D array.

    Score strings look like "2-1" or "2 - 1".  For each team's previous
    match the difference is taken from that team's point of view: the score
    is used as written when the team was the home side of the previous
    match, and swapped otherwise.  For the current match ('past' only) the
    difference is home minus away.  A score that cannot be parsed (missing
    value, "without_score", ...) is stored as -999.

    NOTE: df is modified in place - the rez_* columns and an 'index' column
    (a copy of df.index) are added to the frame that was passed in.

    df: matches with the cur_match_*, h_match_* and a_match_* columns.
    time_target: 'past' also derives the current-match columns; any other
        value leaves them out.
    Returns columns, in order:
        rez_h_f_half, rez_h_match, rez_a_f_half, rez_a_match,
        [rez_c_f_half, rez_c_match - 'past' only,] index
    """
    no_result = -999

    def add_column(
                    data: pd.Series, 
                    cur_team: pd.Series, 
                    target_team: pd.Series,
        ) -> list[int]:
        rez = []
        # iterate by position so duplicate index labels cannot break the lookup
        for score, cur, target in zip(data, cur_team, target_team):
            try:
                z_h, p_h = (int(part) for part in score.split('-'))
            except (ValueError, AttributeError):
                # ValueError: not exactly two integer parts;
                # AttributeError: NaN/None/number instead of a string
                rez.append(no_result)
                continue
            rez.append(z_h - p_h if cur == target else p_h - z_h)
        return rez

    # (new column, score column, team in the current match, home team of the scored match)
    previous_matches = (
        ('rez_h_f_half', 'h_match_f_half', 'cur_match_h_team', 'h_match_h_team'),
        ('rez_h_match', 'h_match_score', 'cur_match_h_team', 'h_match_h_team'),
        ('rez_a_f_half', 'a_match_f_half', 'cur_match_a_team', 'a_match_h_team'),
        ('rez_a_match', 'a_match_score', 'cur_match_a_team', 'a_match_h_team'),
    )
    current_match = (
        ('rez_c_f_half', 'cur_match_f_half', 'cur_match_h_team', 'cur_match_h_team'),
        ('rez_c_match', 'cur_match_score', 'cur_match_h_team', 'cur_match_h_team'),
    )

    for new_col, score_col, cur_col, home_col in previous_matches:
        df[new_col] = add_column(
                                data=df[score_col], 
                                cur_team=df[cur_col], 
                                target_team=df[home_col]
                            )
    df['index'] = df.index

    out_columns = [spec[0] for spec in previous_matches]
    if time_target == 'past':
        for new_col, score_col, cur_col, home_col in current_match:
            df[new_col] = add_column(
                                    data=df[score_col], 
                                    cur_team=df[cur_col], 
                                    target_team=df[home_col]
                                )
        out_columns += [spec[0] for spec in current_match]
    out_columns.append('index')

    return df[out_columns].to_numpy()


def predict_past(
            df_np: np.ndarray,
            proc: float,
            count_games: int
    ) -> list[list[int]]:
    """
    Find feature patterns whose matches mostly did not go against the home side
    in the first half.

    A pattern is the 4-tuple in columns 0-3 of df_np, with every value in
    -5..5.  For the rows of a pattern, column 4 (first-half result) and
    column 5 (full-time result) are counted:
        r_h_* - results >= 0,   r_a_* - results <= 0
    (a 0 is counted on both sides).  Results equal to -999 mean "no result"
    and are left out of the counts, so they are no longer tallied as away
    results.  A pattern is selected when
        r_h_f / (r_h_f + r_a_f) >= proc  and  (r_h_f + r_a_f) >= count_games.
    Each selected pattern is printed with its counts; 'done' is printed last.

    df_np: integer array with at least 6 columns, laid out as in the
        'past' output of create_np_from_df.
    proc: minimum first-half share, 0..1.
    count_games: minimum number of counted first-half results.
    Returns the selected patterns as [h_f, h_score, a_f, a_score] lists, in
    ascending lexicographic order.
    """
    no_result = -999
    low, high = -5, 5
    total_rez = []

    # Only patterns that actually occur are visited; np.unique returns them
    # in the same ascending order the exhaustive -5..5 scan would use.
    features = df_np[:, :4]
    in_range = ((features >= low) & (features <= high)).all(axis=1)
    candidates = df_np[in_range]
    if candidates.shape[0] > 0:
        patterns = np.unique(candidates[:, :4], axis=0).tolist()
    else:
        patterns = []

    for pattern in patterns:
        h_f, h_score, a_f, a_score = (int(value) for value in pattern)
        rez = candidates[(candidates[:, :4] == pattern).all(axis=1)]

        half = rez[:, 4]
        half = half[half != no_result]
        full = rez[:, 5]
        full = full[full != no_result]

        r_h_f = int(np.count_nonzero(half >= 0))
        r_a_f = int(np.count_nonzero(half <= 0))
        r_h_score = int(np.count_nonzero(full >= 0))
        r_a_score = int(np.count_nonzero(full <= 0))

        if r_h_f + r_a_f == 0:
            # no known first-half result for this pattern - nothing to rate
            continue

        proc_f = r_h_f/(r_h_f+r_a_f)
        # full-time share is informational only; nan when no full-time result is known
        proc_score = r_h_score/(r_h_score+r_a_score) if (r_h_score+r_a_score) else float('nan')

        if proc_f >= proc and (r_h_f+r_a_f) >= count_games:
            total_rez.append([h_f, h_score, a_f, a_score])
            print(
                f'{h_f:2d} {h_score:2d} {a_f:2d} {a_score:2d}'
                f' ||| f_h =>{r_h_f:3d} {r_a_f:3d} = {proc_f:.3f}'
                f' ||| score{r_h_score:3d} {r_a_score:3d}= {proc_score:.3f}')

    print('done')
    return total_rez

In [271]:
## Load the dataframes (7-day batches) to process:
## df_work   - the week starting at day_target, used to make the prediction
## df_target - the following week, used to check the prediction

time_target = 'past' 
# time_target = 'future' 
day_target = dt(year=2023, month=3, day=21)

df_work = get_final_df(time_target=time_target, day_target=day_target)
df_target = get_final_df(time_target=time_target, day_target=day_target+timedelta(days=7))
df_work

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,a_match_h_team,a_match_a_team,a_match_score,a_match_f_half,a_match_s_half,date
0,Motz/Silz,Имст,2-3,2 - 2,0 - 1,Motz/Silz,Куфштайн,1-1,1 - 1,0 - 0,Имст,СВ Воргль,1-2,1 - 0,0 - 2,21_03_23
1,Аккрингтон,Плимут,0-2,0 - 1,0 - 1,Аккрингтон,Милтон Кинс,0-1,0 - 1,0 - 0,Плимут,Форест Грин,2-0,1 - 0,1 - 0,21_03_23
2,Барнсли,Шеффилд Уэнсдей,4-2,2 - 1,2 - 1,Уикомб Уондерерз,Барнсли,0-1,0 - 0,0 - 1,Шеффилд Уэнсдей,Болтон,1-1,1 - 1,0 - 0,21_03_23
3,Бредфорд Сити,Карлайл Юнайтед,0-0,0 - 0,0 - 0,Бредфорд Сити,Хартлпул Юнайтед,2-2,0 - 1,2 - 1,Карлайл Юнайтед,Стивенейдж,0-0,0 - 0,0 - 0,21_03_23
4,Гиллингем,Крю,2-1,0 - 0,2 - 1,Уолсолл,Гиллингем,2-0,0 - 0,2 - 0,Нортхэмптон,Крю,1-0,1 - 0,0 - 0,21_03_23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1992,Лос Андес,Deportes Rengo,1-1,0 - 0,1 - 1,Депортес Консепсьон,Лос Андес,2-3,0 - 3,2 - 0,Deportes Rengo,Хенераль Веласкес,1-0,1 - 0,0 - 0,27_03_23
1993,Рейнджерс (Ж),Селтик (Ж),1-1,1 - 0,0 - 1,Рейнджерс (Ж),Гамильтон (Ж),6-0,5 - 0,1 - 0,Хиберниан (Ж),Селтик (Ж),0-2,0 - 2,0 - 0,27_03_23
1994,Макара,Cuniburo,0-0,0 - 0,0 - 0,Чакаритас,Макара,0-1,0 - 1,0 - 0,Cuniburo,Америка Кито,1-3,0 - 3,1 - 0,27_03_23
1995,Портмор,Faulkland,4-0,2 - 0,2 - 0,Данбихолден,Портмор,1-3,0 - 0,1 - 3,Faulkland,Кавальер,2-2,1 - 0,1 - 2,27_03_23


In [272]:
## Build the numpy arrays that are ready for processing:
## df_np_work   - for the prediction
## df_np_target - for the check

df_np_work = create_np_from_df(df_work)
df_np_target = create_np_from_df(df_target)

## Get the prediction: patterns with a first-half share of at least 0.79
## that are backed by at least 5 counted results
rez = predict_past(df_np_work, 0.79, 5)
rez

-1 -2 -1 -2 ||| f_h =>  4   1 = 0.800 ||| score  4   3= 0.571
 1 -1  0 -1 ||| f_h =>  8   2 = 0.800 ||| score  8   2= 0.800
 1  1  1  2 ||| f_h =>  4   1 = 0.800 ||| score  3   2= 0.600
 1  2  2  3 ||| f_h =>  4   1 = 0.800 ||| score  3   1= 0.750
done


[[-1, -2, -1, -2], [1, -1, 0, -1], [1, 1, 1, 2], [1, 2, 2, 3]]

In [273]:
def check_predict(
                df_np: np.ndarray,
                list_rez: list[list[int]],
                time_target: str='past'
    ) -> dict[str, object]:
    """
    Look up the selected patterns in df_np and tally how their matches ended.

    For every [h_f, h_score, a_f, a_score] pattern in list_rez the rows of
    df_np whose columns 0-3 equal the pattern are taken.

    time_target == 'future': the array has no result columns; only the row
        labels (column 4) are collected and printed.
    otherwise: column 4 is the first-half result and column 6 the row label.
        Rows whose first-half result is -999 ("no result") are left out of
        the tallies, so they are not counted as away results or as played
        games; their labels are still collected.

    Returns a dict with
        'k1'          - number of first-half results >= 0,
        'k2'          - number of first-half results <= 0 (a 0 counts in both),
        'total_games' - number of matched rows with a known first-half result,
        'indexes'     - row labels of all matched rows, as plain Python values.
    """
    no_result = -999
    total_rez = {
                'k1': 0,
                'k2': 0,
                'total_games': 0,
                'indexes': []
    }
    for h_f, h_score, a_f, a_score in list_rez:
        r = df_np[
                (df_np[:,0] == h_f) &
                (df_np[:,1] == h_score) &
                (df_np[:,2] == a_f) &
                (df_np[:,3] == a_score) 
            ]
        if time_target == 'future':
            print(f'{r[:, 4]}',h_f, h_score, a_f, a_score)
            total_rez['indexes'].extend(r[:, 4].tolist())
            continue

        known = r[r[:,4] != no_result]
        home_count = int(np.count_nonzero(known[:,4] >= 0))
        away_count = int(np.count_nonzero(known[:,4] <= 0))

        total_rez['k1'] += home_count
        total_rez['k2'] += away_count
        total_rez['total_games'] += known.shape[0]
        total_rez['indexes'].extend(r[:, 6].tolist())

        print(f'{r[:, 6]}',h_f, h_score, a_f, a_score, '||', home_count, known.shape[0])
    return total_rez
        
# Score the selected patterns against the target week and print the hit rates.
a = check_predict(df_np_target, rez)  
# When no game matched, the rates are undefined: print nan instead of
# stopping the notebook with ZeroDivisionError.
print(
    a, '\n', 
    'k1', a['k1'] / a['total_games'] if a['total_games'] else float('nan'), '\n',
    'k2', a['k2'] / a['total_games'] if a['total_games'] else float('nan'), '\n'
)

[  41  502 1047 1468 1657 1919 2027 2344 2642] -1 -2 -1 -2 || 7 9
[1086] 1 -1 0 -1 || 1 1
[ 196  253  705  940 1041 1506 1511 1848 1892 2016 2674] 1 1 1 2 || 9 11
[448 686] 1 2 2 3 || 1 2
{'k1': 18, 'k2': 11, 'total_games': 23, 'indexes': [41, 502, 1047, 1468, 1657, 1919, 2027, 2344, 2642, 1086, 196, 253, 705, 940, 1041, 1506, 1511, 1848, 1892, 2016, 2674, 448, 686]} 
 k1 0.782608695652174 
 k2 0.4782608695652174 



In [226]:
# Ad-hoc check: rows of the target array whose four feature columns
# (columns 0-3) equal 0, 0, -3, -3.
df_np_target[
        (df_np_target[:,0] == 0) &
        (df_np_target[:,1] == 0) &
        (df_np_target[:,2] == -3) &
        (df_np_target[:,3] == -3) 
    ]

array([[   0,    0,   -3,   -3,   -1,   -3, 1329]])

In [228]:
# Ad-hoc check: target-week matches whose features equal 1, -1, 0, -1.
# The rez_* columns are present on df_target because create_np_from_df
# adds them to the frame it is given.
df_target[
        (df_target['rez_h_f_half'] == 1) &
        (df_target['rez_h_match'] == -1) &
        (df_target['rez_a_f_half'] == 0) &
        (df_target['rez_a_match'] == -1) 
    ]
# Leftover from an earlier manual check:
# df_target.loc[1666]
# [1666]] -2 -1 0 -2 || 1 1

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,...,a_match_f_half,a_match_s_half,date,rez_h_f_half,rez_h_match,rez_a_f_half,rez_a_match,index,rez_c_f_half,rez_c_match
1096,Макартур,ВС Уондерерс,2-2,0 - 1,2 - 1,Перт Глори,Макартур,2-1,0 - 1,2 - 0,...,1 - 1,1 - 2,08_04_23,1,-1,0,-1,1096,-1,0


In [274]:
## Predict upcoming matches: find the rows of one future file that match
## the patterns selected in `rez`
time_target = 'future' 
# day_target = dt(year=2023, month=4, day=9)

df_future = pd.read_csv('csv/future/09_04_23_f.csv')
df_np_future = create_np_from_df(df_future ,time_target='future')
a = check_predict(
                df_np=df_np_future,
                list_rez=rez,
                time_target='future'
)
a

[590] -1 -2 -1 -2
[209 484 547] 1 -1 0 -1
[447] 1 1 1 2
[595] 1 2 2 3


{'k1': 0, 'k2': 0, 'total_games': 0, 'indexes': [590, 209, 484, 547, 447, 595]}

In [275]:
# Show the upcoming matches whose features match one of the selected patterns.
df_future.loc[a['indexes']]

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,a_match_h_team,a_match_a_team,a_match_score,a_match_f_half,a_match_s_half,rez_h_f_half,rez_h_match,rez_a_f_half,rez_a_match,index
590,Данди Юнайтед,Хиберниан,without_score,without_score,without_score,Рейнджерс,Данди Юнайтед,2-0,1 - 0,1 - 0,Хиберниан,Мотеруэлл,1-3,0 - 1,1 - 2,-1,-2,-1,-2,590
209,Замалек,Национальный Банк Египта,without_score,without_score,without_score,Аль-Масри,Замалек,3-2,1 - 2,2 - 0,Энппи,Национальный Банк Египта,2-1,1 - 1,1 - 0,1,-1,0,-1,209
484,Бурирам Юнайтед,Лампанг,without_score,without_score,without_score,Бангкок Юнайтед,Бурирам Юнайтед,4-3,1 - 2,3 - 1,Лампанг,Патум Юнайтед,0-1,0 - 0,0 - 1,1,-1,0,-1,484
547,Биловец,Рымаржов,without_score,without_score,without_score,Френштат-под-Радгоштем,Биловец,2-1,0 - 1,2 - 0,Карвина (Б),Рымаржов,1-0,0 - 0,1 - 0,1,-1,0,-1,547
447,Medzilaborce,Сокол Люботице,without_score,without_score,without_score,Собранце,Medzilaborce,0-1,0 - 1,0 - 0,Сокол Люботице,Velky Saris,2-0,1 - 0,1 - 0,1,1,1,2,447
595,Таллинна Калев,Левадия,without_score,without_score,without_score,Пярну Вапрус,Таллинна Калев,0-2,0 - 1,0 - 1,Левадия,Курессааре,4-1,3 - 1,1 - 0,1,2,2,3,595
