In [37]:
import pandas as pd
import sympy as sp
import warnings
import re
import warnings
import numpy as np
from datetime import datetime
import math

warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

In [None]:
#sect_1 calculate rating

In [1]:
# Runs the companion notebook mathlab.ipynb before the rest of this notebook.
# NOTE(review): inverse_pdf_normal_table (used further down) is not defined in this
# notebook, so it presumably comes from mathlab.ipynb — confirm.
%run mathlab.ipynb

In [39]:
def cal_variance(previous_date, current_date, variance, threshold_variance = 0.7, gamma = 1/3, delta = 3):
    """Rescale a team's variance by the time elapsed since its previous record.

    Args:
        previous_date: Date of the previous record, or the sentinel string
            'first_registered'. Anything that is not a ``datetime`` is parsed
            with ``pd.to_datetime``.
        current_date: Date of the current match (same parsing rule).
        variance: Variance stored with the previous record.
        threshold_variance: Lower bound of the returned variance.
        gamma: Accepted but not used by the current implementation.
        delta: Accepted but not used by the current implementation.

    Returns:
        ``variance`` unchanged for the 'first_registered' sentinel; 3.5 when
        more than 45 days have passed; otherwise
        ``max(log10(max(days, 1)) * variance, threshold_variance)``.
    """
    if previous_date == 'first_registered':
        return variance

    earlier = previous_date if isinstance(previous_date, datetime) else pd.to_datetime(previous_date)
    later = current_date if isinstance(current_date, datetime) else pd.to_datetime(current_date)

    gap_days = (later - earlier).days
    if gap_days > 45:
        # Long break: fall back to a fixed variance.
        return 3.5

    # Gaps shorter than one day count as one day, which makes log10 yield 0.
    scaled_variance = math.log10(max(gap_days, 1)) * variance
    return max(scaled_variance, threshold_variance)
    
    
    
    

In [7]:
def update_storage(df_storage, team_id, new_home_rating, new_away_rating, date, variance):
    """Append one (home rating, away rating, date, variance) record for a team.

    The storage frame is laid out as two leading columns followed by groups of
    four columns (home rating, away rating, date, variance). The record goes
    into the first group whose home rating, away rating or date is missing for
    this team; if every group is filled, a new group of four columns named
    ``home_rating_N`` / ``away_rating_N`` / ``date_N`` / ``var_N`` is added.

    Args:
        df_storage: Storage frame with a 'teamID' column. Modified in place.
        team_id: Value looked up in the 'teamID' column.
        new_home_rating: Stored as ``str(new_home_rating)``.
        new_away_rating: Stored as ``str(new_away_rating)``.
        date: Stored as ``pd.to_datetime(date)`` on both code paths.
        variance: Stored as ``float(variance)``.

    Returns:
        The same ``df_storage`` object. If the team is unknown a message is
        printed and the frame is returned unchanged.
    """
    team_mask = df_storage['teamID'] == team_id
    if not team_mask.any():
        print(f"Team ID {team_id} not found in the storage.")
        return df_storage

    home_value = str(new_home_rating)
    away_value = str(new_away_rating)
    # Always store a parsed timestamp so both code paths write the same type.
    date_value = pd.to_datetime(date)
    var_value = float(variance)

    columns = df_storage.columns
    counter = (len(columns) - 2) // 4
    team_data = df_storage.loc[team_mask]

    # Walk the complete four-column groups; an incomplete trailing group is ignored.
    for start in range(2, len(columns) - 3, 4):
        home_col, away_col, date_col, var_col = columns[start:start + 4]
        slot_is_empty = any(
            pd.isna(team_data[col].values[0]) for col in (home_col, away_col, date_col)
        )
        if slot_is_empty:
            df_storage.loc[team_mask, home_col] = home_value
            df_storage.loc[team_mask, away_col] = away_value
            df_storage.loc[team_mask, date_col] = date_value
            df_storage.loc[team_mask, var_col] = var_value
            return df_storage

    # Every existing group is filled for this team: open a new one for all teams.
    new_columns = {
        f'home_rating_{counter}': home_value,
        f'away_rating_{counter}': away_value,
        f'date_{counter}': date_value,
        f'var_{counter}': var_value,
    }
    for column_name in new_columns:
        df_storage[column_name] = None
    for column_name, value in new_columns.items():
        df_storage.loc[team_mask, column_name] = value

    return df_storage


In [9]:
def get_last_record(df_storage, team_id):
    """Return the most recent complete record stored for a team.

    The storage frame is read as two leading columns followed by groups of four
    columns (home rating, away rating, date, variance). Groups are scanned from
    the left; the scan stops at the first group whose home rating, away rating
    or date is missing, and the group before it is returned.

    Args:
        df_storage: Storage frame with a 'teamID' column.
        team_id: Value looked up in the 'teamID' column.

    Returns:
        tuple: (home rating, away rating, date, variance) of the latest filled
        group, as raw cell values.

    Raises:
        IndexError: If ``team_id`` is not present in the frame.
        ValueError: If the team has no filled group at all. The previous
            implementation silently returned unrelated columns in that case.
    """
    team_data = df_storage.loc[df_storage['teamID'] == team_id]
    if team_data.empty:
        raise IndexError(f"Team ID {team_id} not found in the storage.")

    def cell(position):
        # First (only expected) row of the team's slice, read column-wise so
        # each value keeps its column's own dtype.
        return team_data.iloc[:, position].values[0]

    latest_start = None
    for start in range(2, len(team_data.columns) - 3, 4):
        if any(pd.isna(cell(start + offset)) for offset in range(3)):
            break
        latest_start = start

    if latest_start is None:
        raise ValueError(f"Team ID {team_id} has no rating record in the storage.")

    return tuple(cell(latest_start + offset) for offset in range(4))


In [11]:
def convert_to_goal(value, table_of_value):
    """Map a stored rating (a probability) to the nearest ``x`` in a lookup table.

    The table is split at the row with the highest 'probability'. A rating that
    carries a trailing '!' marker is looked up in the rows from the peak
    onwards; any other rating is looked up in the rows up to the peak. The peak
    row belongs to both halves because label slicing is inclusive.

    Args:
        value: Rating as a number, ``str`` or UTF-8 ``bytes``; strings may end
            in '!'.
        table_of_value: DataFrame with 'probability' and 'x' columns.

    Returns:
        The 'x' value of the row whose 'probability' is closest to the rating
        (first such row on ties), or ``None`` if no row could be compared.
    """
    larger_pattern = re.compile(r'\d+(\.\d+)?(e[+-]?\d+)?!')

    # Decode first: str(b'0.2!') is "b'0.2!'", which would never match the
    # marker pattern and would send a marked rating to the wrong half.
    if isinstance(value, bytes):
        value = value.decode('utf-8')
    has_marker = bool(larger_pattern.match(str(value)))

    max_prob_indx = table_of_value['probability'].idxmax()
    if has_marker:
        candidates = table_of_value.loc[max_prob_indx:]
    else:
        candidates = table_of_value.loc[:max_prob_indx]

    if isinstance(value, str):
        value = float(value.strip('!')) if has_marker else float(value)

    nearest_distance = float('inf')
    nearest_x_val = None
    for _, row in candidates.iterrows():
        distance = abs(value - row['probability'])
        if distance < nearest_distance:
            nearest_distance = distance
            nearest_x_val = row['x']

    return nearest_x_val


In [13]:
def error_alg(predicted_goal_difference, real_goal_difference, c = 0.05):
    """Compress the absolute prediction error with a base-10 logarithm.

    Computes ``log10(c * |real - predicted| + 1)``. This is evaluated directly
    with ``math.log10`` instead of building and lambdifying a symbolic
    expression on every call.

    Args:
        predicted_goal_difference: Predicted goal difference.
        real_goal_difference: Observed goal difference.
        c: Scale applied to the absolute error before the logarithm.

    Returns:
        0 when the prediction is exact, otherwise the compressed error as a float.
    """
    real_error = abs(real_goal_difference - predicted_goal_difference)
    if real_error == 0:
        return 0

    return math.log10(c * real_error + 1)

In [15]:
def convergence_alg(current_home_rating, current_away_rating, learning_rate_home, learning_rate_away, error, predicted_goal_difference, real_goal_difference, mode_probability):
    """Move a pair of ratings according to the prediction error and re-encode them.

    Ratings are numbers that may carry a trailing '!' marker (detected with the
    regex below; ``bytes`` inputs are decoded as UTF-8 first). convert_to_goal
    in this notebook uses the same marker to choose which side of the
    probability table's peak is searched.

    The first rating moves by ``error * learning_rate_home``; the second rating
    moves by the first rating's change multiplied by ``learning_rate_away``.
    The sign of each move depends on which of the two ratings carries the
    marker and on whether ``predicted_goal_difference`` is greater than, less
    than, or equal to ``real_goal_difference``. A value that ends up above
    ``mode_probability`` is reflected back below it
    (``mode - |value - mode|``) and its marker is toggled. Values are floored
    at 1e-7 in most, but not all, branches.

    Args:
        current_home_rating: First rating (number, str or bytes; may end in '!').
        current_away_rating: Second rating (same encoding).
        learning_rate_home: Step multiplier for the first rating.
        learning_rate_away: Multiplier applied to the first rating's change to
            obtain the second rating's change.
        error: Non-directional error magnitude used as the base step.
        predicted_goal_difference: Predicted goal difference.
        real_goal_difference: Observed goal difference.
        mode_probability: Upper bound at which a rating is reflected.

    Returns:
        tuple: ``(team_home_rating, team_away_rating)``. Normally both are
        strings, optionally ending in '!'. See the NOTE(review) comments below
        for a path that returns a float.
    """
    pattern = r'\d+(\.\d+)?(e[+-]?\d+)?!'
    larger_pattern = re.compile(pattern)

    if isinstance(current_home_rating, bytes):
        current_home_rating = current_home_rating.decode('utf-8')
        
    if isinstance(current_away_rating, bytes):
        current_away_rating = current_away_rating.decode('utf-8')
        
    # Case 1: both ratings carry the '!' marker.
    if re.match(larger_pattern, str(current_home_rating)) and re.match(larger_pattern, str(current_away_rating)): 
        current_home_rating = current_home_rating.strip('!')
        current_away_rating = current_away_rating.strip('!')

        current_home_rating = float(current_home_rating)
        current_away_rating = float(current_away_rating)
        if predicted_goal_difference > real_goal_difference:
            team_home_rating = current_home_rating + error*learning_rate_home
            team_away_rating = current_away_rating + (team_home_rating - current_home_rating) * learning_rate_away

            # strip_* record that a rating was reflected and must lose its marker.
            strip_home = 0
            strip_away = 0
            if team_home_rating > mode_probability:
                team_home_rating = mode_probability - abs(team_home_rating - mode_probability)
                if team_home_rating <= 0.0000001:
                    team_home_rating = 0.0000001
        
                strip_home += 1

            if team_away_rating > mode_probability:
                team_away_rating = mode_probability - abs(team_away_rating - mode_probability)
                if team_away_rating <= 0.0000001:
                    team_away_rating = 0.0000001 

                strip_away += 1

            if strip_home == 0:
                team_home_rating = str(team_home_rating) + '!'

            if strip_away == 0: 
                team_away_rating = str(team_away_rating) + '!'

            team_home_rating = str(team_home_rating)
            team_away_rating = str(team_away_rating)

        elif predicted_goal_difference < real_goal_difference:
            team_home_rating = current_home_rating - error*learning_rate_home
            team_away_rating = current_away_rating + (team_home_rating - current_home_rating) * learning_rate_away
            if team_home_rating <= 0.0000001:
                team_home_rating = 0.0000001

            if team_away_rating <= 0.0000001:
                team_away_rating = 0.0000001 
            team_home_rating = str(team_home_rating)
            team_away_rating = str(team_away_rating)
            
            team_home_rating = team_home_rating + '!'
            team_away_rating = team_away_rating + '!'

            
        else:
            # Exact prediction: values are unchanged, markers are kept.
            team_home_rating = str(current_home_rating) + '!'
            team_away_rating = str(current_away_rating) + '!'

        return team_home_rating, team_away_rating 

    # Case 2: only the home rating carries the marker.
    if re.match(larger_pattern, str(current_home_rating)):
        current_home_rating = current_home_rating.strip('!')
        current_home_rating = float(current_home_rating)
        current_away_rating = float(current_away_rating)
        
        if predicted_goal_difference < real_goal_difference:
            team_home_rating = current_home_rating - error*learning_rate_home
            team_away_rating = current_away_rating - (team_home_rating - current_home_rating) * learning_rate_away

            if team_home_rating <= 0.0000001:
                team_home_rating = 0.0000001

            # NOTE(review): the away rating is only converted to str inside this
            # `if`; when it is not reflected it is returned as a float — confirm
            # whether that is intended.
            if team_away_rating > mode_probability:
                team_away_rating = mode_probability - abs(team_away_rating - mode_probability)

                if team_away_rating <= 0.0000001:
                    team_away_rating = 0.0000001 
                team_away_rating = str(team_away_rating)
                team_away_rating = team_away_rating + '!'

            team_home_rating = str(team_home_rating)
            team_home_rating = team_home_rating + '!'
            
        elif predicted_goal_difference > real_goal_difference:
            team_home_rating = current_home_rating + error*learning_rate_home
            team_away_rating = current_away_rating - (team_home_rating - current_home_rating) * learning_rate_away

            if team_away_rating <= 0.0000001:
                team_away_rating = 0.0000001 

            strip_home = 0
            if team_home_rating > mode_probability:
                team_home_rating = mode_probability - abs(team_home_rating - mode_probability)
                if team_home_rating <= 0.0000001:
                    team_home_rating = 0.0000001
                strip_home += 1
                
            team_home_rating = str(team_home_rating)
            if strip_home == 0:
                team_home_rating = team_home_rating + '!'
            team_away_rating = str(team_away_rating)
    
        else:
            team_home_rating = str(current_home_rating) + '!'
            team_away_rating = str(current_away_rating)

        return team_home_rating, team_away_rating

    # Case 3: only the away rating carries the marker.
    if re.match(larger_pattern, str(current_away_rating)):
        current_away_rating = current_away_rating.strip('!')
        current_home_rating = float(current_home_rating)
        current_away_rating = float(current_away_rating)
        
        if predicted_goal_difference < real_goal_difference:
            team_home_rating = current_home_rating + error*learning_rate_home
            team_away_rating = current_away_rating - (team_home_rating - current_home_rating) * learning_rate_away
            
            if team_away_rating <= 0.0000001:
                team_away_rating = 0.0000001 

            if team_home_rating > mode_probability:
                team_home_rating = mode_probability - abs(team_home_rating - mode_probability)
                if team_home_rating <= 0.0000001:
                    team_home_rating = 0.0000001
                
                team_home_rating = str(team_home_rating)
                team_home_rating = team_home_rating + '!'

            team_home_rating = str(team_home_rating)
            team_away_rating = str(team_away_rating)
            team_away_rating = team_away_rating + '!'

        elif predicted_goal_difference > real_goal_difference:
            team_home_rating = current_home_rating - error*learning_rate_home
            team_away_rating = current_away_rating - (team_home_rating - current_home_rating) * learning_rate_away
            
            if team_home_rating <= 0.0000001:
                team_home_rating = 0.0000001

            # NOTE(review): unlike the other reflections, this one applies no
            # 1e-7 floor to the reflected away rating — confirm.
            strip_away = 0
            if team_away_rating > mode_probability:
                team_away_rating = mode_probability - abs(team_away_rating - mode_probability)
                strip_away += 1

            team_home_rating = str(team_home_rating)
            team_away_rating = str(team_away_rating)
            if strip_away == 0:
                team_away_rating = team_away_rating + '!'
    
        else:
            team_home_rating = str(current_home_rating)
            team_away_rating = str(current_away_rating) + '!'
        return team_home_rating, team_away_rating

    else:
        # Case 4: neither rating carries the marker.
        current_home_rating = float(current_home_rating)
        current_away_rating = float(current_away_rating)
        if predicted_goal_difference < real_goal_difference:
            team_home_rating = current_home_rating + error*learning_rate_home
            team_away_rating = current_away_rating + (team_home_rating - current_home_rating) * learning_rate_away

            # A reflected rating gains the marker here.
            if team_away_rating > mode_probability:
                team_away_rating = mode_probability - abs(team_away_rating - mode_probability)
                if team_away_rating <= 0.0000001:
                    team_away_rating = 0.0000001 
                team_away_rating = str(team_away_rating)
                team_away_rating = team_away_rating + '!'

            if team_home_rating > mode_probability:
                team_home_rating = mode_probability - abs(team_home_rating - mode_probability)
                if team_home_rating <= 0.0000001:
                    team_home_rating = 0.0000001
                team_home_rating = str(team_home_rating)
                team_home_rating = team_home_rating + '!'

            team_home_rating = str(team_home_rating)
            team_away_rating = str(team_away_rating)

        elif predicted_goal_difference > real_goal_difference:
            team_home_rating = current_home_rating - error*learning_rate_home
            team_away_rating = current_away_rating + (team_home_rating - current_home_rating) * learning_rate_away
            
            if team_home_rating <= 0.0000001:
                team_home_rating = 0.0000001

            if team_away_rating <= 0.0000001:
                team_away_rating = 0.0000001 
                
            team_home_rating = str(team_home_rating)
            team_away_rating = str(team_away_rating)
            
        else:
            team_home_rating = str(current_home_rating)
            team_away_rating = str(current_away_rating)
        return team_home_rating, team_away_rating


In [17]:
def modify_rate_calc(input_value, b = 2):
    """Return ``1 / b ** |input_value + 0.25|``.

    The result is 1 when ``input_value`` equals -0.25 and shrinks
    exponentially (base ``b``) as ``input_value`` moves away from it.
    """
    exponent = abs(input_value + 0.25)
    return 1 / (b ** exponent)
    

In [19]:
# Lookup table read by convert_to_goal (which expects 'probability' and 'x' columns).
# inverse_pdf_normal_table is not defined in this notebook — presumably provided by
# mathlab.ipynb; confirm.
# NOTE(review): the table is built with (0.25, 1.75) while standard_deviation below
# is 1.7 — confirm which value is intended and whether these constants should be
# passed to the call instead of literals.
convert_table = inverse_pdf_normal_table(0.25, 1.75)
mean = 0.25
standard_deviation = 1.7

In [233]:
def calculate_rating(index, real_goal_scr_home, real_goal_scr_away, home_team_id, away_team_id, cur_date, df_storage, mode_probability, epilson = 0.7, b = 2, c = 0.03, threshold_variance = 0.7, gamma = 1/3, delta = 3):
    """Predict one match from the stored ratings, then update both teams' ratings.

    Each team's variance is now derived from that team's own last record.
    Previously the away team's ``pre_date``/``var`` overwrote the home team's,
    so both teams were updated with, and stored, the away team's variance.

    Args:
        index: Accepted for interface compatibility; not used.
        real_goal_scr_home: Goals scored by the home team (number or numeric str).
        real_goal_scr_away: Goals scored by the away team (number or numeric str).
        home_team_id: Home team identifier in ``df_storage``.
        away_team_id: Away team identifier in ``df_storage``.
        cur_date: Date of the match.
        df_storage: Rating storage frame; updated through update_storage.
        mode_probability: Passed through to convergence_alg.
        epilson: Ratio of the secondary learning rate to the primary one.
        b: Base passed to modify_rate_calc.
        c: Scale passed to error_alg.
        threshold_variance: Passed to cal_variance.
        gamma: Passed to cal_variance.
        delta: Passed to cal_variance.

    Returns:
        tuple: (home team predicted value, away team predicted value,
        predicted goal difference).
    """
    (current_home_rating_home_team, current_away_rating_home_team,
     pre_date_home, var_home) = get_last_record(df_storage, home_team_id)
    (current_home_rating_away_team, current_away_rating_away_team,
     pre_date_away, var_away) = get_last_record(df_storage, away_team_id)

    # The home side is predicted from its home rating, the away side from its away rating.
    input_home_team = convert_to_goal(current_home_rating_home_team, convert_table)
    input_away_team = convert_to_goal(current_away_rating_away_team, convert_table)
    predicted_goal_diff = input_home_team - input_away_team

    if isinstance(real_goal_scr_home, str):
        real_goal_scr_home = float(real_goal_scr_home)
    if isinstance(real_goal_scr_away, str):
        real_goal_scr_away = float(real_goal_scr_away)
    real_goal_difference = real_goal_scr_home - real_goal_scr_away
    error = error_alg(predicted_goal_diff, real_goal_difference, c = c)

    # One variance per team, each from that team's own previous record.
    new_var_home = cal_variance(pre_date_home, cur_date, var_home, threshold_variance = threshold_variance, gamma = gamma, delta = delta)
    new_var_away = cal_variance(pre_date_away, cur_date, var_away, threshold_variance = threshold_variance, gamma = gamma, delta = delta)

    modify_rate_main_home = new_var_home * modify_rate_calc(input_home_team, b = b)
    modify_rate_sub_home = modify_rate_main_home*epilson
    modify_rate_main_away = new_var_away * modify_rate_calc(input_away_team, b = b)
    modify_rate_sub_away = modify_rate_main_away*epilson

    new_home_team_home_rating, new_home_team_away_rating = convergence_alg(
        current_home_rating_home_team, current_away_rating_home_team,
        learning_rate_home = modify_rate_main_home, learning_rate_away = modify_rate_sub_home,
        error = error, predicted_goal_difference = predicted_goal_diff,
        real_goal_difference = real_goal_difference, mode_probability = mode_probability)
    # For the away team the primary rating is its away rating, and the
    # predicted/real arguments are swapped, exactly as in the original call.
    new_away_team_away_rating, new_away_team_home_rating = convergence_alg(
        current_away_rating_away_team, current_home_rating_away_team,
        learning_rate_home = modify_rate_main_away, learning_rate_away = modify_rate_sub_away,
        error = error, predicted_goal_difference = real_goal_difference,
        real_goal_difference = predicted_goal_diff, mode_probability = mode_probability)

    df_storage = update_storage(df_storage, home_team_id, new_home_team_home_rating, new_home_team_away_rating, cur_date, new_var_home)
    df_storage = update_storage(df_storage, away_team_id, new_away_team_home_rating, new_away_team_away_rating, cur_date, new_var_away)

    return input_home_team, input_away_team, predicted_goal_diff
    

In [33]:
def calculating_algorithm(df_match, df_storage, mode_probability, epilson = 0.7, b = 2, c = 0.03, threshold_variance = 0.7, gamma = 1/3, delta = 3): 
    """Run the rating update over every match in ``df_match``, in row order.

    For each match the prediction and the real goal difference are written to
    ``df_match`` ('Home_team_pred', 'Away_team_pred',
    'Rating_predicted_goal_diff', 'Real_goal_diff') and both teams' new ratings
    are appended to ``df_storage``.

    Each team's variance is now derived from that team's own last record.
    Previously the away team's ``pre_date``/``var`` overwrote the home team's.
    The per-match debug ``print(error)`` has been removed.

    Args:
        df_match: Match frame; reads columns '5' (date), '7' (home team id),
            '8' (away team id), '9' (home goals) and '10' (away goals).
            Modified in place.
        df_storage: Rating storage frame; updated through update_storage.
        mode_probability: Passed through to convergence_alg.
        epilson: Ratio of the secondary learning rate to the primary one.
        b: Base passed to modify_rate_calc.
        c: Scale passed to error_alg.
        threshold_variance: Passed to cal_variance.
        gamma: Passed to cal_variance.
        delta: Passed to cal_variance.

    Returns:
        None.
    """
    for index, match in df_match.iterrows():
        home_team_id = match.loc['7']
        away_team_id = match.loc['8']
        cur_date = match.loc['5']

        (current_home_rating_home_team, current_away_rating_home_team,
         pre_date_home, var_home) = get_last_record(df_storage, home_team_id)
        (current_home_rating_away_team, current_away_rating_away_team,
         pre_date_away, var_away) = get_last_record(df_storage, away_team_id)

        input_home_team = convert_to_goal(current_home_rating_home_team, convert_table)
        input_away_team = convert_to_goal(current_away_rating_away_team, convert_table)
        df_match.loc[index, 'Home_team_pred'] = input_home_team
        df_match.loc[index, 'Away_team_pred'] = input_away_team
        predicted_goal_diff = input_home_team - input_away_team
        df_match.loc[index, 'Rating_predicted_goal_diff'] = predicted_goal_diff 

        real_goal_scr_home = match.loc['9']
        real_goal_scr_away = match.loc['10']
        if isinstance(real_goal_scr_home, str):
            real_goal_scr_home = float(real_goal_scr_home)
        if isinstance(real_goal_scr_away, str):
            real_goal_scr_away = float(real_goal_scr_away)
        real_goal_difference = real_goal_scr_home - real_goal_scr_away
        df_match.loc[index, 'Real_goal_diff'] = real_goal_difference
        error = error_alg(predicted_goal_diff, real_goal_difference, c = c)

        # One variance per team, each from that team's own previous record.
        new_var_home = cal_variance(pre_date_home, cur_date, var_home, threshold_variance = threshold_variance, gamma = gamma, delta = delta)
        new_var_away = cal_variance(pre_date_away, cur_date, var_away, threshold_variance = threshold_variance, gamma = gamma, delta = delta)

        modify_rate_main_home = new_var_home * modify_rate_calc(input_home_team, b = b)
        modify_rate_sub_home = modify_rate_main_home*epilson
        modify_rate_main_away = new_var_away * modify_rate_calc(input_away_team, b = b)
        modify_rate_sub_away = modify_rate_main_away*epilson

        new_home_team_home_rating, new_home_team_away_rating = convergence_alg(
            current_home_rating_home_team, current_away_rating_home_team,
            learning_rate_home = modify_rate_main_home, learning_rate_away = modify_rate_sub_home,
            error = error, predicted_goal_difference = predicted_goal_diff,
            real_goal_difference = real_goal_difference, mode_probability = mode_probability)

        # For the away team the primary rating is its away rating, and the
        # predicted/real arguments are swapped, exactly as in the original call.
        new_away_team_away_rating, new_away_team_home_rating = convergence_alg(
            current_away_rating_away_team, current_home_rating_away_team,
            learning_rate_home = modify_rate_main_away, learning_rate_away = modify_rate_sub_away,
            error = error, predicted_goal_difference = real_goal_difference,
            real_goal_difference = predicted_goal_diff, mode_probability = mode_probability)

        df_storage = update_storage(df_storage, home_team_id, new_home_team_home_rating, new_home_team_away_rating, cur_date, new_var_home)
        df_storage = update_storage(df_storage, away_team_id, new_away_team_home_rating, new_away_team_away_rating, cur_date, new_var_away)

# TODO: also write the updated ratings into df_match.

In [None]:
#sect_2, calculate form

In [183]:
# Stored form values keyed by "{teamID}-{season}" (written by append_form, read by retrieve_form).
form_dict = {}

In [95]:
# season -> teamID -> playerID -> {'count': appearances, 'last_match': latest match_count}
appearance_frame = {}
def count_appearance(playerID, teamID, season, match_count):
    """Record one appearance of a player for a team in a season.

    Creates the season and team levels of ``appearance_frame`` on demand. A
    player seen for the first time starts with a count of 1; otherwise the
    count is incremented. ``last_match`` is set to ``match_count`` either way.
    """
    team_players = appearance_frame.setdefault(season, {}).setdefault(teamID, {})

    if playerID not in team_players:
        team_players[playerID] = {'count': 1, 'last_match': match_count}
        return

    player_stats = team_players[playerID]
    player_stats['count'] += 1
    player_stats['last_match'] = match_count



In [121]:
def get_constantly_and_recently_this_season(teamID, season):
    """Return a team's most-used players and the players from its latest match.

    Returns:
        'New season' if the season has no appearance data, 'Team first appear'
        if the team has none in that season. Otherwise a 2-tuple:
        the up-to-11 ``(playerID, stats)`` items with the highest 'count'
        (descending), and the list of playerIDs whose 'last_match' equals the
        team's largest 'last_match'.
    """
    if season not in appearance_frame:
        return 'New season'
    teams_this_season = appearance_frame[season]

    if teamID not in teams_this_season:
        return 'Team first appear'
    squad = teams_this_season[teamID]

    ranked_by_count = sorted(squad.items(), key=lambda item: item[1]['count'], reverse=True)

    latest_match = max(stats['last_match'] for stats in squad.values())
    latest_lineup = [
        player_id for player_id, stats in squad.items()
        if stats['last_match'] == latest_match
    ]

    return ranked_by_count[:11], latest_lineup
        
        


In [175]:
def constantly_appear_player_percentage(lineup_player, constantly_appear_player):
    """Fraction of the lineup made up of the team's most-used players.

    ``constantly_appear_player`` may be a list of player IDs or the
    ``(playerID, stats_dict)`` items returned by
    get_constantly_and_recently_this_season. Previously those items were
    compared against the lineup as whole tuples, so nothing ever matched and
    the result was always 0.

    Args:
        lineup_player: Player IDs in the current lineup.
        constantly_appear_player: Most-used players (IDs or ``(ID, stats)`` items).

    Returns:
        Number of most-used players present in the lineup divided by the
        lineup size.

    Raises:
        ZeroDivisionError: If ``lineup_player`` is empty.
    """
    appear = 0
    for player in constantly_appear_player:
        # Unwrap (playerID, stats) items; leave plain IDs untouched.
        is_item = isinstance(player, tuple) and len(player) == 2 and isinstance(player[1], dict)
        player_id = player[0] if is_item else player
        if player_id in lineup_player:
            appear += 1
    return appear / len(lineup_player)

In [173]:
def recently_appear_player_percentage(lineup_player, recently_appear_player):
    """Fraction of the lineup made up of players from the team's latest match.

    Counts the entries of ``recently_appear_player`` that are found in
    ``lineup_player`` and divides by the lineup size (an empty lineup raises
    ZeroDivisionError).
    """
    matches = sum(1 for player in recently_appear_player if player in lineup_player)
    return matches / len(lineup_player)

In [33]:
# Incomplete.
def weighted_expected_goal(value,key):
    """Placeholder: difference between the last match's predicted and real goal difference.

    NOTE(review): ``value`` and ``key`` are not used, and the two names read
    below are not defined in this function or anywhere in the visible
    notebook, so calling this as written is expected to raise NameError.
    """
    diff = last_match_predicted_goal_difference - last_match_real_goal_difference
    return diff
    

In [35]:
def weighted_expected_point(last_match_win_prob, last_match_lost_prob, last_match_draw_prob):
    """Expected points of a match: 3 per win probability plus 1 per draw probability.

    ``last_match_lost_prob`` is accepted but contributes nothing (a loss is
    worth 0 points).
    """
    points_from_win = last_match_win_prob * 3
    points_from_draw = last_match_draw_prob * 1
    return points_from_win + points_from_draw

In [185]:
def append_form(teamID, season, form):
    """Store ``form`` in ``form_dict`` under "{teamID}-{season}", replacing any previous value."""
    form_dict[f"{teamID}-{season}"] = form
    
    

In [187]:
def retrieve_form(teamID, season):
    """Return the form stored under "{teamID}-{season}", or 0 when none is stored."""
    return form_dict.get(f"{teamID}-{season}", 0)
    

In [37]:
def form():
    """Placeholder for the form computation; not implemented yet."""
    pass
    
# Planned: use the same method as in the EP rating.
# Incomplete.
# Planned: this will be a perceptron, connected to the previous form by an EMA formula.

In [None]:
# previous_value will be another perceptron input to add in.

In [None]:
#sect_3, fatigue

In [39]:
def get_last_match_date(df_storage, team_id):
    """Return the date of the most recent complete record stored for a team.

    The storage frame is read as two leading columns followed by groups of four
    columns (home rating, away rating, date, variance). Groups are scanned from
    the left; the scan stops at the first group whose home rating, away rating
    or date is missing, and the date of the group before it is returned.

    Args:
        df_storage: Storage frame with a 'teamID' column.
        team_id: Value looked up in the 'teamID' column.

    Returns:
        The raw cell value of the latest filled group's date column.

    Raises:
        IndexError: If ``team_id`` is not present in the frame.
        ValueError: If the team has no filled group at all. The previous
            implementation returned an unrelated column (e.g. the team id) in
            that case.
    """
    team_data = df_storage.loc[df_storage['teamID'] == team_id]
    if team_data.empty:
        raise IndexError(f"Team ID {team_id} not found in the storage.")

    def cell(position):
        return team_data.iloc[:, position].values[0]

    latest_start = None
    for start in range(2, len(team_data.columns) - 3, 4):
        if any(pd.isna(cell(start + offset)) for offset in range(3)):
            break
        latest_start = start

    if latest_start is None:
        raise ValueError(f"Team ID {team_id} has no match record in the storage.")

    # The date is the third column of each group.
    return cell(latest_start + 2)
    

In [153]:
def tiredness(last_match_date, cur_date, a = 0.1):
    """Logistic function of the hours elapsed between two matches.

    Evaluates ``1 / (1 + exp(-a * (hours - 70)))``: 0.5 at 70 hours of rest,
    approaching 1 as the rest grows and 0 as it shrinks.

    Args:
        last_match_date: Timestamp of the previous match.
        cur_date: Timestamp of the current match; ``cur_date - last_match_date``
            must support ``total_seconds()``.
        a: Steepness of the logistic curve.
    """
    hours_rested = (cur_date - last_match_date).total_seconds() / 3600
    return 1/(1+sp.exp(-a*(hours_rested-70)))

In [43]:
def toughness(predicted_goal_difference, sigma = 0.6):
    """Gaussian density (mean 0, standard deviation ``sigma``) at the predicted goal difference.

    The value peaks when the predicted goal difference is 0 and falls off
    symmetrically as it grows in either direction.

    The formula previously referenced an undefined name ``x`` and ignored its
    argument; it now evaluates at ``predicted_goal_difference``.

    Args:
        predicted_goal_difference: Predicted goal difference of the match.
        sigma: Standard deviation of the bell curve. Choose it according to
            the modification of the initial rating.
    """
    # Option 1 (implemented): normal density centred at 0.
    # Option 2 (not implemented): a reverse softmax function.
    normaliser = 1 / (sp.sqrt(2 * sp.pi * sigma**2))
    return normaliser * sp.exp(-((predicted_goal_difference)**2) / (2 * sigma**2))
    

In [None]:
#sect_4, motivation/team spirit/team chems

In [45]:
# "{leagueID}-{season}" -> league table DataFrame (columns teamID, match_played, points, GD).
league_dist = {}
# "{teamID}-{season}" -> list of results in order, each 'W', 'L' or 'D'.
team_result = {}
# "{teamID}-{season}" -> dict with 'w_streak', 'l_streak', 'unb_streak', 'unw_streak' counters.
streak_frame = {}

In [47]:
def create_league_table(leagueID, season):
    """Register an empty league table under "{leagueID}-{season}" in ``league_dist``.

    Any table already stored under that key is replaced.
    """
    table_columns = ('teamID', 'match_played', 'points', 'GD')
    empty_table = pd.DataFrame({column: [] for column in table_columns})
    league_dist[f"{leagueID}-{season}"] = empty_table


In [49]:
def get_team_in_league_table(position, leagueID, season):
    """Return the row stored at index label ``position`` of a league table.

    The table is looked up in ``league_dist`` under "{leagueID}-{season}"; a
    missing table or label raises KeyError.
    """
    return league_dist[f"{leagueID}-{season}"].loc[position]

In [107]:
def get_team_data_in_league_table(teamID, leagueID, season):
    """Return a team's row of the league table, creating table and row on demand.

    Membership is now tested against the values of the 'teamID' column.
    ``x in series`` tests the index labels, so the previous check re-inserted
    teams that were already in the table.

    Args:
        teamID: Team identifier.
        leagueID: League identifier.
        season: Season identifier.

    Returns:
        A one-row DataFrame: the freshly created zero row for a new team, or
        the team's existing row(s) from the table.
    """
    key = f"{leagueID}-{season}"
    if key not in league_dist:
        create_league_table(leagueID, season)
    league_frame = league_dist[key]

    if teamID not in league_frame['teamID'].values:
        # New team: start from zero and put it at the top of the stored table.
        update_frame = pd.DataFrame({'teamID' : teamID, 'match_played': [0], 'points': [0], 'GD': [0]})
        league_dist[key] = pd.concat([update_frame, league_frame], ignore_index=True)
        return update_frame

    return league_frame.loc[league_frame['teamID'] == teamID]
    


In [53]:
# For the away team, pass -real_goal_diff.
def update_league_table(leagueID, season, teamID, real_goal_diff):
    """Add one result to a team's league-table row and re-sort the table.

    Awards 3 points for a positive ``real_goal_diff``, 0 for a negative one and
    1 otherwise; adds ``real_goal_diff`` to 'GD' and 1 to 'match_played'. The
    table is then sorted by points and goal difference (both descending),
    re-indexed from 0 and stored back in ``league_dist``.
    """
    key = f"{leagueID}-{season}"
    standings = league_dist[key]

    earned_points = 3 if real_goal_diff > 0 else (0 if real_goal_diff < 0 else 1)

    team_rows = standings['teamID'] == teamID
    standings.loc[team_rows, 'points'] += earned_points
    standings.loc[team_rows, 'GD'] += real_goal_diff
    standings.loc[team_rows, 'match_played'] += 1

    league_dist[key] = (
        standings
        .sort_values(by=['points', 'GD'], ascending=[False, False])
        .reset_index(drop=True)
    )
    

In [163]:
def append_result(real_goal_difference, teamID, season):
    """Append 'W', 'L' or 'D' to the team's result list for the season.

    The list lives in ``team_result`` under "{teamID}-{season}" and is created
    on first use. A positive goal difference is a win, a negative one a loss,
    anything else a draw.
    """
    results = team_result.setdefault(f"{teamID}-{season}", [])

    if real_goal_difference > 0:
        outcome = 'W'
    elif real_goal_difference < 0:
        outcome = 'L'
    else:
        outcome = 'D'
    results.append(outcome)
    

In [169]:
def update_streak(teamID, season):
    """Update a team's streak counters from its most recent result.

    Reads the last entry of ``team_result["{teamID}-{season}"]`` and updates
    ``streak_frame`` under the same key:

    * 'W': winning and unbeaten streaks grow; losing and winless streaks reset.
    * 'D': unbeaten and winless streaks grow; winning and losing streaks reset.
    * anything else (a loss): losing and winless streaks grow; winning and
      unbeaten streaks reset.

    The streak entry is created with zeroed counters if it does not exist yet;
    previously this raised KeyError unless get_streak had been called first.

    Raises:
        KeyError: If the team has no result list for the season.
        IndexError: If the result list is empty.
    """
    key = f"{teamID}-{season}"
    last_result = team_result[key][-1]

    streak = streak_frame.setdefault(key, {
        'w_streak': 0,
        'l_streak': 0,
        'unb_streak': 0,
        'unw_streak': 0
    })

    if last_result == 'W':
        streak['w_streak'] += 1
        streak['unb_streak'] += 1
        streak['l_streak'] = 0
        streak['unw_streak'] = 0
    elif last_result == 'D':
        streak['w_streak'] = 0
        streak['unb_streak'] += 1
        streak['l_streak'] = 0
        streak['unw_streak'] += 1
    else:
        streak['w_streak'] = 0
        streak['unb_streak'] = 0
        streak['l_streak'] += 1
        streak['unw_streak'] += 1
        

    

In [59]:
def get_streak(teamID, season):
    """Return a team's streak counters for the season.

    Returns:
        'No streak found' on the first request for a team/season (a zeroed
        entry is created in ``streak_frame`` as a side effect); otherwise the
        tuple (w_streak, unb_streak, l_streak, unw_streak).
    """
    key =f"{teamID}-{season}"
    if key in streak_frame:
        counters = streak_frame[key]
        return (
            counters['w_streak'],
            counters['unb_streak'],
            counters['l_streak'],
            counters['unw_streak'],
        )

    streak_frame[key] = dict.fromkeys(('w_streak', 'l_streak', 'unb_streak', 'unw_streak'), 0)
    return 'No streak found'
    

In [147]:
def winning_motivation(match_played, leagueID, points_to_first_place = None, points_gap = None):
    """Motivation score from a points gap relative to the points still available.

    The gap is ``points_to_first_place`` when it is non-zero, otherwise
    ``points_gap``, otherwise 0. It is divided by the points still available
    (3 per remaining match) and fed to ``1 / (1 + exp(6 * ratio - 3.2))``.

    Fixes over the previous version: a zero or missing gap no longer ends up
    dividing the builtin ``input`` function, and a season with no matches left
    no longer raises ZeroDivisionError.

    Args:
        match_played: Matches the team has already played.
        leagueID: League identifier; selects a 34- or 38-match season.
        points_to_first_place: Points gap to first place.
        points_gap: Alternative points gap, used when the first is zero or None.

    Returns:
        ``'Phase league <leagueID>'`` for leagues that are not in the 34- or
        38-match lists; 0 when no points are left or the gap exceeds the points
        available; otherwise the logistic score.

    Raises:
        TypeError: If both gaps are None for a 34- or 38-match league.
    """
    match_t_34_league = [4769, 7809, 13274, 15722, 17642]
    match_t_38_league = [1729, 10257, 21518]
    # Leagues 1, 19694 and 24558 are played in phases; like every other league
    # that is not listed above, they take the 'Phase league' return below.
    if leagueID in match_t_34_league:
        total_match = 34
    elif leagueID in match_t_38_league:
        total_match = 38
    else:
        return f'Phase league {leagueID}'

    if points_to_first_place is None and points_gap is None:
        raise TypeError("winning_motivation() needs points_to_first_place or points_gap")
    # Same precedence as before (first truthy value wins); a gap of 0 is now valid.
    gap = points_to_first_place or points_gap or 0

    available_points_left = (total_match - match_played) * 3
    if available_points_left <= 0:
        # Nothing left to play for.
        return 0

    input_value = gap / available_points_left
    if input_value > 1:
        # The gap can no longer be closed.
        return 0
    return 1/(1+sp.exp(6*input_value - 3.2))

In [63]:
def determine_league_phase(match_played, leagueID):
    """Return 'first' or 'second' depending on how many matches have been played.

    The first phase lasts through match 30 for league 1 and through match 33
    for every other league.
    """
    first_phase_matches = 30 if leagueID == 1 else 33
    return 'first' if match_played <= first_phase_matches else 'second'

In [149]:
def league_phase_motivation(match_played, leagueID, phase, points_gap_to_6th_place = None, points_to_first_place = None, points_gap = None):
    """Motivation score for leagues that are played in two phases.

    The gap is the first non-zero value among the candidates for the phase
    (first phase: ``points_gap_to_6th_place``, ``points_to_first_place``,
    ``points_gap``; otherwise: ``points_to_first_place``, ``points_gap``), or 0.
    It is divided by the points still available up to the end of the phase
    (3 per remaining match) and fed to ``1 / (1 + exp(6 * ratio - 3.2))``.

    Fixes over the previous version: the first-phase branch called a bare
    ``exp`` instead of ``sp.exp``; a zero or missing gap left the local
    ``input`` unbound; and a phase with no matches left raised
    ZeroDivisionError.

    Args:
        match_played: Matches the team has already played.
        leagueID: League identifier (league 1 uses 30/40 matches, others 33/38).
        phase: 'first' for the first phase; any other value means the later phase.
        points_gap_to_6th_place: Gap to 6th place (first phase only).
        points_to_first_place: Gap to first place.
        points_gap: Alternative gap.

    Returns:
        0 when no points are left in the phase, otherwise the logistic score.

    Raises:
        TypeError: If every gap relevant to the phase is None.
    """
    if phase == 'first':
        total_match = 30 if leagueID == 1 else 33
        candidates = (points_gap_to_6th_place, points_to_first_place, points_gap)
    else:
        total_match = 40 if leagueID == 1 else 38
        candidates = (points_to_first_place, points_gap)

    if all(candidate is None for candidate in candidates):
        raise TypeError("league_phase_motivation() needs at least one points gap for this phase")
    # Same precedence as before (first truthy value wins); a gap of 0 is now valid.
    gap = next((candidate for candidate in candidates if candidate), 0)

    available_points_left = (total_match - match_played) * 3
    if available_points_left <= 0:
        # Nothing left to play for in this phase.
        return 0

    input_value = gap / available_points_left
    return 1/(1+sp.exp(6*input_value - 3.2))
        
        

In [None]:
# def champions_league_qualification_motivation():
# Not implemented: not enough data is available yet.

In [None]:
# def europa_league_european_qualification_motivation():
# Not implemented: not enough data is available yet.

In [13]:
# Value written to '<side>_time_to_last_match' when a team has no earlier match
# recorded in df_storage (previously 9999 for home but 0 for away).
_NO_PREVIOUS_MATCH_DAYS = 9999


def _team_winning_motivation(position, match_played, points, leagueID, season, league_phase):
    """Compute one team's winning-motivation score from its gap to a rival in the table.

    Parameters
    ----------
    position : scalar or one-element index
        The team's position as taken from the index of its league-table row.
        NOTE(review): assumes that index holds the table rank -- confirm.
    match_played, points : float
        The team's matches played and points so far.
    leagueID, season :
        Forwarded to `get_team_in_league_table` and `winning_motivation`.
    league_phase : str or None
        'first' / 'second' for two-phase leagues, None for single-phase leagues.

    Returns
    -------
    0 when fewer than 10 matches have been played (not applied in the second
    phase), otherwise whatever `winning_motivation` returns.
    """
    if league_phase != 'second' and match_played < 10:
        return 0

    if league_phase == 'first':
        # First phase of a two-phase league: the cut line is between 6th and 7th.
        is_ahead = bool(position <= 6)
        rival_rank = 7 if is_ahead else 6
    else:
        # Second phase or single-phase league: the reference is the title race.
        is_ahead = bool(position == 1)
        rival_rank = 2 if is_ahead else 1

    rival_data = get_team_in_league_table(rival_rank, leagueID, season)
    gap = abs(points - rival_data['points'])

    # A team defending its place passes the gap as `points_gap`; a chasing team
    # passes it as `points_to_first_place`.
    if is_ahead:
        return winning_motivation(match_played, leagueID, points_to_first_place = None, points_gap = gap)
    return winning_motivation(match_played, leagueID, points_to_first_place = gap, points_gap = None)


def get_data(df_match, df_storage, mode_probability, epilson = 0.7, b = 2, c = 0.03, threshold_variance = 0.7, gamma = 1/3, delta = 3):
    """Build the per-match feature columns of `df_match`, one match at a time.

    For every row of `df_match` (visited in its existing order) this writes:
    line-up fractions, tiredness and days since the last match, win/unbeaten/
    loss/winless streaks, winning motivation, the rating inputs and predicted
    goal difference from `calculate_rating`, and the real goal difference.
    After the features are written, the match result is fed to `append_result`
    and `update_streak`, and line-up appearances to `count_appearance`, so the
    row order matters.

    Columns read from each row: '2' (league ID), '3' (season), '5' (date),
    '7' / '8' (home / away team ID), '9' / '10' (home / away goals),
    '55'-'65' (home player IDs) and '66'-'76' (away player IDs).

    Parameters
    ----------
    df_match : pandas.DataFrame
        Matches to process; modified in place (new columns are added).
    df_storage : pandas.DataFrame
        Rating storage passed to `get_last_match_date` and `calculate_rating`.
    mode_probability, epilson, b, c, threshold_variance, gamma, delta :
        Forwarded unchanged to `calculate_rating`.

    Returns
    -------
    None
    """
    home_col_loc = ['55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65']
    away_col_loc = ['66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76']

    for index, match in df_match.iterrows():
        hometeamID = float(match['7'])
        awayteamID = float(match['8'])

        season = match['3']
        cur_date = pd.to_datetime(match['5'])

        home_team_playerID = match[home_col_loc].values.tolist()
        away_team_playerID = match[away_col_loc].values.tolist()

        # --- Line-up fractions: share of constantly / recently appearing players ---
        for side, team_id, lineup in (('home', hometeamID, home_team_playerID),
                                      ('away', awayteamID, away_team_playerID)):
            appearance_result = get_constantly_and_recently_this_season(team_id, season)
            if isinstance(appearance_result, str):
                # A string result means no appearance data is available; use 1.
                constantly_appear_frct = 1
                recently_appear_frct = 1
            else:
                team_constantly_appear, team_recently_appear = appearance_result
                constantly_appear_frct = constantly_appear_player_percentage(lineup, team_constantly_appear)
                recently_appear_frct = recently_appear_player_percentage(lineup, team_recently_appear)
            df_match.loc[index, f'{side}_constantly_appear_frct'] = constantly_appear_frct
            df_match.loc[index, f'{side}_recently_appear_frct'] = recently_appear_frct

        # --- Tiredness and days since the previous match ---
        for side, team_id in (('home', hometeamID), ('away', awayteamID)):
            last_match_date = get_last_match_date(df_storage, team_id)
            if last_match_date == 'first_registered':
                team_tiredness = 0
                time_to_last_match = _NO_PREVIOUS_MATCH_DAYS
            else:
                last_match_date = pd.to_datetime(last_match_date)
                team_tiredness = tiredness(last_match_date, cur_date, a = 0.1)
                time_to_last_match = (cur_date - last_match_date).days
            df_match.loc[index, f'{side}_tiredness'] = team_tiredness
            df_match.loc[index, f'{side}_time_to_last_match'] = time_to_last_match

        # TODO: add '<side>_last_toughness' = toughness(last_match_predicted, sigma = 0.6)
        # once the previous match's predicted result is available.

        # --- League table snapshot before this match ---
        leagueID = float(match['2'])
        home_league_data = get_team_data_in_league_table(hometeamID, leagueID, season)
        home_position_in_the_league = home_league_data.index
        home_match_played = float(home_league_data['match_played'].iloc[0])
        home_points = float(home_league_data['points'].iloc[0])

        away_league_data = get_team_data_in_league_table(awayteamID, leagueID, season)
        away_position_in_the_league = away_league_data.index
        away_match_played = float(away_league_data['match_played'].iloc[0])
        away_points = float(away_league_data['points'].iloc[0])

        # Register this match's line-ups (done after the fractions above were read).
        for playerID in home_team_playerID:
            count_appearance(playerID, hometeamID, season, home_match_played + 1)

        for playerID in away_team_playerID:
            count_appearance(playerID, awayteamID, season, away_match_played + 1)

        # --- Streaks coming into this match ---
        for side, team_id in (('home', hometeamID), ('away', awayteamID)):
            streak_result = get_streak(team_id, season)
            if isinstance(streak_result, str):
                # A string result means no streak data is available; use zeros.
                w_streak, unb_streak, l_streak, unw_streak = 0, 0, 0, 0
            else:
                w_streak, unb_streak, l_streak, unw_streak = streak_result
            df_match.loc[index, f'{side}_w_streak'] = w_streak
            df_match.loc[index, f'{side}_unb_streak'] = unb_streak
            df_match.loc[index, f'{side}_l_streak'] = l_streak
            df_match.loc[index, f'{side}_unw_streak'] = unw_streak

        # --- Winning motivation ---
        if leagueID in [1, 19694, 24558]:
            # Two-phase leagues; the phase is decided from the home team's matches played.
            league_phase = determine_league_phase(home_match_played, leagueID)
        else:
            league_phase = None

        home_motivation_score = _team_winning_motivation(
            home_position_in_the_league, home_match_played, home_points, leagueID, season, league_phase)
        away_motivation_score = _team_winning_motivation(
            away_position_in_the_league, away_match_played, away_points, leagueID, season, league_phase)

        df_match.loc[index, 'home_winning_motivation'] = home_motivation_score
        df_match.loc[index, 'away_winning_motivation'] = away_motivation_score

        # NOTE(review): the form values are retrieved but not written anywhere yet.
        home_form = retrieve_form(hometeamID, season)
        away_form = retrieve_form(awayteamID, season)

        # --- Rating-based prediction and the real outcome ---
        real_goal_scr_home = match['9']
        real_goal_scr_away = match['10']
        input_home_team, input_away_team, predicted_goal_diff = calculate_rating(
            index, real_goal_scr_home, real_goal_scr_away, hometeamID, awayteamID, cur_date,
            df_storage, mode_probability, epilson = epilson, b = b, c = c,
            threshold_variance = threshold_variance, gamma = gamma, delta = delta)
        df_match.loc[index, 'input_home_team'] = input_home_team
        df_match.loc[index, 'input_away_team'] = input_away_team
        df_match.loc[index, 'Rating_predicted_goal_diff'] = predicted_goal_diff

        real_goal_diff = real_goal_scr_home - real_goal_scr_away
        df_match.loc[index, 'Real_goal_diff'] = real_goal_diff

        # Feed the result to the per-team result and streak helpers for later rows.
        append_result( real_goal_diff, hometeamID, season)
        append_result(- real_goal_diff, awayteamID, season)

        update_streak(hometeamID, season)
        update_streak(awayteamID, season)

        

In [None]:
# The data will be separated as follows:
# 08/09: training data for the ratings
# 09/10, 10/11, 11/12, 12/13, 13/14: training data
# 14/15: dev set
# 15/16: test set