# 3. Data

In [1]:
import pandas as pd
import glob
import numpy as np
from scipy.optimize import minimize
import os
from scipy.stats import poisson

In [2]:
path =r'..\\data'
filenames = glob.glob(path + "\\football-data*.csv")

dfs = []
for filename in filenames:
    dfs.append(pd.read_csv(filename))

df = pd.concat(dfs)

df.dropna(subset=["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG"], inplace=True)

df.reset_index(inplace=True)
df

# TEST
#df.to_csv("test.csv")
#os.startfile("test.csv")

Unnamed: 0,index,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
0,0,E1,06/08/10,Norwich,Watford,2.0,3.0,A,0.0,2.0,...,,,,,,,,,,
1,1,E1,07/08/10,Bristol City,Millwall,0.0,3.0,A,0.0,1.0,...,,,,,,,,,,
2,2,E1,07/08/10,Burnley,Nott'm Forest,1.0,0.0,H,1.0,0.0,...,,,,,,,,,,
3,3,E1,07/08/10,Coventry,Portsmouth,2.0,0.0,H,1.0,0.0,...,,,,,,,,,,
4,4,E1,07/08/10,Crystal Palace,Leicester,3.0,2.0,H,3.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10247,375,E0,23/05/2021,Liverpool,Crystal Palace,2.0,0.0,H,1.0,0.0,...,3.49,-2.25,1.86,2.04,1.88,2.03,1.98,2.14,1.88,2.00
10248,376,E0,23/05/2021,Man City,Everton,5.0,0.0,H,2.0,0.0,...,2.77,-1.75,2.01,1.89,1.99,1.89,2.20,2.00,2.03,1.85
10249,377,E0,23/05/2021,Sheffield United,Burnley,1.0,0.0,H,1.0,0.0,...,2.05,0.00,2.04,1.86,2.05,1.86,2.17,1.90,2.03,1.84
10250,378,E0,23/05/2021,West Ham,Southampton,3.0,0.0,H,2.0,0.0,...,2.14,-0.75,2.00,1.90,2.02,1.91,2.06,2.01,1.99,1.89


In [3]:
df_1 = df[["FTHG", "FTAG"]]
df_1

Unnamed: 0,FTHG,FTAG
0,2.0,3.0
1,0.0,3.0
2,1.0,0.0
3,2.0,0.0
4,3.0,2.0
...,...,...
10247,2.0,0.0
10248,5.0,0.0
10249,1.0,0.0
10250,3.0,0.0


## TABLE 1
*Empirical estimates for each score probability for joint and marginal probability functions*

In [4]:
goalsCrosstable = pd.crosstab(index=df["FTHG"], columns=df["FTAG"])/len(df_1)
goalsCrosstable.style.background_gradient(cmap="RdYlGn")

FTAG,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0
FTHG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,0.07579,0.078131,0.047015,0.0238,0.007608,0.002048,0.00078,9.8e-05,9.8e-05,9.8e-05
1.0,0.101736,0.119782,0.072083,0.028872,0.008584,0.002341,0.00078,9.8e-05,0.0,0.0
2.0,0.077058,0.088275,0.054038,0.019021,0.004487,0.001463,9.8e-05,0.0,0.0,0.0
3.0,0.042236,0.040187,0.024581,0.010242,0.002536,0.000488,0.000195,9.8e-05,0.0,0.0
4.0,0.016387,0.014339,0.007998,0.003804,0.00078,0.000293,9.8e-05,0.0,0.0,0.0
5.0,0.006633,0.004292,0.002439,0.001171,0.00039,0.000293,0.0,0.0,0.0,0.0
6.0,0.001853,0.002048,0.000293,0.000195,0.0,0.0,0.0,0.0,0.0,0.0
7.0,0.000293,0.000585,0.00039,9.8e-05,0.0,0.0,0.0,0.0,0.0,0.0
8.0,0.00039,0.0,9.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9.0,9.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## TABLE 2
*Estimates the ratios of the observed joint probability function and the empirical probability function obtained under the assumption of independence between the home and away scores* 

In [5]:
independenceCrosstable = goalsCrosstable.copy()

In [6]:
# Marginal home goals
f_H_i = independenceCrosstable.sum(axis=1)

In [7]:
# Marginal away goals
f_A_j = independenceCrosstable.sum(axis=0)

In [8]:
# Probabilities if independent
independentCrosstable = f_H_i.apply(lambda r: r*f_A_j)
#independentCrosstable.style.background_gradient(cmap="RdYlGn")

In [9]:
independenceTestCrosstable = (independenceCrosstable/independentCrosstable)*100
independenceTestCrosstable

FTAG,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0
FTHG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,99.81355,95.447851,95.565009,115.91073,132.503065,125.612332,169.875725,141.563104,424.689312,424.689312
1.0,94.379126,103.075432,103.20945,99.048678,105.30213,101.122404,119.661512,99.717926,0.0,0.0
2.0,97.75788,103.881548,105.807835,89.232821,75.274062,86.42919,20.454908,0.0,0.0,0.0
3.0,108.636352,95.884774,97.582334,97.418605,86.262783,58.41196,82.944984,276.483279,0.0,0.0
4.0,116.288566,94.386574,87.604208,99.829219,73.228571,96.692656,114.419643,0.0,0.0,0.0
5.0,135.173034,81.13327,76.701621,88.212012,105.148718,277.681473,0.0,0.0,0.0,0.0
6.0,130.932312,134.238683,31.907874,50.96694,0.0,0.0,0.0,0.0,0.0,0.0
7.0,66.450609,123.280423,136.748033,81.911154,0.0,0.0,0.0,0.0,0.0,0.0
8.0,248.082275,0.0,95.723623,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9.0,310.102843,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 4. Model and Inference

In [10]:
def tau(x, y, _lambda, mu, rho):
    if (x==0 and y==0):
        return (1 - _lambda*mu*rho)
    elif (x==0 and y==1):
        return (1 + _lambda*rho)
    elif (x==1 and y==0):
        return (1 + mu*rho)
    elif (x==1 and y==1):
        return (1 - rho)
    else:
        return 1.

In [11]:
def match_log_likelihood(x_k, y_k, alpha_ik, beta_ik, alpha_jk, beta_jk, rho, gamma):
    
    lambda_k = np.exp(alpha_ik + beta_jk + gamma)
    mu_k = np.exp(alpha_jk + beta_ik)
    
    return (
        np.log(tau(x_k, y_k, lambda_k, mu_k, rho))
        + np.log(poisson.pmf(x_k, lambda_k))
        + np.log(poisson.pmf(y_k, mu_k))
    )

In [12]:
epl1718 = pd.read_csv(path+"\\football-data EPL 17-18.csv")

In [13]:
dataset = epl1718[["HomeTeam", "AwayTeam", "FTHG", "FTAG"]]
dataset.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG
0,Arsenal,Leicester,4,3
1,Brighton,Man City,0,2
2,Chelsea,Burnley,2,3
3,Crystal Palace,Huddersfield,0,3
4,Everton,Stoke,1,0


In [14]:
teams = dataset['HomeTeam'].unique()
n_teams = len(teams)

In [15]:
init_vals = np.concatenate((np.random.uniform(0,1,(n_teams)), # attack strength
                              np.random.uniform(0,-1,(n_teams)), # defence strength
                              np.array([0, 1.0]) # rho (score correction), gamma (home advantage)
                             ))

In [16]:
def log_likelhood(params):
    
    score_coefs = dict(zip(teams, params[:n_teams]))
    defend_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
    rho, gamma = params[-2:]
    
    log_like = [match_log_likelihood(row.FTHG, row.FTAG, score_coefs[row.HomeTeam], defend_coefs[row.HomeTeam],
             score_coefs[row.AwayTeam], defend_coefs[row.AwayTeam], rho, gamma) for row in dataset.itertuples()]
    
    return -sum(log_like)

In [17]:
def equality_constraint(params):
    return (sum(params[:n_teams]) - n_teams)

In [18]:
opt_params = minimize(
    log_likelhood,
    init_vals, 
    constraints={'type':'eq', 'fun': equality_constraint}, 
    options={'disp': True, 'maxiter':100}
)

  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1050.800745422244
            Iterations: 56
            Function evaluations: 2492
            Gradient evaluations: 56


In [19]:
parameters = dict(zip(["attack_"+team for team in teams] + 
                        ["defence_"+team for team in teams] +
                        ['rho', 'home_adv'],
                        opt_params.x))
parameters

{'attack_Arsenal': 1.4475818708141488,
 'attack_Brighton': 0.6846846184207547,
 'attack_Chelsea': 1.2572226728797695,
 'attack_Crystal Palace': 0.9493696104566667,
 'attack_Everton': 0.9377343932111655,
 'attack_Southampton': 0.7651761561942642,
 'attack_Watford': 0.933868375062114,
 'attack_West Brom': 0.5837440977620745,
 'attack_Man United': 1.3309414126371109,
 'attack_Newcastle': 0.7670295045484432,
 'attack_Bournemouth': 0.9564006354441252,
 'attack_Burnley': 0.6983185694002149,
 'attack_Leicester': 1.189889626933518,
 'attack_Liverpool': 1.5643615678409812,
 'attack_Stoke': 0.719583287822325,
 'attack_Swansea': 0.4664421510946951,
 'attack_Huddersfield': 0.48932999897352847,
 'attack_Tottenham': 1.4273450439774522,
 'attack_Man City': 1.7860035722269434,
 'attack_West Ham': 1.0449728342997,
 'defence_Arsenal': -0.9058110468028684,
 'defence_Brighton': -0.8945998829214581,
 'defence_Chelsea': -1.2203514326577272,
 'defence_Crystal Palace': -0.853671352685454,
 'defence_Everton': 

In [20]:
# Use these parameters to make predictions

def calc_means(param_dict, homeTeam, awayTeam):
    return [np.exp(param_dict['attack_'+homeTeam] + param_dict['defence_'+awayTeam] + param_dict['home_adv']),
            np.exp(param_dict['defence_'+homeTeam] + param_dict['attack_'+awayTeam])]

def dixon_coles_simulate_match(params_dict, homeTeam, awayTeam, max_goals=10):
    team_avgs = calc_means(params_dict, homeTeam, awayTeam)
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in team_avgs]
    output_matrix = np.outer(np.array(team_pred[0]), np.array(team_pred[1]))
    correction_matrix = np.array([[tau(home_goals, away_goals, team_avgs[0],
                                                   team_avgs[1], params_dict['rho']) for away_goals in range(2)]
                                   for home_goals in range(2)])
    output_matrix[:2,:2] = output_matrix[:2,:2] * correction_matrix
    return output_matrix

In [21]:
def probabilities(output_matrix) -> list[str, str, str]:  # [home_win_prob, draw_prob, away_win_prob]
        home_win_prob = np.tril(out).sum() - np.trace(out)
        draw_prob = np.trace(out)
        away_win_prob = np.triu(out).sum() - np.trace(out)
        return [home_win_prob, draw_prob, away_win_prob]

## Model Enhancement

In [22]:
df_me = epl1718.copy()

In [23]:
# Dates come in the two formats which is a pain
df_me["lower_case_dates"] = pd.to_datetime(df_me["Date"], format='%d/%m/%y', errors='coerce')
df_me["upper_case_dates"] = pd.to_datetime(df_me["Date"], format='%d/%m/%Y', errors='coerce')
df_me["lower_case_dates"].fillna(df_me["upper_case_dates"], inplace=True)
df_me["Date"] = df_me["lower_case_dates"]
df_me.drop(columns=["lower_case_dates", "upper_case_dates"], inplace=True)

In [24]:
df_me['time_diff'] = (max(df_me['Date']) - df_me['Date']).dt.days
df_me = df_me[['HomeTeam','AwayTeam','FTHG','FTAG', 'FTR', 'time_diff']]
df_me

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR,time_diff
0,Arsenal,Leicester,4,3,H,275
1,Brighton,Man City,0,2,A,274
2,Chelsea,Burnley,2,3,A,274
3,Crystal Palace,Huddersfield,0,3,A,274
4,Everton,Stoke,1,0,H,274
...,...,...,...,...,...,...
375,Newcastle,Chelsea,3,0,H,0
376,Southampton,Man City,0,1,A,0
377,Swansea,Stoke,1,2,A,0
378,Tottenham,Leicester,5,4,H,0


In [25]:
def solve_parameters_decay(dataset, xi, init_vals=None, options={'disp': True, 'maxiter':100}):
    teams = np.sort(dataset['HomeTeam'].unique())
    n_teams = len(teams)
    
    if init_vals is None:
        init_vals = np.concatenate((np.random.uniform(0,1,(n_teams)), # attack strength
                              np.random.uniform(0,-1,(n_teams)), # defence strength
                              np.array([0, 1.0]) # rho (score correction), gamma (home advantage)
                             ))
    
    def me_match_log_likelihood(x_k, y_k, alpha_ik, beta_ik, alpha_jk, beta_jk, rho, gamma, t, xi):
        
        lambda_k = np.exp(alpha_ik + beta_jk + gamma)
        mu_k = np.exp(alpha_jk + beta_ik)
        
        return (
            np.exp(-xi*t)
            *
            (
                np.log(tau(x_k, y_k, lambda_k, mu_k, rho))
            + np.log(poisson.pmf(x_k, lambda_k))
            + np.log(poisson.pmf(y_k, mu_k))
            )
        )

    def me_log_likelhood(params):
        
        score_coefs = dict(zip(teams, params[:n_teams]))
        defend_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
        rho, gamma = params[-2:]

        log_like = [me_match_log_likelihood(row.FTHG, row.FTAG, score_coefs[row.HomeTeam], defend_coefs[row.HomeTeam],
                 score_coefs[row.AwayTeam], defend_coefs[row.AwayTeam], rho, gamma, row.time_diff, xi) for row in dataset.itertuples()]

        return -sum(log_like)
    
    def equality_constraint(params):
        return (sum(params[:n_teams]) - n_teams)
    
    opt_ouptut = minimize(
        me_log_likelhood, 
        init_vals, 
        constraints={'type':'eq', 'fun': equality_constraint}, 
        options={'disp': True, 'maxiter':100}
    )
    
    parameters = dict(zip(["attack_"+team for team in teams] + 
                        ["defence_"+team for team in teams] +
                        ['rho', 'home_adv'],
                        opt_params.x))


    return parameters

In [26]:
params_xi = solve_parameters_decay(df_me, xi=0.0018)

  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 832.6598931777829
            Iterations: 43
            Function evaluations: 1913
            Gradient evaluations: 43


In [27]:
params_xi

{'attack_Arsenal': 1.4475975309144724,
 'attack_Bournemouth': 0.6846901186705484,
 'attack_Brighton': 1.2572215227107404,
 'attack_Burnley': 0.949349741939888,
 'attack_Chelsea': 0.9377334980689495,
 'attack_Crystal Palace': 0.7651692936737745,
 'attack_Everton': 0.9338934970517637,
 'attack_Huddersfield': 0.5837283952479225,
 'attack_Leicester': 1.3309417053379093,
 'attack_Liverpool': 0.7670270109891769,
 'attack_Man City': 0.9564158796121193,
 'attack_Man United': 0.6983313556861245,
 'attack_Newcastle': 1.1899034527545171,
 'attack_Southampton': 1.5643724541866344,
 'attack_Stoke': 0.7195611386221663,
 'attack_Swansea': 0.4664416970309393,
 'attack_Tottenham': 0.4893052648604346,
 'attack_Watford': 1.427343582858235,
 'attack_West Brom': 1.7860048112733684,
 'attack_West Ham': 1.0449680485103217,
 'defence_Arsenal': -0.9057588747615293,
 'defence_Bournemouth': -0.8945616284129333,
 'defence_Brighton': -1.2203561032791284,
 'defence_Burnley': -0.853700110220836,
 'defence_Chelsea': 

In [60]:
def calc_means(param_dict, homeTeam, awayTeam):
    return [np.exp(param_dict['attack_'+homeTeam] + param_dict['defence_'+awayTeam] + param_dict['home_adv']),
            np.exp(param_dict['defence_'+homeTeam] + param_dict['attack_'+awayTeam])]

def dixon_coles_simulate_match(params_dict, homeTeam, awayTeam, max_goals=10):
    team_avgs = calc_means(params_dict, homeTeam, awayTeam)
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in team_avgs]
    output_matrix = np.outer(np.array(team_pred[0]), np.array(team_pred[1]))
    correction_matrix = np.array([[tau(home_goals, away_goals, team_avgs[0],
                                                   team_avgs[1], params_dict['rho']) for away_goals in range(2)]
                                   for home_goals in range(2)])
    output_matrix[:2,:2] = output_matrix[:2,:2] * correction_matrix
    return output_matrix

def get_1x2_probs(match_score_matrix):
    return dict({"H":np.sum(np.tril(match_score_matrix, -1)), 
                 "A":np.sum(np.triu(match_score_matrix, 1)), "D":np.sum(np.diag(match_score_matrix))})

def build_temp_model(dataset, time_diff, xi, init_params=None):
    test_dataset = dataset[((dataset['time_diff']<=time_diff) & (dataset['time_diff']>=time_diff-2))]
    if len(test_dataset)==0:
        return 0
    train_dataset = dataset[dataset['time_diff']>time_diff]
    train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
    params = solve_parameters_decay(train_dataset, xi=xi, init_vals=init_params)
    predictive_score = sum([np.log(get_1x2_probs(dixon_coles_simulate_match(
                    params, row.HomeTeam, row.AwayTeam))[row.FTR]) for row in test_dataset.itertuples()])
    return predictive_score

def check_xi(match_day):
    xi_score = build_temp_model(df_me, match_day, my_xi)
    return xi_score

In [61]:
my_xi = 0.
match_days = [day for day in range(99,-1,-3) if len(df_me[((df_me['time_diff']<=day) & (df_me['time_diff']>=(day-2)))])]

In [63]:
h = [build_temp_model(df_me, day, my_xi) for day in match_days]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 682.7011311475158
            Iterations: 50
            Function evaluations: 2220
            Gradient evaluations: 50


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 713.0086368659736
            Iterations: 56
            Function evaluations: 2486
            Gradient evaluations: 56


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 739.4745680493337
            Iterations: 56
            Function evaluations: 2485
            Gradient evaluations: 56


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 741.6221118174285
            Iterations: 45
            Function evaluations: 2002
            Gradient evaluations: 45


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 767.4361464449499
            Iterations: 55
            Function evaluations: 2443
            Gradient evaluations: 55


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 770.2339346282233
            Iterations: 54
            Function evaluations: 2403
            Gradient evaluations: 54


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 793.021773072453
            Iterations: 55
            Function evaluations: 2440
            Gradient evaluations: 55


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 797.1296464324214
            Iterations: 48
            Function evaluations: 2131
            Gradient evaluations: 48


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 817.3063418732966
            Iterations: 55
            Function evaluations: 2446
            Gradient evaluations: 55


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 825.7672470281091
            Iterations: 43
            Function evaluations: 1914
            Gradient evaluations: 43


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 836.3313817078588
            Iterations: 42
            Function evaluations: 1869
            Gradient evaluations: 42


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 856.1433039023036
            Iterations: 49
            Function evaluations: 2181
            Gradient evaluations: 49


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 861.9934257026172
            Iterations: 55
            Function evaluations: 2445
            Gradient evaluations: 55


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 890.3386224821855
            Iterations: 56
            Function evaluations: 2488
            Gradient evaluations: 56


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 917.6744494649662
            Iterations: 54
            Function evaluations: 2400
            Gradient evaluations: 54


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 924.0722342784899
            Iterations: 48
            Function evaluations: 2139
            Gradient evaluations: 48


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 935.3773529001893
            Iterations: 54
            Function evaluations: 2402
            Gradient evaluations: 54


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 944.7345980696566
            Iterations: 56
            Function evaluations: 2489
            Gradient evaluations: 56


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 972.4028920566312
            Iterations: 43
            Function evaluations: 1915
            Gradient evaluations: 43


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1003.0129328953776
            Iterations: 55
            Function evaluations: 2449
            Gradient evaluations: 55


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1015.4946673567883
            Iterations: 57
            Function evaluations: 2534
            Gradient evaluations: 57


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dataset['time_diff'] = train_dataset['time_diff'] - time_diff
  + np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))
  np.log(tau(x_k, y_k, lambda_k, mu_k, rho))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1018.2426978575662
            Iterations: 48
            Function evaluations: 2138
            Gradient evaluations: 48


In [64]:
h

[-9.793710510923653,
 -13.377836168624965,
 -2.614651466027779,
 -12.56391295591882,
 -2.151609759598545,
 -10.567785384725537,
 -1.1493995291137524,
 -8.792416027562439,
 -3.681616754140795,
 -6.755145459934086,
 -11.890830788712638,
 -2.031622014116496,
 -16.39772507681777,
 -12.631723205308889,
 -4.180761185282476,
 -6.541363101070569,
 -3.1393001313772815,
 -11.91811172661839,
 -9.931534731160466,
 -4.936857906205047,
 -1.1160957928207516,
 -15.767660738777376]

In [66]:
sum(h)

-171.9316704148385