# Code for the regression for the effect of managerial decision-making on the flow of momentum

In [None]:
#import pitch control and PCV file
import pickle
import statsmodels.api as sm
import pandas as pd
from progressbar import ProgressBar
#the with-block closes the file even when unpickling fails
#NOTE: unpickling can run arbitrary code, so only load files from a trusted source
with open('input PC regression decisions on Momentum.p', 'rb') as infile:
    df_dec = pickle.load(infile)

In [None]:
#create three additional columns for binary values for the type of decision
#every column holds 1 where 'Type of decision' equals the column name and 0 elsewhere
decision_type = df_dec['Type of decision']
for label in ('neutral', 'offensive', 'defensive'):
    #compare the whole column at once instead of fetching the rows one by one;
    #to_numpy() makes the assignment positional, like assigning a plain list
    df_dec[label] = (decision_type == label).to_numpy().astype('int64')
    

In [None]:
#create additional columns for the pitch control and PCV (WPC) values before and after the decisions depending on the team
#rows with team 'Home' take the home columns, all other rows take the away columns
is_home = df_dec['team'] == 'Home'

#(new column, home source column, away source column)
team_columns = [
    ('PC team 5min before', 'PC home 5min before', 'PC away 5min before'),
    ('WPC team 5min before', 'WPC home 5min before', 'WPC away 5min before'),
    ('PC team 5min after', 'PC home 5min after', 'PC away 5min after'),
    ('WPC team 5min after', 'WPC home 5min after', 'WPC away 5min after'),
]
for team_col, home_col, away_col in team_columns:
    #pick home or away per row in one vectorised step; tolist() keeps the
    #assignment positional, like assigning a plain list
    df_dec[team_col] = df_dec[home_col].where(is_home, df_dec[away_col]).tolist()

In [None]:
#normalize the PCV values
#min-max scaling with one minimum and maximum shared by the before and after column
wpc_before_col = 'WPC team 5min before'
wpc_after_col = 'WPC team 5min after'
maximum = max(df_dec[wpc_before_col].max(), df_dec[wpc_after_col].max())
minimum = min(df_dec[wpc_before_col].min(), df_dec[wpc_after_col].min())
for column in (wpc_before_col, wpc_after_col):
    df_dec[column] = (df_dec[column] - minimum)/(maximum-minimum)

In [None]:
#only select the relevant columns
import pandas as pd
relevant_columns = ['PC team 5min before',
                    'PC team 5min after',
                    'WPC team 5min before',
                    'WPC team 5min after',
                    'neutral',
                    'offensive',
                    'defensive']
#dropna() runs on the complete dataframe, so a missing value in any column removes the row
df_dec = df_dec.dropna()
df = pd.DataFrame(df_dec, columns=relevant_columns)

In [None]:
#regression for pitch control and offensive decisions
#refit 'pc after' on 'pc before' plus the offensive dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuepc = []
p_valuetype = []
pc_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'PC team 5min before': 'pc before',
               'PC team 5min after': 'pc after',
               'WPC team 5min before': 'wpc before',
               'WPC team 5min after': 'wpc after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['pc before', 'offensive']]
    y = df_reg['pc after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'pc before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuepc.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    pc_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value pc': p_valuepc,
        'p_value type': p_valuetype,
        'beta pc': pc_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_pc_offensive = pd.DataFrame(data)
     

In [None]:
#regression for pitch control and defensive decisions
#refit 'pc after' on 'pc before' plus the defensive dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuepc = []
p_valuetype = []
pc_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'PC team 5min before': 'pc before',
               'PC team 5min after': 'pc after',
               'WPC team 5min before': 'wpc before',
               'WPC team 5min after': 'wpc after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['pc before', 'defensive']]
    y = df_reg['pc after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'pc before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuepc.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    pc_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value pc': p_valuepc,
        'p_value type': p_valuetype,
        'beta pc': pc_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_pc_defensive = pd.DataFrame(data)
     

In [None]:
#regression for pitch control and neutral decisions
#refit 'pc after' on 'pc before' plus the neutral dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuepc = []
p_valuetype = []
pc_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'PC team 5min before': 'pc before',
               'PC team 5min after': 'pc after',
               'WPC team 5min before': 'wpc before',
               'WPC team 5min after': 'wpc after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['pc before', 'neutral']]
    y = df_reg['pc after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'pc before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuepc.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    pc_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value pc': p_valuepc,
        'p_value type': p_valuetype,
        'beta pc': pc_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_pc_neutral = pd.DataFrame(data)
     

In [None]:
#regression for PCV and offensive decisions
#refit 'wpc after' on 'wpc before' plus the offensive dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuepc = []
p_valuetype = []
pc_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'PC team 5min before': 'pc before',
               'PC team 5min after': 'pc after',
               'WPC team 5min before': 'wpc before',
               'WPC team 5min after': 'wpc after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['wpc before', 'offensive']]
    y = df_reg['wpc after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'wpc before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuepc.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    pc_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value pc': p_valuepc,
        'p_value type': p_valuetype,
        'beta pc': pc_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_pcv_offensive = pd.DataFrame(data)
     

In [None]:
#regression for PCV and defensive decisions
#refit 'wpc after' on 'wpc before' plus the defensive dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuepc = []
p_valuetype = []
pc_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'PC team 5min before': 'pc before',
               'PC team 5min after': 'pc after',
               'WPC team 5min before': 'wpc before',
               'WPC team 5min after': 'wpc after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['wpc before', 'defensive']]
    y = df_reg['wpc after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'wpc before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuepc.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    pc_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value pc': p_valuepc,
        'p_value type': p_valuetype,
        'beta pc': pc_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_pcv_defensive = pd.DataFrame(data)
     

In [None]:
#regression for PCV and neutral decisions
#refit 'wpc after' on 'wpc before' plus the neutral dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuepc = []
p_valuetype = []
pc_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'PC team 5min before': 'pc before',
               'PC team 5min after': 'pc after',
               'WPC team 5min before': 'wpc before',
               'WPC team 5min after': 'wpc after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['wpc before', 'neutral']]
    y = df_reg['wpc after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'wpc before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuepc.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    pc_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value pc': p_valuepc,
        'p_value type': p_valuetype,
        'beta pc': pc_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_pcv_neutral = pd.DataFrame(data)
     

In [None]:
#import xT file
import pickle
#the with-block closes the file even when unpickling fails
#NOTE: unpickling can run arbitrary code, so only load files from a trusted source
with open('input xT regression decisions on Momentum.p', 'rb') as infile:
    df_dec = pickle.load(infile)

In [None]:
#create three additional columns for binary values for the type of decision
#every column holds 1 where 'Type of decision' equals the column name and 0 elsewhere
decision_type = df_dec['Type of decision']
for label in ('neutral', 'offensive', 'defensive'):
    #compare the whole column at once instead of fetching the rows one by one;
    #to_numpy() makes the assignment positional, like assigning a plain list
    df_dec[label] = (decision_type == label).to_numpy().astype('int64')

In [None]:
#create additional columns for the xT values before and after the decisions depending on the team
#rows with team 'Home' take the home columns, all other rows take the away columns
is_home = df_dec['team'] == 'Home'

#(new column, home source column, away source column)
team_columns = [
    ('xT team 5min before', 'xT_prev_5min_home', 'xT_prev_5min_away'),
    ('xT team 5min after', 'xT_next_5min_home', 'xT_next_5min_away'),
]
for team_col, home_col, away_col in team_columns:
    #pick home or away per row in one vectorised step; tolist() keeps the
    #assignment positional, like assigning a plain list
    df_dec[team_col] = df_dec[home_col].where(is_home, df_dec[away_col]).tolist()

In [None]:
#normalize the xt values
#min-max scaling with one minimum and maximum shared by the before and after column
xt_before_col = 'xT team 5min before'
xt_after_col = 'xT team 5min after'
maximum = max(df_dec[xt_before_col].max(), df_dec[xt_after_col].max())
minimum = min(df_dec[xt_before_col].min(), df_dec[xt_after_col].min())
for column in (xt_before_col, xt_after_col):
    df_dec[column] = (df_dec[column] - minimum)/(maximum-minimum)

In [None]:
#only select the relevant columns in the dataframe
import pandas as pd
relevant_columns = ['xT team 5min before',
                    'xT team 5min after',
                    'neutral',
                    'offensive',
                    'defensive']
#dropna() runs on the complete dataframe, so a missing value in any column removes the row
df_dec = df_dec.dropna()
df = pd.DataFrame(df_dec, columns=relevant_columns)

In [None]:
#regression for xT and offensive decisions
#refit 'xt after' on 'xt before' plus the offensive dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuext = []
p_valuetype = []
xt_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'xT team 5min before': 'xt before',
               'xT team 5min after': 'xt after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['xt before', 'offensive']]
    y = df_reg['xt after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'xt before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuext.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    xt_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value xt': p_valuext,
        'p_value type': p_valuetype,
        'beta xt': xt_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_xt_offensive = pd.DataFrame(data)
     

In [None]:
#regression for xT and defensive decisions
#refit 'xt after' on 'xt before' plus the defensive dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuext = []
p_valuetype = []
xt_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'xT team 5min before': 'xt before',
               'xT team 5min after': 'xt after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['xt before', 'defensive']]
    y = df_reg['xt after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'xt before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuext.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    xt_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value xt': p_valuext,
        'p_value type': p_valuetype,
        'beta xt': xt_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_xt_defensive = pd.DataFrame(data)
     

In [None]:
#regression for xT and neutral decisions
#refit 'xt after' on 'xt before' plus the neutral dummy for 100 resampled
#datasets and keep the statistics of every fit
p_valuext = []
p_valuetype = []
xt_param = []
type_param = []
rsq = []
mse = []

#the subsets per decision type are the same in every iteration, so select them once
offensive_rows = df.loc[df['offensive'] == 1]
defensive_rows = df.loc[df['defensive'] == 1]
neutral_rows = df.loc[df['neutral'] == 1]

#short column names used in the regression dataframe
reg_columns = {'xT team 5min before': 'xt before',
               'xT team 5min after': 'xt after',
               'offensive': 'offensive',
               'defensive': 'defensive',
               'neutral': 'neutral'}

pbar = ProgressBar()
#iterate the regression over a hundred times
for i in pbar(range(100)):
    #draw 80% of every decision type with replacement; each type is sampled
    #separately, so the types keep roughly the same share as in df
    df_offensive = offensive_rows.sample(frac=0.8, replace=True)
    df_defensive = defensive_rows.sample(frac=0.8, replace=True)
    df_neutral = neutral_rows.sample(frac=0.8, replace=True)
    df_new = pd.concat([df_offensive, df_defensive, df_neutral])

    #create dataframe with the necessary data for regression in one step instead
    #of copying it row by row; astype(float) keeps every column numeric for OLS
    df_reg = (df_new[list(reg_columns)]
              .rename(columns=reg_columns)
              .reset_index(drop=True)
              .astype(float))

    #Execute the regression
    #NOTE(review): no constant is added to X, so the fit has no intercept - confirm this is intended
    X = df_reg[['xt before', 'neutral']]
    y = df_reg['xt after']
    model = sm.OLS(y, X).fit()

    #collect the important statistics; position 0 is 'xt before' and position 1
    #the decision dummy. .iloc is used because the results are indexed by column
    #name and plain [0]/[1] lookups by position are deprecated in pandas
    p_valuext.append(model.pvalues.iloc[0])
    p_valuetype.append(model.pvalues.iloc[1])
    xt_param.append(model.params.iloc[0])
    type_param.append(model.params.iloc[1])
    rsq.append(model.rsquared_adj)
    #NOTE(review): mse_model is the mean square explained by the model, not the
    #residual error (mse_resid) - confirm which one the 'MSE' column should hold
    mse.append(model.mse_model)

data = {'p_value xt': p_valuext,
        'p_value type': p_valuetype,
        'beta xt': xt_param,
        'beta type': type_param,
        'rsquared': rsq,
        'MSE': mse}

#final dataframe with the statistics this regression
df_xt_neutral = pd.DataFrame(data)
     