In [275]:
import statsmodels.api as sm
import statsmodels.stats.stattools as sm_stats
import numpy as np
import patsy
from scipy.optimize import curve_fit

import pandas as pd
import matplotlib.pyplot as plt
import math

from typing import Literal, Iterable, Any
from pathlib import Path

## Helpers

In [276]:
np.random.seed(0)
global global_index
global_index = 0

In [277]:
OLS0_ID = "OLS0" # do not change !
OLS1_ID = "OLS1" # do not change !
OLS2_ID = "OLS2" # do not change !
OLS3_ID = "OLS3" # do not change !

WLS0_ID = "WLS0" # do not change !
WLS1_ID = "WLS1" # do not change !
WLS2_ID = "WLS2" # do not change !
WLS3_ID = "WLS3" # do not change !

RLM0_HUBERT_ID = "RLM0-HuberT" # do not change !
RLM1_HUBERT_ID = "RLM1-HuberT" # do not change !
RLM2_HUBERT_ID = "RLM2-HuberT" # do not change !
RLM3_HUBERT_ID = "RLM3-HuberT" # do not change !

RLM0_TUKEY_ID = "RLM0-Tukey" # do not change !
RLM1_TUKEY_ID = "RLM1-Tukey" # do not change !
RLM2_TUKEY_ID = "RLM2-Tukey" # do not change !
RLM3_TUKEY_ID = "RLM3-Tukey" # do not change !

CUBIC_SPLINE_ID = "Cubic Spline"
NEG_EXP_ID = "Negative Exponential"
SATURATION_ID = "Saturation"

models=[
    #OLS0_ID,
    OLS1_ID,
    OLS2_ID,
    OLS3_ID,
    
    #WLS0_ID,
    WLS1_ID,
    WLS2_ID,
    WLS3_ID,
    
    #RLM0_HUBERT_ID,
    #RLM1_HUBERT_ID,
    #RLM2_HUBERT_ID,
    #RLM3_HUBERT_ID,
    
    #RLM0_TUKEY_ID,
    #RLM1_TUKEY_ID,
    #RLM2_TUKEY_ID,
    #RLM3_TUKEY_ID,
    
    #CUBIC_SPLINE_ID,
    #NEG_EXP_ID,
    #SATURATION_ID
]


saved_models = {}

### Regression

In [278]:
def create_model_summary_df(model_name, rsquared, rsquared_adj, cond_nr, jb, jb_prob,\
    skew, kurtosis, params, success, msg):
    data = {
        "model_name": model_name,
        "success": success,
        "message": msg,
        "r_squared": max(0, rsquared) if not pd.isna(rsquared) else pd.NA, # Ensuring a non-negative R² for comparability
        "r_squared_adj": max(0, rsquared_adj) if not pd.isna(rsquared_adj) else pd.NA, # Ensuring a non-negative adj R² for comparability
        "cond_nr": cond_nr,
        "jb": jb,
        "jb_prob": jb_prob,
        "skew": skew,
        "kurtosis": kurtosis,
    }
    df_key_indicators = pd.DataFrame([data])    
    return df_key_indicators


def get_linear_regression_params(params, order):
    if params is None:
        return None
    
    if order == 1: 
        return params
    
    return pd.NA

In [279]:
def rsquared_robust(y, y_pred, weights):
    y = y.reset_index(drop=True)
    y_pred = y_pred.reset_index(drop=True)

    numerator = np.sum(weights * (y - y_pred) ** 2)

    yw_bar = np.sum(weights * y) / np.sum(weights)

    denominator = np.sum(weights * (y - yw_bar) ** 2)

    r2_robust = 1 - (numerator / denominator)
    return r2_robust

In [280]:
def rsquared_pseudo(y, y_pred):
    y = y.reset_index(drop=True)
    y_pred = y_pred.reset_index(drop=True)

    numerator = np.sum((y - y_pred) ** 2)

    denominator = np.sum((y - np.mean(y)) ** 2)

    r_squared = 1 - (numerator / denominator)

    return r_squared

In [281]:
def adj_rsquared(x, y, r_squared):
    n = len(y)
    q = x.shape[1] -1 
    
    if n-q <= 0:
        return pd.NA
     
    r2_adj_robust = 1 - (1 - r_squared) * ((n - 1) / (n - q))
        
    return r2_adj_robust

In [282]:
def condition_number(x):
    u, s, vh = np.linalg.svd(x.values, full_matrices=False)
    condition_number = s.max() / s.min() 
    return condition_number

### Visualization

In [283]:
def plot_regression(x, y, y_pred):
    plt.scatter(x, y, alpha=0.5, label="Beobachtete Daten")
    
    # plt.plot(X, y_pred, color="red", label="Regressionsgerade")
    plt.scatter(x, y_pred, color="red", marker="x", label="Regressionsvorhersagen")
    
    plt.xlabel("X-Werte")
    plt.ylabel("Y-Werte")
    plt.legend()
    plt.show()
    
    
def plot_residuals(residuals):
    plt.plot(residuals)
    plt.scatter(y=residuals, x=residuals.index, label="Residuen")
    plt.axhline(y=0, color="r", linestyle="-")
    plt.legend()
    plt.show()
    
    
def plot_spline(x, y, x_pred, y_pred) -> None:
    plt.scatter(x, y, s=50, color="None", edgecolor="black", alpha=0.7, marker="o", label="Beobachtete Daten")
    plt.plot(x_pred, y_pred, color="r", linewidth=1.5, label="Kubische Spline-Regression")
    
    plt.xlabel("X-Werte")
    plt.ylabel("Y-Werte")
    plt.title("Kubische Spline-Regression (Knoten: 25%-, 50%-, 75%-Quantil)", fontsize=12)
    plt.legend()
    
    plt.show()

### General

In [284]:
def load_file(file_path):
    df = pd.read_csv(file_path)
    df["date"] = pd.to_datetime(df["date"])
    df['decade'] = (df['date'].dt.year // 10) * 10
    return df


def get_data(df, target_col):
    df = sm.add_constant(df)

    features = ['const', 'decade', 'home_team', 'away_team', 'home_team_strength', 'away_team_strength', 'home_team_form_goals', 'home_team_form_points', 'away_team_form_goals', 'away_team_form_points', 'home_advantage', 'home_team_goals_letzte_Begegnung', 'home_team_goals_vorletzte_Begegnung', 'home_team_goals_vorvorletzte_Begegnung', 'away_team_goals_letzte_Begegnung', 'away_team_goals_vorletzte_Begegnung', 'away_team_goals_vorvorletzte_Begegnung', 'home_team_geschossen_letztes_Spiel', 'home_team_kassiert_letztes_Spiel', 'home_team_geschossen_vorletztes_Spiel', 'home_team_kassiert_vorletztes_Spiel', 'home_team_geschossen_vorvorletztes_Spiel', 'home_team_kassiert_vorvorletztes_Spiel', 'away_team_geschossen_letztes_Spiel', 'away_team_kassiert_letztes_Spiel', 'away_team_geschossen_vorletztes_Spiel', 'away_team_kassiert_vorletztes_Spiel', 'away_team_geschossen_vorvorletztes_Spiel', 'away_team_kassiert_vorvorletztes_Spiel', target_col]
    features_bak = features.copy()

    # Gruppenspiele der EM 2024 herausfiltern
    df_predict = df[(df["date"].dt.year == 2024) & (df["tournament"] == "UEFA Euro")]
    df_predict = df_predict[(df_predict["home_team"].notna()) & (df_predict["away_team"].notna())]

    df = df.drop(df_predict.index)
    df = df.dropna(how="any")
    df = df[features]
    dummy_cols = ["home_team", "away_team", "tournament", "city", "country"]
    df = pd.get_dummies(df, columns=[col for col in dummy_cols if col in df.columns])
    features = df.columns.tolist()
    display(features)

    bool_cols = df.select_dtypes(include=["bool"]).columns
    df[bool_cols] = df[bool_cols].astype(int)

    x_train = df.drop(columns=[target_col])
    y_train = df[target_col]
    x_predict = df_predict[features_bak].drop(columns=[target_col]).dropna(how="any")

    return x_train, y_train, x_predict


### Creation of Plynomial Dataset

In [285]:
def get_order_0_df(df):
    return pd.DataFrame(1, columns=["const"], index=df.index) 

def get_order_1_df(df):
    df_const = get_order_0_df(df)
    return pd.concat([df_const, df], axis=1)

def get_order_2_df(df):
    df_order_1 = get_order_1_df(df)
    df_squared = df**2
    df_squared.columns = [f"{col}__squared" for col in df_squared.columns]
    return pd.concat([df_order_1, df_squared], axis=1)

def get_order_3_df(df):
    df_order_2 = get_order_2_df(df)
    df_cubed = df**3
    df_cubed.columns = [f"{col}__cubed" for col in df_cubed.columns]  
    return pd.concat([df_order_2, df_cubed], axis=1)


In [286]:
def create_polynomial_df(df, order):
    if order == 0:
        return get_order_0_df(df)
    if order == 1:
        return get_order_1_df(df)
    if order == 2:
        return get_order_2_df(df)
    if order == 3:
        return get_order_3_df(df)
    return None

### Models

#### Linear

##### Ordinary Least Squares

In [287]:
def mod_OLS(x, y, order):
    global global_index
    global_index += 1
    
    x = x.reset_index(drop=True)
    y = y.reset_index(drop=True)

    model = sm.OLS(y, x, hasconst=True)
    result = model.fit(cov_type="hc2")
    saved_models[f"OLS{order}-I_{global_index}"] = [model, result]
    #y_pred = result.predict(x)
    #plot_regression(x[x.columns[1]], y, y_pred)
    #print(result.summary())

    # plot_residuals(residuals=result.resid)

    jb, jb_pv, skew, kurtosis = sm_stats.jarque_bera(result.resid, axis=0)
    params = get_linear_regression_params(result.params[1:], order) # [1:] needed to exclude the intercept/const column of X

    key_indicators_df = create_model_summary_df(f"OLS{order}-I_{global_index}", result.rsquared, result.rsquared_adj, result.condition_number, jb, jb_pv, skew, kurtosis, params, True, None)

    return key_indicators_df

##### Weighted Least Squares<>

In [288]:
def mod_WLS(x, y, order):
    global global_index
    global_index += 1
    
    x = x.reset_index(drop=True)
    y = y.reset_index(drop=True)
    
    
    model_OLS = sm.OLS(y, x, hasconst=True)
    result_OLS = model_OLS.fit(cov_type="hc2")
    
    
    weights = 1 / (1 + result_OLS.resid ** 2) # weights = Inverses der quadrierten Residuen aus dem OLS Modell

    model_WLS = sm.WLS(y, x, weights=weights, hasconst=True)
    result_WLS = model_WLS.fit(cov_type="hc2")
    saved_models[f"WLS{order}-I_{global_index}"] = [model_WLS, result_WLS]

    # y_pred = result_WLS.predict(x)
    # plot_regression(x[x.columns[1]], y, y_pred) 
    # print(result_WLS.summary())

    # plot_residuals(result_WLS.resid)
    
    jb, jb_pv, skew, kurtosis = sm_stats.jarque_bera(result_WLS.resid, axis=0)
    params = get_linear_regression_params(result_WLS.params[1:], order) # [1:] needed to exclude the intercept/const column of X
            
    key_indicators_df = create_model_summary_df(f"WLS{order}-I_{global_index}", result_WLS.rsquared, result_WLS.rsquared_adj, result_WLS.condition_number, jb, jb_pv, skew, kurtosis, params, True, None)
       
    return key_indicators_df

##### Robust linear

In [289]:
def mod_RLM(x, y, estimator, order):
    global global_index
    global_index += 1
    
    x = x.reset_index(drop=True)
    y = y.reset_index(drop=True)
    
    
    if estimator == "HuberT":
        estimator_sm = sm.robust.norms.HuberT()
    elif estimator == "Tukey":
        estimator_sm = sm.robust.norms.TukeyBiweight()
    else:
        estimator_sm = None
    
    
    model = sm.RLM(y, x, M=estimator_sm)
    try:
        result = model.fit()
        saved_models[f"RLM{order}-I_{global_index}"] = [model, result]
    except ZeroDivisionError as e:
        return create_model_summary_df(f"RLM{order}-{estimator}-I_{global_index}", rsquared=pd.NA, rsquared_adj=pd.NA, cond_nr=np.inf, jb=pd.NA, jb_prob=pd.NA, skew=pd.NA, kurtosis=pd.NA, params=pd.NA, success=False, msg="Divide by zero")

    y_pred = result.predict(x)
    # plot_regression(x[x.columns[1]], y, y_pred)
    # print(result.summary())

    # plot_residuals(result.resid)

    rsquared = rsquared_robust(y.squeeze(), y_pred, result.weights)
    rsquared_adj = adj_rsquared(x, y, rsquared)
    condition_nr = np.linalg.cond(x.mul(np.sqrt(result.weights), axis=0)) # sqrt to minimize the impact of weighting on Ausreißer
    jb, jb_pv, skew, kurtosis = sm_stats.jarque_bera(result.resid, axis=0)
    params = get_linear_regression_params(result.params[1:], order) # [1:] needed to exclude the intercept/const column of X
        
    key_indicators_df = create_model_summary_df(f"RLM{order}-{estimator}-I_{global_index}", rsquared, rsquared_adj, condition_nr, jb, jb_pv, skew, kurtosis, params, True, None)
    

    return key_indicators_df

#### Non linear

##### Cubic Spline

In [290]:
def mod_cubic_spline_1D(x, y):
    global global_index
    global_index += 1
    
    x = x.reset_index(drop=True)
    y = y.reset_index(drop=True)
    
    x_sorted = x.sort_values(by=x.columns[0]) # 0 because df can only be 1D
    y_sorted = y.reindex(x_sorted.index) 

    knots = np.quantile(x_sorted.values, [0.25, 0.50, 0.75])

    cubic_x = patsy.dmatrix(formula_like="bs(data, knots=(knots[0], knots[1], knots[2]), degree=3, include_intercept=False)",
                            data={"data": x_sorted.iloc[:, 0]}, # ".ilox[:, 0]" bc .dmatrix only accepts data with 1 dimension
                            return_type="dataframe" )
    
    model = sm.GLM(y_sorted, cubic_x)
    result = model.fit()
    saved_models[f"{CUBIC_SPLINE_ID}-I_{global_index}"] = [model, result]

    y_pred = result.predict(cubic_x)
    # plot_spline(x_sorted[x_sorted.columns[0]], y_sorted, x_sorted[x_sorted.columns[0]], y_pred)

    residuals = y_sorted.sub(y_pred, axis=0).reset_index(drop=True) # The result df can only have 1 column bc y can have only 1 column 

    rsquared = result.pseudo_rsquared()
    rsquared_adj = adj_rsquared(cubic_x, y, rsquared)
    x_max = np.amax(x.values)
    x_min = np.amin(x.values)
    if x_max == 0 or x_min == 0:
        condition_nr = pd.NA
    else:
        condition_nr = 1.848 * abs(x_max / x_min)  
    jb, jb_pv, skew, kurtosis = sm_stats.jarque_bera(residuals.values.flatten(), axis=0) 
    params = pd.NA

    key_indicators_df = create_model_summary_df(f"{CUBIC_SPLINE_ID}-I_{global_index}", rsquared, rsquared_adj, condition_nr, jb, jb_pv, skew, kurtosis, params, True, None)

    return key_indicators_df

##### Negative exponential

In [291]:
def mod_neg_exp(x, y):
    global global_index
    global_index += 1
    
    x_np = x.values
    y_np = y.values.flatten() # Flatten bc y needs to be 1D for curve_fit()
        
        
    def neg_exp_func(x, *params):
        return params[0] * np.exp(-np.dot(x, params[1:]))


    multiplier_start = np.median(y_np) 
    exponent_factors_start = np.full(x_np.shape[1], 0.1)
    p0_start = np.concatenate([[multiplier_start], exponent_factors_start])

    try:
        popt, _ = curve_fit(neg_exp_func, x_np, y_np, p0=p0_start) # popt contains the optimized parameters, _ ignores the covariance matrix of the parameters
        saved_models[f"{NEG_EXP_ID}-I_{global_index}"] = [None, popt]
    except RuntimeError as e:
        return create_model_summary_df(f"{NEG_EXP_ID}-I_{global_index}", rsquared=pd.NA, rsquared_adj=pd.NA, cond_nr=np.inf, jb=pd.NA, jb_prob=pd.NA, skew=pd.NA, kurtosis=pd.NA, params=pd.NA, success=False, msg="Maxiter exceeded")
    except TypeError as e:
        return create_model_summary_df(f"{NEG_EXP_ID}-I_{global_index}", rsquared=pd.NA, rsquared_adj=pd.NA, cond_nr=np.inf, jb=pd.NA, jb_prob=pd.NA, skew=pd.NA, kurtosis=pd.NA, params=pd.NA, success=False, msg="Unsolvable equation system")
  
    y_pred = neg_exp_func(x_np, *popt)
    residuals = y_np - y_pred

    # plot_regression(x.iloc[:, 0], y, y_pred)
    
    jb, jb_pv, skew, kurtosis = sm_stats.jarque_bera(residuals)
    rsquared = rsquared_pseudo(y.squeeze(), pd.Series(y_pred))
    rsquared_adj = adj_rsquared(x, y, rsquared)
    condition_nr = np.linalg.cond(x)
    params = pd.NA 
    key_indicators_df = create_model_summary_df(f"{NEG_EXP_ID}-I_{global_index}", rsquared, rsquared_adj, condition_nr, jb, jb_pv, skew, kurtosis, params, True, None)
    
    return key_indicators_df

##### Saturation

In [292]:
def mod_saturation(x, y):
    global global_index
    global_index += 1
       
    x_np = x.values
    y_np = y.values.flatten()  # Flatten bc y needed for curve_fit()


    def saturation_func(x, *params):
        L = params[0] 
        sep =  math.floor(len(params)/2) + 1
        k = params[1:sep]
        x0 = params[sep:]
        
        saturation = np.zeros(x.shape[0])

        for i in range(len(k)):
            saturation += L / (1 + np.exp(-k[i] * (x[:, i] - x0[i])))
            
        return saturation


    L_start = np.percentile(y_np, 95) 
    x0_start = np.mean(x_np, axis=0) 
    k_start = np.full(x_np.shape[1], 0.1) 
    p0_start = np.concatenate([[L_start], k_start, x0_start])
    
    try:
        popt, _ = curve_fit(saturation_func, x_np, y_np, p0=p0_start) # popt contains the optimized parameters, _ ignores the covariance matrix of the parameters
        saved_models[f"{SATURATION_ID}-I_{global_index}"] = [None, popt]
    except RuntimeError as e:
        return create_model_summary_df(f"{SATURATION_ID}-I_{global_index}", rsquared=pd.NA, rsquared_adj=pd.NA, cond_nr=np.inf, jb=pd.NA, jb_prob=pd.NA, skew=pd.NA, kurtosis=pd.NA, params=pd.NA, success=False, msg="Maxiter exceeded")
    except TypeError as e:
        return create_model_summary_df(f"{SATURATION_ID}-I_{global_index}", rsquared=pd.NA, rsquared_adj=pd.NA, cond_nr=np.inf, jb=pd.NA, jb_prob=pd.NA, skew=pd.NA, kurtosis=pd.NA, params=pd.NA, success=False, msg="Unsolvable equation system")

    y_pred = saturation_func(x_np, *popt)
    residuals = y_np - y_pred
    # plot_regression(x.iloc[:, 0], y, y_pred)
    
    jb, jb_pv, skew, kurtosis = sm_stats.jarque_bera(residuals)
    rsquared = rsquared_pseudo(y.squeeze(), pd.Series(y_pred))
    rsquared_adj = adj_rsquared(x, y, rsquared)
    condition_nr = np.linalg.cond(x)
    params = pd.NA 
    key_indicators_df = create_model_summary_df(f"{SATURATION_ID}-I_{global_index}", rsquared, rsquared_adj, condition_nr, jb, jb_pv, skew, kurtosis, params, True, None)
    
    

    return key_indicators_df

### Regression controller

In [293]:
def run_regression_models(x, y, models):
    import datetime
    models_summaries_df = pd.DataFrame()
    for order in range(0, 4):
        x_order = create_polynomial_df(x, order)
        if f"OLS{order}" in models:
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Starting OLS for order {order}")
            models_summaries_df = pd.concat([models_summaries_df, mod_OLS(x_order, y, order)], ignore_index=True)
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Finished")
        if f"WLS{order}" in models:
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Starting WLS for order {order}")
            models_summaries_df = pd.concat([models_summaries_df, mod_WLS(x_order, y, order)], ignore_index=True)
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Finished")
        if f"RLM{order}-HuberT" in models:
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Starting RLM-HuberT for order {order}")
            models_summaries_df = pd.concat([models_summaries_df, mod_RLM(x_order, y, "HuberT", order)], ignore_index=True)
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Finished")
        if f"RLM{order}-Tukey" in models:
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Starting RLM-Tukey for order {order}")
            models_summaries_df = pd.concat([models_summaries_df, mod_RLM(x_order, y, "Tukey", order)], ignore_index=True)
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Finished")
    
    if CUBIC_SPLINE_ID in models:
        for col in x.columns:
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Starting Cubic Spline")
            models_summaries_df = pd.concat([models_summaries_df, mod_cubic_spline_1D(x[[col]], y)], ignore_index=True)
            print(f"{datetime.datetime.now().strftime("%H:%M")} - Finished")
    
    if SATURATION_ID in models:
        print(f"{datetime.datetime.now().strftime("%H:%M")} - Starting Saturation")
        models_summaries_df = pd.concat([models_summaries_df, mod_saturation(x, y)], ignore_index=True)
        print(f"{datetime.datetime.now().strftime("%H:%M")} - Finished")
    
    if NEG_EXP_ID in models:
        print(f"{datetime.datetime.now().strftime("%H:%M")} - Starting Negative Exponential")
        models_summaries_df = pd.concat([models_summaries_df, mod_neg_exp(x, y)], ignore_index=True)
        print(f"{datetime.datetime.now().strftime("%H:%M")} - Finished")
       
    return models_summaries_df

## Lets Go

In [294]:
df = load_file(Path("files/temp/results_fifa_ranking.csv"))

### Home Score

In [295]:
x, y, x_predict = get_data(df, "home_score")
display(x, y, x_predict)

['const',
 'decade',
 'home_team_strength',
 'away_team_strength',
 'home_team_form_goals',
 'home_team_form_points',
 'away_team_form_goals',
 'away_team_form_points',
 'home_advantage',
 'home_team_goals_letzte_Begegnung',
 'home_team_goals_vorletzte_Begegnung',
 'home_team_goals_vorvorletzte_Begegnung',
 'away_team_goals_letzte_Begegnung',
 'away_team_goals_vorletzte_Begegnung',
 'away_team_goals_vorvorletzte_Begegnung',
 'home_team_geschossen_letztes_Spiel',
 'home_team_kassiert_letztes_Spiel',
 'home_team_geschossen_vorletztes_Spiel',
 'home_team_kassiert_vorletztes_Spiel',
 'home_team_geschossen_vorvorletztes_Spiel',
 'home_team_kassiert_vorvorletztes_Spiel',
 'away_team_geschossen_letztes_Spiel',
 'away_team_kassiert_letztes_Spiel',
 'away_team_geschossen_vorletztes_Spiel',
 'away_team_kassiert_vorletztes_Spiel',
 'away_team_geschossen_vorvorletztes_Spiel',
 'away_team_kassiert_vorvorletztes_Spiel',
 'home_score',
 'home_team_Afghanistan',
 'home_team_Albania',
 'home_team_Alger

Unnamed: 0,const,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,home_team_goals_letzte_Begegnung,...,away_team_Uruguay,away_team_Uzbekistan,away_team_Vanuatu,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe
3,1.0,1870,1794.90,1497.46,1.0,4.0,0.8,4.0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
4,1.0,1870,1497.46,1794.90,1.2,5.0,1.4,5.0,1,2.0,...,0,0,0,0,0,0,0,0,0,0
6,1.0,1870,1794.90,1497.46,1.4,5.0,2.6,10.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0
8,1.0,1870,1497.46,1794.90,2.8,13.0,1.6,4.0,1,3.0,...,0,0,0,0,0,0,0,0,0,0
11,1.0,1870,1794.90,1497.46,1.4,4.0,5.0,15.0,1,2.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47069,1.0,2020,1399.74,1616.41,1.0,5.0,1.0,4.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0
47070,1.0,2020,1497.46,1341.05,1.2,2.0,1.2,7.0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
47071,1.0,2020,1624.73,1225.68,2.4,13.0,1.0,3.0,0,1.0,...,0,0,0,0,0,0,0,0,0,0
47073,1.0,2020,1727.50,1788.65,1.8,12.0,0.6,4.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0


3        2.0
4        3.0
6        1.0
8        7.0
11       5.0
        ... 
47069    0.0
47070    0.0
47071    1.0
47073    3.0
47074    2.0
Name: home_score, Length: 25525, dtype: float64

Unnamed: 0,const,decade,home_team,away_team,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,...,home_team_geschossen_vorletztes_Spiel,home_team_kassiert_vorletztes_Spiel,home_team_geschossen_vorvorletztes_Spiel,home_team_kassiert_vorvorletztes_Spiel,away_team_geschossen_letztes_Spiel,away_team_kassiert_letztes_Spiel,away_team_geschossen_vorletztes_Spiel,away_team_kassiert_vorletztes_Spiel,away_team_geschossen_vorvorletztes_Spiel,away_team_kassiert_vorvorletztes_Spiel
47075,1.0,2020,Germany,Scotland,1644.21,1497.46,1.6,7.0,1.2,2.0,...,2.0,0.0,0.0,2.0,0.0,1.0,0.0,4.0,3.0,3.0
47076,1.0,2020,Hungary,Switzerland,1532.2,1616.41,2.0,11.0,0.6,6.0,...,1.0,0.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
47077,1.0,2020,Spain,Croatia,1727.5,1721.07,2.0,10.0,1.6,10.0,...,0.0,1.0,3.0,1.0,4.0,2.0,0.0,0.0,1.0,0.0
47078,1.0,2020,Italy,Albania,1724.6,1375.1,2.0,10.0,0.6,5.0,...,2.0,1.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0
47079,1.0,2020,Slovenia,Denmark,1427.84,1602.72,1.6,10.0,1.2,10.0,...,2.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0
47081,1.0,2020,Poland,Netherlands,1531.49,1742.29,1.8,9.0,2.6,12.0,...,5.0,1.0,2.0,0.0,1.0,2.0,4.0,0.0,6.0,0.0
47082,1.0,2020,Austria,France,1554.86,1840.59,2.6,15.0,4.6,10.0,...,2.0,0.0,2.0,0.0,3.0,2.0,0.0,2.0,2.0,2.0
47083,1.0,2020,Romania,Ukraine,1468.17,1568.86,2.0,10.0,1.8,13.0,...,1.0,1.0,1.0,0.0,2.0,1.0,2.0,1.0,0.0,0.0
47084,1.0,2020,Belgium,Slovakia,1795.23,1461.55,1.8,9.0,1.6,10.0,...,0.0,0.0,5.0,0.0,1.0,1.0,0.0,2.0,2.0,1.0
47085,1.0,2020,Turkey,Georgia,1495.94,1333.76,1.8,7.0,1.8,8.0,...,0.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,1.0,3.0


In [296]:
display(x.isna().sum(), y.isna().sum() )

const                   0
decade                  0
home_team_strength      0
away_team_strength      0
home_team_form_goals    0
                       ..
away_team_Wales         0
away_team_Yemen         0
away_team_Yugoslavia    0
away_team_Zambia        0
away_team_Zimbabwe      0
Length: 417, dtype: int64

0

In [297]:
display(x,y)

Unnamed: 0,const,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,home_team_goals_letzte_Begegnung,...,away_team_Uruguay,away_team_Uzbekistan,away_team_Vanuatu,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe
3,1.0,1870,1794.90,1497.46,1.0,4.0,0.8,4.0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
4,1.0,1870,1497.46,1794.90,1.2,5.0,1.4,5.0,1,2.0,...,0,0,0,0,0,0,0,0,0,0
6,1.0,1870,1794.90,1497.46,1.4,5.0,2.6,10.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0
8,1.0,1870,1497.46,1794.90,2.8,13.0,1.6,4.0,1,3.0,...,0,0,0,0,0,0,0,0,0,0
11,1.0,1870,1794.90,1497.46,1.4,4.0,5.0,15.0,1,2.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47069,1.0,2020,1399.74,1616.41,1.0,5.0,1.0,4.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0
47070,1.0,2020,1497.46,1341.05,1.2,2.0,1.2,7.0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
47071,1.0,2020,1624.73,1225.68,2.4,13.0,1.0,3.0,0,1.0,...,0,0,0,0,0,0,0,0,0,0
47073,1.0,2020,1727.50,1788.65,1.8,12.0,0.6,4.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0


3        2.0
4        3.0
6        1.0
8        7.0
11       5.0
        ... 
47069    0.0
47070    0.0
47071    1.0
47073    3.0
47074    2.0
Name: home_score, Length: 25525, dtype: float64

In [298]:
x_predict

Unnamed: 0,const,decade,home_team,away_team,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,...,home_team_geschossen_vorletztes_Spiel,home_team_kassiert_vorletztes_Spiel,home_team_geschossen_vorvorletztes_Spiel,home_team_kassiert_vorvorletztes_Spiel,away_team_geschossen_letztes_Spiel,away_team_kassiert_letztes_Spiel,away_team_geschossen_vorletztes_Spiel,away_team_kassiert_vorletztes_Spiel,away_team_geschossen_vorvorletztes_Spiel,away_team_kassiert_vorvorletztes_Spiel
47075,1.0,2020,Germany,Scotland,1644.21,1497.46,1.6,7.0,1.2,2.0,...,2.0,0.0,0.0,2.0,0.0,1.0,0.0,4.0,3.0,3.0
47076,1.0,2020,Hungary,Switzerland,1532.2,1616.41,2.0,11.0,0.6,6.0,...,1.0,0.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
47077,1.0,2020,Spain,Croatia,1727.5,1721.07,2.0,10.0,1.6,10.0,...,0.0,1.0,3.0,1.0,4.0,2.0,0.0,0.0,1.0,0.0
47078,1.0,2020,Italy,Albania,1724.6,1375.1,2.0,10.0,0.6,5.0,...,2.0,1.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0
47079,1.0,2020,Slovenia,Denmark,1427.84,1602.72,1.6,10.0,1.2,10.0,...,2.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0
47081,1.0,2020,Poland,Netherlands,1531.49,1742.29,1.8,9.0,2.6,12.0,...,5.0,1.0,2.0,0.0,1.0,2.0,4.0,0.0,6.0,0.0
47082,1.0,2020,Austria,France,1554.86,1840.59,2.6,15.0,4.6,10.0,...,2.0,0.0,2.0,0.0,3.0,2.0,0.0,2.0,2.0,2.0
47083,1.0,2020,Romania,Ukraine,1468.17,1568.86,2.0,10.0,1.8,13.0,...,1.0,1.0,1.0,0.0,2.0,1.0,2.0,1.0,0.0,0.0
47084,1.0,2020,Belgium,Slovakia,1795.23,1461.55,1.8,9.0,1.6,10.0,...,0.0,0.0,5.0,0.0,1.0,1.0,0.0,2.0,2.0,1.0
47085,1.0,2020,Turkey,Georgia,1495.94,1333.76,1.8,7.0,1.8,8.0,...,0.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,1.0,3.0


In [299]:
df_summary = run_regression_models(x, y, models)

11:54 - Starting OLS for order 1


11:54 - Finished
11:54 - Starting WLS for order 1
11:54 - Finished
11:54 - Starting OLS for order 2
11:54 - Finished
11:54 - Starting WLS for order 2
11:55 - Finished
11:55 - Starting OLS for order 3
11:55 - Finished
11:55 - Starting WLS for order 3
11:57 - Finished


In [300]:
df_summary.sort_values(by="r_squared", ascending=False)

Unnamed: 0,model_name,success,message,r_squared,r_squared_adj,cond_nr,jb,jb_prob,skew,kurtosis
5,WLS3-I_6,True,,0.427139,0.416695,8.503776e+21,16785.835603,0.0,1.132535,6.263804
3,WLS2-I_4,True,,0.42433,0.414349,2.260984e+21,16832.455393,0.0,1.134762,6.267424
1,WLS1-I_2,True,,0.418272,0.408728,4.526694e+19,17274.016881,0.0,1.14945,6.310142
4,OLS3-I_5,True,,0.253152,0.239536,1.803738e+22,14191.461444,0.0,1.065926,5.966276
2,OLS2-I_3,True,,0.250994,0.238008,1.393902e+21,14280.591887,0.0,1.068735,5.976343
0,OLS1-I_1,True,,0.24766,0.235317,2.105549e+19,14490.988343,0.0,1.076346,5.998522


In [301]:
x.columns, x_predict.columns

(Index(['const', 'decade', 'home_team_strength', 'away_team_strength',
        'home_team_form_goals', 'home_team_form_points', 'away_team_form_goals',
        'away_team_form_points', 'home_advantage',
        'home_team_goals_letzte_Begegnung',
        ...
        'away_team_Uruguay', 'away_team_Uzbekistan', 'away_team_Vanuatu',
        'away_team_Venezuela', 'away_team_Vietnam', 'away_team_Wales',
        'away_team_Yemen', 'away_team_Yugoslavia', 'away_team_Zambia',
        'away_team_Zimbabwe'],
       dtype='object', length=417),
 Index(['const', 'decade', 'home_team', 'away_team', 'home_team_strength',
        'away_team_strength', 'home_team_form_goals', 'home_team_form_points',
        'away_team_form_goals', 'away_team_form_points', 'home_advantage',
        'home_team_goals_letzte_Begegnung',
        'home_team_goals_vorletzte_Begegnung',
        'home_team_goals_vorvorletzte_Begegnung',
        'away_team_goals_letzte_Begegnung',
        'away_team_goals_vorletzte_Begegnung

In [302]:
result = saved_models["WLS1-I_2"][1]

x_predict_dummy = x_predict.copy()

dummy_cols = x_predict_dummy.select_dtypes(include=["object"]).columns
x_predict_dummy = pd.get_dummies(x_predict_dummy, columns=dummy_cols)

for col in x.columns:
    if col not in x_predict_dummy.columns:
        x_predict_dummy[col] = 0

x_pred = create_polynomial_df(x_predict_dummy, 1) # Ändern je nach Modell und Order

y_pred = result.predict(x_pred)

  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
  x_predict_dummy[col] = 0
 

In [303]:
x_pred = x_pred.drop(columns=[col for col in x_pred.columns if "cubed" in col or "squared" in col])
x_pred

Unnamed: 0,const,const.1,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,...,away_team_Uruguay,away_team_Uzbekistan,away_team_Vanuatu,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe
47075,1,1.0,2020,1644.21,1497.46,1.6,7.0,1.2,2.0,1,...,0,0,0,0,0,0,0,0,0,0
47076,1,1.0,2020,1532.2,1616.41,2.0,11.0,0.6,6.0,0,...,0,0,0,0,0,0,0,0,0,0
47077,1,1.0,2020,1727.5,1721.07,2.0,10.0,1.6,10.0,0,...,0,0,0,0,0,0,0,0,0,0
47078,1,1.0,2020,1724.6,1375.1,2.0,10.0,0.6,5.0,0,...,0,0,0,0,0,0,0,0,0,0
47079,1,1.0,2020,1427.84,1602.72,1.6,10.0,1.2,10.0,0,...,0,0,0,0,0,0,0,0,0,0
47081,1,1.0,2020,1531.49,1742.29,1.8,9.0,2.6,12.0,0,...,0,0,0,0,0,0,0,0,0,0
47082,1,1.0,2020,1554.86,1840.59,2.6,15.0,4.6,10.0,0,...,0,0,0,0,0,0,0,0,0,0
47083,1,1.0,2020,1468.17,1568.86,2.0,10.0,1.8,13.0,0,...,0,0,0,0,0,0,0,0,0,0
47084,1,1.0,2020,1795.23,1461.55,1.8,9.0,1.6,10.0,0,...,0,0,0,0,0,0,0,0,0,0
47085,1,1.0,2020,1495.94,1333.76,1.8,7.0,1.8,8.0,0,...,0,0,0,0,0,0,0,0,0,0


In [304]:
x_pred.columns

Index(['const', 'const', 'decade', 'home_team_strength', 'away_team_strength',
       'home_team_form_goals', 'home_team_form_points', 'away_team_form_goals',
       'away_team_form_points', 'home_advantage',
       ...
       'away_team_Uruguay', 'away_team_Uzbekistan', 'away_team_Vanuatu',
       'away_team_Venezuela', 'away_team_Vietnam', 'away_team_Wales',
       'away_team_Yemen', 'away_team_Yugoslavia', 'away_team_Zambia',
       'away_team_Zimbabwe'],
      dtype='object', length=418)

In [305]:
# Identifizieren der Dummy-kodierten Spalten
home_team_cols = [col for col in x_pred.columns if col.startswith('home_team_')]
away_team_cols = [col for col in x_pred.columns if col.startswith('away_team_')]

display(home_team_cols)

home_team_cols.remove('home_team_strength')
away_team_cols.remove('away_team_strength')
home_team_cols.remove("home_team_form_goals")
away_team_cols.remove("away_team_form_goals")
home_team_cols.remove("home_team_form_points")
away_team_cols.remove("away_team_form_points")
home_team_cols.remove("home_team_goals_letzte_Begegnung")
away_team_cols.remove("away_team_goals_letzte_Begegnung")
home_team_cols.remove("home_team_goals_vorletzte_Begegnung")
away_team_cols.remove("away_team_goals_vorletzte_Begegnung")
home_team_cols.remove("home_team_goals_vorvorletzte_Begegnung")
away_team_cols.remove("away_team_goals_vorvorletzte_Begegnung")
home_team_cols.remove("home_team_geschossen_letztes_Spiel")
away_team_cols.remove("away_team_geschossen_letztes_Spiel")
home_team_cols.remove("home_team_kassiert_letztes_Spiel")
away_team_cols.remove("away_team_kassiert_letztes_Spiel")
home_team_cols.remove("home_team_geschossen_vorletztes_Spiel")
away_team_cols.remove("away_team_geschossen_vorletztes_Spiel")
home_team_cols.remove("home_team_kassiert_vorletztes_Spiel")
away_team_cols.remove("away_team_kassiert_vorletztes_Spiel")
home_team_cols.remove("home_team_geschossen_vorvorletztes_Spiel")
away_team_cols.remove("away_team_geschossen_vorvorletztes_Spiel")
home_team_cols.remove("home_team_kassiert_vorvorletztes_Spiel")
away_team_cols.remove("away_team_kassiert_vorvorletztes_Spiel")

display(x_pred[home_team_cols])

# Zusammenführen der Dummy-Spalten zu einer einzigen Spalte
x_pred['home_team'] = x_pred[home_team_cols].idxmax(axis=1).str.replace('home_team_', '')
x_pred['away_team'] = x_pred[away_team_cols].idxmax(axis=1).str.replace('away_team_', '')
x_pred

['home_team_strength',
 'home_team_form_goals',
 'home_team_form_points',
 'home_team_goals_letzte_Begegnung',
 'home_team_goals_vorletzte_Begegnung',
 'home_team_goals_vorvorletzte_Begegnung',
 'home_team_geschossen_letztes_Spiel',
 'home_team_kassiert_letztes_Spiel',
 'home_team_geschossen_vorletztes_Spiel',
 'home_team_kassiert_vorletztes_Spiel',
 'home_team_geschossen_vorvorletztes_Spiel',
 'home_team_kassiert_vorvorletztes_Spiel',
 'home_team_Austria',
 'home_team_Belgium',
 'home_team_Germany',
 'home_team_Hungary',
 'home_team_Italy',
 'home_team_Poland',
 'home_team_Romania',
 'home_team_Slovenia',
 'home_team_Spain',
 'home_team_Turkey',
 'home_team_Afghanistan',
 'home_team_Albania',
 'home_team_Algeria',
 'home_team_American Samoa',
 'home_team_Andorra',
 'home_team_Angola',
 'home_team_Anguilla',
 'home_team_Antigua and Barbuda',
 'home_team_Argentina',
 'home_team_Armenia',
 'home_team_Aruba',
 'home_team_Australia',
 'home_team_Azerbaijan',
 'home_team_Bahamas',
 'home_te

Unnamed: 0,home_team_Austria,home_team_Belgium,home_team_Germany,home_team_Hungary,home_team_Italy,home_team_Poland,home_team_Romania,home_team_Slovenia,home_team_Spain,home_team_Turkey,...,home_team_Uruguay,home_team_Uzbekistan,home_team_Vanuatu,home_team_Venezuela,home_team_Vietnam,home_team_Wales,home_team_Yemen,home_team_Yugoslavia,home_team_Zambia,home_team_Zimbabwe
47075,False,False,True,False,False,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47076,False,False,False,True,False,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47077,False,False,False,False,False,False,False,False,True,False,...,0,0,0,0,0,0,0,0,0,0
47078,False,False,False,False,True,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47079,False,False,False,False,False,False,False,True,False,False,...,0,0,0,0,0,0,0,0,0,0
47081,False,False,False,False,False,True,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47082,True,False,False,False,False,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47083,False,False,False,False,False,False,True,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47084,False,True,False,False,False,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47085,False,False,False,False,False,False,False,False,False,True,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,const,const.1,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,...,away_team_Vanuatu,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe,home_team,away_team
47075,1,1.0,2020,1644.21,1497.46,1.6,7.0,1.2,2.0,1,...,0,0,0,0,0,0,0,0,Germany,Scotland
47076,1,1.0,2020,1532.2,1616.41,2.0,11.0,0.6,6.0,0,...,0,0,0,0,0,0,0,0,Hungary,Switzerland
47077,1,1.0,2020,1727.5,1721.07,2.0,10.0,1.6,10.0,0,...,0,0,0,0,0,0,0,0,Spain,Croatia
47078,1,1.0,2020,1724.6,1375.1,2.0,10.0,0.6,5.0,0,...,0,0,0,0,0,0,0,0,Italy,Albania
47079,1,1.0,2020,1427.84,1602.72,1.6,10.0,1.2,10.0,0,...,0,0,0,0,0,0,0,0,Slovenia,Denmark
47081,1,1.0,2020,1531.49,1742.29,1.8,9.0,2.6,12.0,0,...,0,0,0,0,0,0,0,0,Poland,Netherlands
47082,1,1.0,2020,1554.86,1840.59,2.6,15.0,4.6,10.0,0,...,0,0,0,0,0,0,0,0,Austria,France
47083,1,1.0,2020,1468.17,1568.86,2.0,10.0,1.8,13.0,0,...,0,0,0,0,0,0,0,0,Romania,Ukraine
47084,1,1.0,2020,1795.23,1461.55,1.8,9.0,1.6,10.0,0,...,0,0,0,0,0,0,0,0,Belgium,Slovakia
47085,1,1.0,2020,1495.94,1333.76,1.8,7.0,1.8,8.0,0,...,0,0,0,0,0,0,0,0,Turkey,Georgia


In [306]:
x_pred["home_score"] = y_pred.apply(lambda x: round(math.pow(abs(x), 1), 2)) # Wurzel anpassen für versch. Modelle
x_pred

Unnamed: 0,const,const.1,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,...,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe,home_team,away_team,home_score
47075,1,1.0,2020,1644.21,1497.46,1.6,7.0,1.2,2.0,1,...,0,0,0,0,0,0,0,Germany,Scotland,1.97
47076,1,1.0,2020,1532.2,1616.41,2.0,11.0,0.6,6.0,0,...,0,0,0,0,0,0,0,Hungary,Switzerland,0.17
47077,1,1.0,2020,1727.5,1721.07,2.0,10.0,1.6,10.0,0,...,0,0,0,0,0,0,0,Spain,Croatia,0.75
47078,1,1.0,2020,1724.6,1375.1,2.0,10.0,0.6,5.0,0,...,0,0,0,0,0,0,0,Italy,Albania,3.05
47079,1,1.0,2020,1427.84,1602.72,1.6,10.0,1.2,10.0,0,...,0,0,0,0,0,0,0,Slovenia,Denmark,0.6
47081,1,1.0,2020,1531.49,1742.29,1.8,9.0,2.6,12.0,0,...,0,0,0,0,0,0,0,Poland,Netherlands,0.82
47082,1,1.0,2020,1554.86,1840.59,2.6,15.0,4.6,10.0,0,...,0,0,0,0,0,0,0,Austria,France,1.02
47083,1,1.0,2020,1468.17,1568.86,2.0,10.0,1.8,13.0,0,...,0,0,0,0,0,0,0,Romania,Ukraine,0.51
47084,1,1.0,2020,1795.23,1461.55,1.8,9.0,1.6,10.0,0,...,0,0,0,0,0,0,0,Belgium,Slovakia,2.36
47085,1,1.0,2020,1495.94,1333.76,1.8,7.0,1.8,8.0,0,...,0,0,0,0,0,0,0,Turkey,Georgia,2.31


In [307]:
df_summary.to_excel("model_summary_home_score.xlsx")

### Away Score

In [308]:
pred_home_score = x_pred.copy()

In [309]:
x_away, y_away, x_away_predict = get_data(df, "away_score")
display(x_away, y_away, x_away_predict)

['const',
 'decade',
 'home_team_strength',
 'away_team_strength',
 'home_team_form_goals',
 'home_team_form_points',
 'away_team_form_goals',
 'away_team_form_points',
 'home_advantage',
 'home_team_goals_letzte_Begegnung',
 'home_team_goals_vorletzte_Begegnung',
 'home_team_goals_vorvorletzte_Begegnung',
 'away_team_goals_letzte_Begegnung',
 'away_team_goals_vorletzte_Begegnung',
 'away_team_goals_vorvorletzte_Begegnung',
 'home_team_geschossen_letztes_Spiel',
 'home_team_kassiert_letztes_Spiel',
 'home_team_geschossen_vorletztes_Spiel',
 'home_team_kassiert_vorletztes_Spiel',
 'home_team_geschossen_vorvorletztes_Spiel',
 'home_team_kassiert_vorvorletztes_Spiel',
 'away_team_geschossen_letztes_Spiel',
 'away_team_kassiert_letztes_Spiel',
 'away_team_geschossen_vorletztes_Spiel',
 'away_team_kassiert_vorletztes_Spiel',
 'away_team_geschossen_vorvorletztes_Spiel',
 'away_team_kassiert_vorvorletztes_Spiel',
 'away_score',
 'home_team_Afghanistan',
 'home_team_Albania',
 'home_team_Alger

Unnamed: 0,const,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,home_team_goals_letzte_Begegnung,...,away_team_Uruguay,away_team_Uzbekistan,away_team_Vanuatu,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe
3,1.0,1870,1794.90,1497.46,1.0,4.0,0.8,4.0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
4,1.0,1870,1497.46,1794.90,1.2,5.0,1.4,5.0,1,2.0,...,0,0,0,0,0,0,0,0,0,0
6,1.0,1870,1794.90,1497.46,1.4,5.0,2.6,10.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0
8,1.0,1870,1497.46,1794.90,2.8,13.0,1.6,4.0,1,3.0,...,0,0,0,0,0,0,0,0,0,0
11,1.0,1870,1794.90,1497.46,1.4,4.0,5.0,15.0,1,2.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47069,1.0,2020,1399.74,1616.41,1.0,5.0,1.0,4.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0
47070,1.0,2020,1497.46,1341.05,1.2,2.0,1.2,7.0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
47071,1.0,2020,1624.73,1225.68,2.4,13.0,1.0,3.0,0,1.0,...,0,0,0,0,0,0,0,0,0,0
47073,1.0,2020,1727.50,1788.65,1.8,12.0,0.6,4.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0


3        2.0
4        0.0
6        3.0
8        2.0
11       4.0
        ... 
47069    1.0
47070    1.0
47071    0.0
47073    3.0
47074    1.0
Name: away_score, Length: 25525, dtype: float64

Unnamed: 0,const,decade,home_team,away_team,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,...,home_team_geschossen_vorletztes_Spiel,home_team_kassiert_vorletztes_Spiel,home_team_geschossen_vorvorletztes_Spiel,home_team_kassiert_vorvorletztes_Spiel,away_team_geschossen_letztes_Spiel,away_team_kassiert_letztes_Spiel,away_team_geschossen_vorletztes_Spiel,away_team_kassiert_vorletztes_Spiel,away_team_geschossen_vorvorletztes_Spiel,away_team_kassiert_vorvorletztes_Spiel
47075,1.0,2020,Germany,Scotland,1644.21,1497.46,1.6,7.0,1.2,2.0,...,2.0,0.0,0.0,2.0,0.0,1.0,0.0,4.0,3.0,3.0
47076,1.0,2020,Hungary,Switzerland,1532.2,1616.41,2.0,11.0,0.6,6.0,...,1.0,0.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
47077,1.0,2020,Spain,Croatia,1727.5,1721.07,2.0,10.0,1.6,10.0,...,0.0,1.0,3.0,1.0,4.0,2.0,0.0,0.0,1.0,0.0
47078,1.0,2020,Italy,Albania,1724.6,1375.1,2.0,10.0,0.6,5.0,...,2.0,1.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0
47079,1.0,2020,Slovenia,Denmark,1427.84,1602.72,1.6,10.0,1.2,10.0,...,2.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0
47081,1.0,2020,Poland,Netherlands,1531.49,1742.29,1.8,9.0,2.6,12.0,...,5.0,1.0,2.0,0.0,1.0,2.0,4.0,0.0,6.0,0.0
47082,1.0,2020,Austria,France,1554.86,1840.59,2.6,15.0,4.6,10.0,...,2.0,0.0,2.0,0.0,3.0,2.0,0.0,2.0,2.0,2.0
47083,1.0,2020,Romania,Ukraine,1468.17,1568.86,2.0,10.0,1.8,13.0,...,1.0,1.0,1.0,0.0,2.0,1.0,2.0,1.0,0.0,0.0
47084,1.0,2020,Belgium,Slovakia,1795.23,1461.55,1.8,9.0,1.6,10.0,...,0.0,0.0,5.0,0.0,1.0,1.0,0.0,2.0,2.0,1.0
47085,1.0,2020,Turkey,Georgia,1495.94,1333.76,1.8,7.0,1.8,8.0,...,0.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,1.0,3.0


In [310]:
display(x_away.isna().sum(), y_away.isna().sum() )

const                   0
decade                  0
home_team_strength      0
away_team_strength      0
home_team_form_goals    0
                       ..
away_team_Wales         0
away_team_Yemen         0
away_team_Yugoslavia    0
away_team_Zambia        0
away_team_Zimbabwe      0
Length: 417, dtype: int64

0

In [311]:
display(x_away, y_away)

Unnamed: 0,const,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,home_team_goals_letzte_Begegnung,...,away_team_Uruguay,away_team_Uzbekistan,away_team_Vanuatu,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe
3,1.0,1870,1794.90,1497.46,1.0,4.0,0.8,4.0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
4,1.0,1870,1497.46,1794.90,1.2,5.0,1.4,5.0,1,2.0,...,0,0,0,0,0,0,0,0,0,0
6,1.0,1870,1794.90,1497.46,1.4,5.0,2.6,10.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0
8,1.0,1870,1497.46,1794.90,2.8,13.0,1.6,4.0,1,3.0,...,0,0,0,0,0,0,0,0,0,0
11,1.0,1870,1794.90,1497.46,1.4,4.0,5.0,15.0,1,2.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47069,1.0,2020,1399.74,1616.41,1.0,5.0,1.0,4.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0
47070,1.0,2020,1497.46,1341.05,1.2,2.0,1.2,7.0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
47071,1.0,2020,1624.73,1225.68,2.4,13.0,1.0,3.0,0,1.0,...,0,0,0,0,0,0,0,0,0,0
47073,1.0,2020,1727.50,1788.65,1.8,12.0,0.6,4.0,1,0.0,...,0,0,0,0,0,0,0,0,0,0


3        2.0
4        0.0
6        3.0
8        2.0
11       4.0
        ... 
47069    1.0
47070    1.0
47071    0.0
47073    3.0
47074    1.0
Name: away_score, Length: 25525, dtype: float64

In [312]:
x_away_predict

Unnamed: 0,const,decade,home_team,away_team,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,...,home_team_geschossen_vorletztes_Spiel,home_team_kassiert_vorletztes_Spiel,home_team_geschossen_vorvorletztes_Spiel,home_team_kassiert_vorvorletztes_Spiel,away_team_geschossen_letztes_Spiel,away_team_kassiert_letztes_Spiel,away_team_geschossen_vorletztes_Spiel,away_team_kassiert_vorletztes_Spiel,away_team_geschossen_vorvorletztes_Spiel,away_team_kassiert_vorvorletztes_Spiel
47075,1.0,2020,Germany,Scotland,1644.21,1497.46,1.6,7.0,1.2,2.0,...,2.0,0.0,0.0,2.0,0.0,1.0,0.0,4.0,3.0,3.0
47076,1.0,2020,Hungary,Switzerland,1532.2,1616.41,2.0,11.0,0.6,6.0,...,1.0,0.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
47077,1.0,2020,Spain,Croatia,1727.5,1721.07,2.0,10.0,1.6,10.0,...,0.0,1.0,3.0,1.0,4.0,2.0,0.0,0.0,1.0,0.0
47078,1.0,2020,Italy,Albania,1724.6,1375.1,2.0,10.0,0.6,5.0,...,2.0,1.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0
47079,1.0,2020,Slovenia,Denmark,1427.84,1602.72,1.6,10.0,1.2,10.0,...,2.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0
47081,1.0,2020,Poland,Netherlands,1531.49,1742.29,1.8,9.0,2.6,12.0,...,5.0,1.0,2.0,0.0,1.0,2.0,4.0,0.0,6.0,0.0
47082,1.0,2020,Austria,France,1554.86,1840.59,2.6,15.0,4.6,10.0,...,2.0,0.0,2.0,0.0,3.0,2.0,0.0,2.0,2.0,2.0
47083,1.0,2020,Romania,Ukraine,1468.17,1568.86,2.0,10.0,1.8,13.0,...,1.0,1.0,1.0,0.0,2.0,1.0,2.0,1.0,0.0,0.0
47084,1.0,2020,Belgium,Slovakia,1795.23,1461.55,1.8,9.0,1.6,10.0,...,0.0,0.0,5.0,0.0,1.0,1.0,0.0,2.0,2.0,1.0
47085,1.0,2020,Turkey,Georgia,1495.94,1333.76,1.8,7.0,1.8,8.0,...,0.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,1.0,3.0


In [313]:
df_away_summary = run_regression_models(x_away, y_away, models)

11:57 - Starting OLS for order 1


11:57 - Finished
11:57 - Starting WLS for order 1
11:57 - Finished
11:57 - Starting OLS for order 2
11:57 - Finished
11:57 - Starting WLS for order 2
11:58 - Finished
11:58 - Starting OLS for order 3
11:58 - Finished
11:58 - Starting WLS for order 3
12:00 - Finished


In [314]:
df_away_summary.sort_values(by="r_squared", ascending=False)

Unnamed: 0,model_name,success,message,r_squared,r_squared_adj,cond_nr,jb,jb_prob,skew,kurtosis
5,WLS3-I_12,True,,0.339986,0.327953,2.246251e+19,17223.105695,0.0,1.230828,6.18345
3,WLS2-I_10,True,,0.336658,0.325157,8.890963e+20,17781.714992,0.0,1.242088,6.247801
1,WLS1-I_8,True,,0.3302,0.319211,3.654872e+19,18495.257309,0.0,1.255147,6.329967
4,OLS3-I_11,True,,0.219163,0.204927,1.803738e+22,13811.267025,0.0,1.146525,5.779933
2,OLS2-I_9,True,,0.215991,0.202397,1.393902e+21,14298.162803,0.0,1.157176,5.843886
0,OLS1-I_7,True,,0.211866,0.198935,2.105549e+19,14601.371091,0.0,1.164862,5.881214


In [315]:
result_away = saved_models["WLS1-I_8"][1]

x_away_predict_dummy = x_away_predict.copy()

away_dummy_cols = x_away_predict_dummy.select_dtypes(include=["object"]).columns
x_away_predict_dummy = pd.get_dummies(x_away_predict_dummy, columns=away_dummy_cols)

for col in x_away.columns:
    if col not in x_away_predict_dummy.columns:
        x_away_predict_dummy[col] = 0

x_away_pred = create_polynomial_df(x_away_predict_dummy, 1)

y_away_pred = result_away.predict(x_away_pred)

  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away_predict_dummy[col] = 0
  x_away

In [316]:
x_away_pred = x_away_pred.drop(columns=[col for col in x_away_pred.columns if "cubed" in col or "squared" in col])
x_away_pred

Unnamed: 0,const,const.1,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,...,away_team_Uruguay,away_team_Uzbekistan,away_team_Vanuatu,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe
47075,1,1.0,2020,1644.21,1497.46,1.6,7.0,1.2,2.0,1,...,0,0,0,0,0,0,0,0,0,0
47076,1,1.0,2020,1532.2,1616.41,2.0,11.0,0.6,6.0,0,...,0,0,0,0,0,0,0,0,0,0
47077,1,1.0,2020,1727.5,1721.07,2.0,10.0,1.6,10.0,0,...,0,0,0,0,0,0,0,0,0,0
47078,1,1.0,2020,1724.6,1375.1,2.0,10.0,0.6,5.0,0,...,0,0,0,0,0,0,0,0,0,0
47079,1,1.0,2020,1427.84,1602.72,1.6,10.0,1.2,10.0,0,...,0,0,0,0,0,0,0,0,0,0
47081,1,1.0,2020,1531.49,1742.29,1.8,9.0,2.6,12.0,0,...,0,0,0,0,0,0,0,0,0,0
47082,1,1.0,2020,1554.86,1840.59,2.6,15.0,4.6,10.0,0,...,0,0,0,0,0,0,0,0,0,0
47083,1,1.0,2020,1468.17,1568.86,2.0,10.0,1.8,13.0,0,...,0,0,0,0,0,0,0,0,0,0
47084,1,1.0,2020,1795.23,1461.55,1.8,9.0,1.6,10.0,0,...,0,0,0,0,0,0,0,0,0,0
47085,1,1.0,2020,1495.94,1333.76,1.8,7.0,1.8,8.0,0,...,0,0,0,0,0,0,0,0,0,0


In [317]:
# Identifizieren der Dummy-kodierten Spalten
home_team_cols = [col for col in x_away_pred.columns if col.startswith('home_team_')]
away_team_cols = [col for col in x_away_pred.columns if col.startswith('away_team_')]

home_team_cols.remove('home_team_strength')
away_team_cols.remove('away_team_strength')
home_team_cols.remove("home_team_form_goals")
away_team_cols.remove("away_team_form_goals")
home_team_cols.remove("home_team_form_points")
away_team_cols.remove("away_team_form_points")
home_team_cols.remove("home_team_goals_letzte_Begegnung")
away_team_cols.remove("away_team_goals_letzte_Begegnung")
home_team_cols.remove("home_team_goals_vorletzte_Begegnung")
away_team_cols.remove("away_team_goals_vorletzte_Begegnung")
home_team_cols.remove("home_team_goals_vorvorletzte_Begegnung")
away_team_cols.remove("away_team_goals_vorvorletzte_Begegnung")
home_team_cols.remove("home_team_geschossen_letztes_Spiel")
away_team_cols.remove("away_team_geschossen_letztes_Spiel")
home_team_cols.remove("home_team_kassiert_letztes_Spiel")
away_team_cols.remove("away_team_kassiert_letztes_Spiel")
home_team_cols.remove("home_team_geschossen_vorletztes_Spiel")
away_team_cols.remove("away_team_geschossen_vorletztes_Spiel")
home_team_cols.remove("home_team_kassiert_vorletztes_Spiel")
away_team_cols.remove("away_team_kassiert_vorletztes_Spiel")
home_team_cols.remove("home_team_geschossen_vorvorletztes_Spiel")
away_team_cols.remove("away_team_geschossen_vorvorletztes_Spiel")
home_team_cols.remove("home_team_kassiert_vorvorletztes_Spiel")
away_team_cols.remove("away_team_kassiert_vorvorletztes_Spiel")

display(x_away_pred[home_team_cols])

# Zusammenführen der Dummy-Spalten zu einer einzigen Spalte
x_away_pred['home_team'] = x_away_pred[home_team_cols].idxmax(axis=1).str.replace('home_team_', '')
x_away_pred['away_team'] = x_away_pred[away_team_cols].idxmax(axis=1).str.replace('away_team_', '')
x_away_pred

Unnamed: 0,home_team_Austria,home_team_Belgium,home_team_Germany,home_team_Hungary,home_team_Italy,home_team_Poland,home_team_Romania,home_team_Slovenia,home_team_Spain,home_team_Turkey,...,home_team_Uruguay,home_team_Uzbekistan,home_team_Vanuatu,home_team_Venezuela,home_team_Vietnam,home_team_Wales,home_team_Yemen,home_team_Yugoslavia,home_team_Zambia,home_team_Zimbabwe
47075,False,False,True,False,False,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47076,False,False,False,True,False,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47077,False,False,False,False,False,False,False,False,True,False,...,0,0,0,0,0,0,0,0,0,0
47078,False,False,False,False,True,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47079,False,False,False,False,False,False,False,True,False,False,...,0,0,0,0,0,0,0,0,0,0
47081,False,False,False,False,False,True,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47082,True,False,False,False,False,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47083,False,False,False,False,False,False,True,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47084,False,True,False,False,False,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0
47085,False,False,False,False,False,False,False,False,False,True,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,const,const.1,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,...,away_team_Vanuatu,away_team_Venezuela,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe,home_team,away_team
47075,1,1.0,2020,1644.21,1497.46,1.6,7.0,1.2,2.0,1,...,0,0,0,0,0,0,0,0,Germany,Scotland
47076,1,1.0,2020,1532.2,1616.41,2.0,11.0,0.6,6.0,0,...,0,0,0,0,0,0,0,0,Hungary,Switzerland
47077,1,1.0,2020,1727.5,1721.07,2.0,10.0,1.6,10.0,0,...,0,0,0,0,0,0,0,0,Spain,Croatia
47078,1,1.0,2020,1724.6,1375.1,2.0,10.0,0.6,5.0,0,...,0,0,0,0,0,0,0,0,Italy,Albania
47079,1,1.0,2020,1427.84,1602.72,1.6,10.0,1.2,10.0,0,...,0,0,0,0,0,0,0,0,Slovenia,Denmark
47081,1,1.0,2020,1531.49,1742.29,1.8,9.0,2.6,12.0,0,...,0,0,0,0,0,0,0,0,Poland,Netherlands
47082,1,1.0,2020,1554.86,1840.59,2.6,15.0,4.6,10.0,0,...,0,0,0,0,0,0,0,0,Austria,France
47083,1,1.0,2020,1468.17,1568.86,2.0,10.0,1.8,13.0,0,...,0,0,0,0,0,0,0,0,Romania,Ukraine
47084,1,1.0,2020,1795.23,1461.55,1.8,9.0,1.6,10.0,0,...,0,0,0,0,0,0,0,0,Belgium,Slovakia
47085,1,1.0,2020,1495.94,1333.76,1.8,7.0,1.8,8.0,0,...,0,0,0,0,0,0,0,0,Turkey,Georgia


In [318]:
x_pred["away_score"] = y_away_pred.apply(lambda x: round(math.pow(abs(x), 1), 2)) # Wurzel anpassen für versch. Modelle
x_pred

Unnamed: 0,const,const.1,decade,home_team_strength,away_team_strength,home_team_form_goals,home_team_form_points,away_team_form_goals,away_team_form_points,home_advantage,...,away_team_Vietnam,away_team_Wales,away_team_Yemen,away_team_Yugoslavia,away_team_Zambia,away_team_Zimbabwe,home_team,away_team,home_score,away_score
47075,1,1.0,2020,1644.21,1497.46,1.6,7.0,1.2,2.0,1,...,0,0,0,0,0,0,Germany,Scotland,1.97,0.56
47076,1,1.0,2020,1532.2,1616.41,2.0,11.0,0.6,6.0,0,...,0,0,0,0,0,0,Hungary,Switzerland,0.17,6.08
47077,1,1.0,2020,1727.5,1721.07,2.0,10.0,1.6,10.0,0,...,0,0,0,0,0,0,Spain,Croatia,0.75,1.59
47078,1,1.0,2020,1724.6,1375.1,2.0,10.0,0.6,5.0,0,...,0,0,0,0,0,0,Italy,Albania,3.05,0.76
47079,1,1.0,2020,1427.84,1602.72,1.6,10.0,1.2,10.0,0,...,0,0,0,0,0,0,Slovenia,Denmark,0.6,1.3
47081,1,1.0,2020,1531.49,1742.29,1.8,9.0,2.6,12.0,0,...,0,0,0,0,0,0,Poland,Netherlands,0.82,1.26
47082,1,1.0,2020,1554.86,1840.59,2.6,15.0,4.6,10.0,0,...,0,0,0,0,0,0,Austria,France,1.02,1.55
47083,1,1.0,2020,1468.17,1568.86,2.0,10.0,1.8,13.0,0,...,0,0,0,0,0,0,Romania,Ukraine,0.51,3.09
47084,1,1.0,2020,1795.23,1461.55,1.8,9.0,1.6,10.0,0,...,0,0,0,0,0,0,Belgium,Slovakia,2.36,0.57
47085,1,1.0,2020,1495.94,1333.76,1.8,7.0,1.8,8.0,0,...,0,0,0,0,0,0,Turkey,Georgia,2.31,1.39
