In [1]:
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, RobustScaler
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder, RobustScaler, MinMaxScaler

import pandas as pd
import numpy as np

from scripts import load_data
from scripts import machine_learning

In [2]:
# Feature column names.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
COL = [
    "home_club_position",
    "away_club_position",
    "attendance",
    "squad_size_x",
    "average_age_x",
    "foreigners_percentage_x",
    "national_team_players_x",
]

In [3]:
# Feature columns handed to every hyper-parameter search below.
X_TRAIN_COL = [
    "home_club_position",
    "away_club_position",
    "home_club_id",
    "away_club_id",
    "attendance",
    "squad_size_x",
    "average_age_x",
    "foreigners_percentage_x",
    "national_team_players_x",
]

# Columns kept from the concatenated per-model search tables.
MODEL_GRID_COL = [
    "model_name",
    "mean_fit_time",
    "std_fit_time",
    "mean_score_time",
    "std_score_time",
    "params",
    "mean_test_score",
    "std_test_score",
]

# Columns offered to the per-model column search; the same columns are selected
# from `df_random` for the final prediction.
CANDIDATE_COLUMNS = [
    "home_club_position",
    "away_club_position",
    "squad_size_x",
    "average_age_x",
]

# Fixed seed so the train/test split is identical on every Restart & Run All.
RANDOM_STATE = 42


df_full = load_data.load_df()
df_transformed = machine_learning.transform_data(df_full)
df, df_random = load_data.split_data_random_and_Xy(df_transformed)

# NOTE(review): X/y are built from `df_transformed`, not from `df`. If
# `df_random` is a sample carved out of `df_transformed`, its rows are also in
# the train/test data used below — confirm this is intended.
X, y = machine_learning.split_X_y(df_transformed)
tts = train_test_split(X, y, stratify=y, test_size=0.3, random_state=RANDOM_STATE)
X_train, X_test, y_train, y_test = tts


# One search-result table per model. Every search sees the training split only,
# so the test split stays untouched for the column search further down.
models_dict = {
    "gausian": machine_learning.find_best_gausian_parameters(
        X_train[X_TRAIN_COL], y_train, cv=2
    ),
    "rfc": machine_learning.find_random_forest_class_parameters(
        X_train[X_TRAIN_COL], y_train, cv=2
    ),
    "decisionTree": machine_learning.find_decision_tree_parameters(
        X_train[X_TRAIN_COL], y_train, cv=2
    ),
}


# Keep, for each model, the first row whose `rank_test_score` is 1.
grid_df = []
for model_name, model_table in models_dict.items():
    model_grid = pd.DataFrame(model_table)
    best_model_df = (
        model_grid.loc[model_grid["rank_test_score"] == 1]
        .head(1)
        # `.assign` returns a new frame, avoiding a write into a slice
        # (SettingWithCopyWarning).
        .assign(model_name=model_name)
    )
    grid_df.append(best_model_df)

models_params_table = pd.concat(grid_df)[MODEL_GRID_COL]
# Presumably adds the `model` column read in the loop below — verify in
# machine_learning.add_models_to_grid_table.
models_params_table = machine_learning.add_models_to_grid_table(models_params_table)

# For every model: build it from its best params, then search the candidate
# columns using the train/test split.
best_columns_dict = {}
model_rows = models_params_table[["model_name", "params", "model"]].itertuples(index=False)
for name, params, model_funkc in model_rows:
    model = model_funkc(params=params)
    best_columns = machine_learning.find_best_columns_to_model(
        # Pass a fresh list each time, as the original literal did.
        columns_to_test=list(CANDIDATE_COLUMNS),
        model=model,
        X_test=X_test,
        y_test=y_test,
        X_train=X_train,
        y_train=y_train,
    )
    best_columns_dict[name] = {
        "model": model,
        "best_columns": best_columns,
        "params": params,
    }

X_new_data = df_random[CANDIDATE_COLUMNS]
y_new_result = df_random["result"]


predict_new_data = machine_learning.predict_new_data(
    model=best_columns_dict["rfc"]["model"],
    X_new_data=X_new_data,
    y_new_result=y_new_result,
)





In [20]:
# Rich-display the prediction table built by machine_learning.predict_new_data.
# NOTE(review): the saved output already contains a `result_val` column, which
# is assigned in a later cell — presumably an artifact of out-of-order
# execution; confirm on a fresh kernel.
predict_new_data

Unnamed: 0,home_club_position,away_club_position,squad_size_x,average_age_x,result,y_pred,y_result,propability,result_val
18965,17,2,30.0,23.7,0.0,1.0,0.0,0.773806,False
59938,15,1,31.0,25.7,1.0,1.0,1.0,0.779293,True
20445,9,10,26.0,25.4,0.0,0.0,0.0,0.430543,True
52156,2,15,25.0,24.9,2.0,2.0,2.0,0.828631,True
14148,18,15,25.0,26.2,2.0,1.0,2.0,0.358356,False
...,...,...,...,...,...,...,...,...,...
53006,13,2,25.0,25.8,1.0,1.0,1.0,0.693988,True
16343,11,14,31.0,23.5,1.0,2.0,1.0,0.553320,False
38768,9,11,31.0,25.6,0.0,2.0,0.0,0.434765,False
12712,4,17,28.0,25.8,2.0,2.0,2.0,0.868382,True


In [7]:
# Flag each row whose `y_pred` equals its `y_result`.
predict_new_data['result_val'] = predict_new_data['y_pred'].eq(predict_new_data['y_result'])

In [52]:
# Mean `propability` over the rows whose `propability` exceeds 0.8.
above_threshold = predict_new_data['propability'] > 0.8
predict_new_data.loc[above_threshold, 'propability'].mean()

0.8539929717509914

In [53]:
# Scratch arithmetic: 100 divided by 85.
# NOTE(review): presumably related to the ~0.85 mean `propability` computed in
# the previous cell — confirm the intent or remove this cell.
100/85

1.1764705882352942

In [51]:
from itertools import combinations

l = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'p']


# Print every non-empty combination of `l` as a list, smallest sizes first.
for subset_size in range(1, len(l) + 1):
    for subset in combinations(l, subset_size):
        print(list(subset))

['a']
['b']
['c']
['d']
['e']
['f']
['g']
['p']
['a', 'b']
['a', 'c']
['a', 'd']
['a', 'e']
['a', 'f']
['a', 'g']
['a', 'p']
['b', 'c']
['b', 'd']
['b', 'e']
['b', 'f']
['b', 'g']
['b', 'p']
['c', 'd']
['c', 'e']
['c', 'f']
['c', 'g']
['c', 'p']
['d', 'e']
['d', 'f']
['d', 'g']
['d', 'p']
['e', 'f']
['e', 'g']
['e', 'p']
['f', 'g']
['f', 'p']
['g', 'p']
['a', 'b', 'c']
['a', 'b', 'd']
['a', 'b', 'e']
['a', 'b', 'f']
['a', 'b', 'g']
['a', 'b', 'p']
['a', 'c', 'd']
['a', 'c', 'e']
['a', 'c', 'f']
['a', 'c', 'g']
['a', 'c', 'p']
['a', 'd', 'e']
['a', 'd', 'f']
['a', 'd', 'g']
['a', 'd', 'p']
['a', 'e', 'f']
['a', 'e', 'g']
['a', 'e', 'p']
['a', 'f', 'g']
['a', 'f', 'p']
['a', 'g', 'p']
['b', 'c', 'd']
['b', 'c', 'e']
['b', 'c', 'f']
['b', 'c', 'g']
['b', 'c', 'p']
['b', 'd', 'e']
['b', 'd', 'f']
['b', 'd', 'g']
['b', 'd', 'p']
['b', 'e', 'f']
['b', 'e', 'g']
['b', 'e', 'p']
['b', 'f', 'g']
['b', 'f', 'p']
['b', 'g', 'p']
['c', 'd', 'e']
['c', 'd', 'f']
['c', 'd', 'g']
['c', 'd', 'p']
['c'

In [48]:
# Print each 4-element combination of `l` as a tuple.
four_element_combos = combinations(l, 4)
for combo in four_element_combos:
    print(combo)



('a', 'b', 'c', 'd')


In [1]:
def startstop(func):
    """Decorate ``func`` so each call is bracketed by start/finish messages.

    The returned wrapper prints ``"Starting..."``, calls ``func`` with the
    positional and keyword arguments it was given, prints ``"Finished!"`` and
    returns whatever ``func`` returned. If ``func`` raises, the exception
    propagates and ``"Finished!"`` is not printed.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name and docstring as ``func``.
    """
    from functools import wraps  # local import keeps this cell self-contained

    @wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print("Starting...")
        result = func(*args, **kwargs)
        print("Finished!")
        return result

    return wrapper


def roll():
    """Print the demo message."""
    print("Rolling on the floor laughing XD")


# Manual decoration: same effect as writing ``@startstop`` above ``def roll``.
roll = startstop(roll)



In [3]:
@startstop
def roll():
    """Print the demo message; ``@startstop`` wraps this function at definition time."""
    print("Rolling on the floor laughing XD")

In [4]:
# Call the decorated function: the wrapper prints its start/finish lines around
# roll's own message.
roll()

Starting...
Rolling on the floor laughing XD
Finished!


In [6]:
l = [1,2,3]

# dict() cannot be built from a bare int, so this loop raises
# "TypeError: 'int' object is not iterable" on its first iteration.
# NOTE(review): intent unclear — confirm what was meant or remove this cell.
for i in l:
    dict(i)

TypeError: 'int' object is not iterable