In [None]:
import pandas as pd
import warnings
import numpy as np
import re
from sklearn.ensemble import RandomForestClassifier

# Random seed shared by the helpers below; feature_selection_transform passes it
# to its RandomForestClassifier as random_state.
seed = 10

def get_dataset(featureset_path: str = "/content/train_features.csv",
                targetset_path: str = "/content/train_targets.csv"):
    """Load the feature and target tables and keep only the matches of interest.

    Rows are kept when ``game_mode`` is 2 or 22 and ``game_time`` is positive.
    The bookkeeping columns are then dropped and every per-player
    ``*_teamfight_participation`` value above 1.0 is capped at 1.

    Args:
        featureset_path: CSV file holding the match features. Defaults to the
            path the notebook has always used.
        targetset_path: CSV file holding the targets. Defaults to the path the
            notebook has always used.

    Returns:
        ``(df, target)``: the filtered features and the target rows selected
        with the surviving index labels of ``df``.
    """
    df = pd.read_csv(featureset_path)
    target = pd.read_csv(targetset_path)

    column_to_drop = ["lobby_type", "chat_len", "game_mode", "match_id_hash"]

    # 2 standard ranked or 22 captain mode.
    # Named row_filter so the builtin `filter` is not shadowed.
    row_filter = "(game_mode == 2 or game_mode == 22) and game_time > 0"

    df = df.query(row_filter)
    df = df.drop(labels=column_to_drop, axis=1)

    # r1..r5 then d1..d5 participation columns.
    tf_toreplace = [
        f"{team}{slot}_teamfight_participation"
        for team in ("r", "d")
        for slot in range(1, 6)
    ]
    for label in tf_toreplace:
        df.loc[df[label] > 1.0, label] = 1

    print("Filtering Df: ", row_filter, "\n")

    print("Dropped: ", column_to_drop, "\n")

    print("Dataframe Shape: ", df.shape, "\n")

    # Targets are matched to features by index label.
    # NOTE(review): assumes both CSV files list matches in the same row order - confirm.
    target = target.loc[df.index]
    print(f"Target shape: {target.shape}")
    return df, target


def get_hero_id_labels(df: pd.DataFrame) -> list[str]:
    """Return (and print) the column names of ``df`` that end with ``_hero_id``.

    Column order is preserved; an empty list is returned when nothing matches.
    """
    hero_id_labels = []
    for column in df.columns:
        if column.endswith('_hero_id'):
            hero_id_labels.append(column)
    print("Hero Id Labels:",hero_id_labels,"\n")
    return hero_id_labels

def get_single_hero_labels(df: pd.DataFrame) -> list[str]:
    """Return (and print) the columns that start with ``d`` or ``r`` followed by a digit.

    These are the per-player columns such as ``r1_gold`` or ``d3_x``.
    """
    per_player_prefix = re.compile(r"^(d|r)\d")
    single_hero_labels = [name for name in df.columns if per_player_prefix.match(name)]
    print("Single Player Labels:",single_hero_labels,"\n")
    return single_hero_labels

def drop_heros_labels(df:pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without its hero columns.

    If raw ``*_hero_id`` columns are present, only those are dropped.
    Otherwise the already-encoded hero columns are dropped:

    * team indicators such as ``r_1`` or ``d_124``;
    * per-player one-hot columns such as ``r1_hero_id_12`` (the names
      ``pd.get_dummies`` produces for ``r1_hero_id``); the older ``r1_heroid12``
      spelling is still accepted.

    Args:
        df: frame to strip; it is not modified.

    Returns:
        A new DataFrame without the hero columns.
    """
    hero_id_labels = get_hero_id_labels(df)
    if not hero_id_labels:
        team_indicator = re.compile(r"^(d|r)_\d+$")  # r_1 d_2 r_124 ...
        # The previous pattern only accepted "heroid<N>", so the real one-hot
        # names (r1_hero_id_12, d3_hero_id_101, ...) were never dropped.
        player_one_hot = re.compile(r"^(d|r)\d_(hero_id_|heroid)\d+$")
        hero_id_labels = [
            label for label in df.columns
            if team_indicator.match(label) or player_one_hot.match(label)
        ]

    # Single drop instead of rebuilding the frame once per column.
    df = df.drop(labels=hero_id_labels,axis=1)

    print("Dropped Dataframe Shape:",df.shape)

    return df


def playerstats_playerheros_transform(df: pd.DataFrame):
    """One-hot encode the ten per-player hero id columns.

    The encoded columns are ``r1_hero_id`` .. ``r5_hero_id`` followed by
    ``d1_hero_id`` .. ``d5_hero_id``; every other column is left as is.
    """
    features_toonehot = [
        f"{team}{slot}_hero_id"
        for team in ("r", "d")
        for slot in range(1, 6)
    ]
    return pd.get_dummies(df, columns=features_toonehot)

def playerstats_teamheros_transform(df: pd.DataFrame):
    """Replace the per-player hero id columns with per-team hero indicators.

    For every hero id found in any ``*_hero_id`` column, two 0/1 columns are
    appended: ``r_<id>`` (one of r1..r5 plays that hero) and ``d_<id>`` (one of
    d1..d5 plays it). The ``*_hero_id`` columns are then removed. Per-player
    statistics are left untouched.

    Unlike the previous implementation, the caller's frame is not modified:
    the indicators are built separately and concatenated in one step, which
    also avoids inserting hundreds of columns one at a time.

    Args:
        df: frame containing ``r1_hero_id`` .. ``r5_hero_id`` and
            ``d1_hero_id`` .. ``d5_hero_id``.

    Returns:
        A new DataFrame with the hero id columns replaced by the indicators.
    """
    # Kept from the original so the process-wide warning filter stays the same.
    warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
    hero_id_labels = get_hero_id_labels(df)

    radiant_slots = [f"r{slot}_hero_id" for slot in range(1, 6)]
    dire_slots = [f"d{slot}_hero_id" for slot in range(1, 6)]

    # Union of the ids seen in every hero column, sorted so the generated
    # column order is deterministic.
    hero_ids = sorted(set().union(*(df[label] for label in hero_id_labels)))

    print("Numbers of Heros: ",len(hero_ids),"\n")

    indicators = {}
    for hero_id in hero_ids:
        indicators[f"r_{hero_id}"] = (df[radiant_slots] == hero_id).any(axis=1).astype(int)
        indicators[f"d_{hero_id}"] = (df[dire_slots] == hero_id).any(axis=1).astype(int)

    # Drop ri_hero_id / di_hero_id and append all indicators in one go.
    df = pd.concat(
        [df.drop(labels=hero_id_labels,axis=1), pd.DataFrame(indicators, index=df.index)],
        axis=1,
    )

    print("Dataframe Shape:",df.shape,"\n")

    print("NaN Count: ",pd.isna(df).sum().sum(),"\n")

    return df

def teamstats_teamheros_transform(df: pd.DataFrame):
    """Collapse per-player data into per-team hero indicators and stat totals.

    Two steps are applied:

    1. Every ``*_hero_id`` column is replaced by 0/1 columns ``r_<id>`` and
       ``d_<id>`` telling whether any of the five players of that team plays
       hero ``<id>``.
    2. Every per-player column (``r1_gold``, ``d3_kills``, ...) except the
       ``*_x`` / ``*_y`` positions is summed into a team column (``r_gold``,
       ``d_kills``, ...) and removed. Position columns are kept unchanged.

    Unlike the previous implementation, the caller's frame is not modified.

    Args:
        df: frame containing ``r1_hero_id`` .. ``r5_hero_id``,
            ``d1_hero_id`` .. ``d5_hero_id`` and the per-player statistics.

    Returns:
        A new DataFrame with team-level hero indicators and statistics.
    """
    # The indicators are concatenated in one step; the ignore is kept so the
    # process-wide warning filter (and the notebook output) stays the same.
    warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
    hero_id_labels = get_hero_id_labels(df)

    radiant_slots = [f"r{slot}_hero_id" for slot in range(1, 6)]
    dire_slots = [f"d{slot}_hero_id" for slot in range(1, 6)]

    # Union of the ids seen in every hero column, sorted so the generated
    # column order is deterministic.
    hero_ids = sorted(set().union(*(df[label] for label in hero_id_labels)))

    print("Numbers of Heros: ",len(hero_ids),"\n")

    indicators = {}
    for hero_id in hero_ids:
        indicators[f"r_{hero_id}"] = (df[radiant_slots] == hero_id).any(axis=1).astype(int)
        indicators[f"d_{hero_id}"] = (df[dire_slots] == hero_id).any(axis=1).astype(int)

    # Drop ri_hero_id / di_hero_id and append all indicators in one go; the
    # result is a new frame, so the additions below never reach the caller.
    df = pd.concat(
        [df.drop(labels=hero_id_labels,axis=1), pd.DataFrame(indicators, index=df.index)],
        axis=1,
    )

    print("NaN Count: ",pd.isna(df).sum().sum(),"\n")

    single_hero_labels = get_single_hero_labels(df)
    # Positions are not summed: they stay as per-player columns.
    summed_labels = [
        label for label in single_hero_labels
        if not label.endswith(("_x", "_y"))
    ]
    for label in summed_labels:
        # r1_gold -> r_gold (relies on a single-digit player slot).
        new_label = label[0] + label[2:]
        if new_label in df.columns:
            df[new_label] += df[label]
        else:
            df[new_label] = df[label]

    df = df.drop(labels=summed_labels,axis=1).copy()
    print("New Dataframe Shape:",df.shape,"\n")

    return df

def team_mean_position_transform(df: pd.DataFrame):
    """Replace the per-player map positions with each team's mean position.

    The columns ``r1_x`` .. ``r5_x``, ``r1_y`` .. ``r5_y`` and the matching
    ``d*`` columns are removed, and four columns are appended in this order:
    ``radiant_avg_x``, ``radiant_avg_y``, ``dire_avg_x``, ``dire_avg_y``.

    Unlike the previous implementation, the caller's frame is not modified.

    Args:
        df: frame containing the twenty per-player position columns.

    Returns:
        A new DataFrame with the averaged positions.
    """
    slots = range(1, 6)
    team_positions = {
        "radiant_avg_x": [f"r{slot}_x" for slot in slots],
        "radiant_avg_y": [f"r{slot}_y" for slot in slots],
        "dire_avg_x": [f"d{slot}_x" for slot in slots],
        "dire_avg_y": [f"d{slot}_y" for slot in slots],
    }
    position_labels = [label for labels in team_positions.values() for label in labels]

    # drop() returns a new frame, so the averages are written to the result only.
    result = df.drop(labels=position_labels, axis=1)
    for average_label, labels in team_positions.items():
        result[average_label] = df[labels].mean(axis=1)

    return result

def team_weighted_mean_position_transform(df: pd.DataFrame):
    """Replace per-player positions with a distance-weighted team position.

    Each player's weight is the inverse of the ``distance_<player>`` value
    computed by ``get_average_distances``. The weighted mean of the five x
    (resp. y) coordinates of a team is stored in
    ``radiant_Weighted_avg_x``, ``radiant_Weighted_avg_y``,
    ``dire_Weighted_avg_x`` and ``dire_Weighted_avg_y`` (appended in that
    order), and the twenty per-player position columns are removed.

    Unlike the previous implementation, the caller's frame is not modified.

    Args:
        df: frame containing the twenty per-player position columns.

    Returns:
        A new DataFrame with the weighted team positions.
    """
    slots = range(1, 6)

    # get_average_distances adds its distance_* columns to the frame it is
    # given, so it works on a copy.
    with_distances = get_average_distances(df.copy(deep=True))

    position_labels = []
    weighted_means = {}
    for team_name, prefix in (("radiant", "r"), ("dire", "d")):
        x_labels = [f"{prefix}{slot}_x" for slot in slots]
        y_labels = [f"{prefix}{slot}_y" for slot in slots]
        distance_labels = [f"distance_{prefix}{slot}" for slot in slots]

        # Replace zero distances with 1 to avoid division by zero, only relevant
        # for the specific case of all players in the same position.
        weights = 1 / with_distances[distance_labels].replace(0, 1)
        total_weight = weights.sum(axis=1)

        for axis_name, labels in (("x", x_labels), ("y", y_labels)):
            weighted_sum = sum(
                df[label] * weights[distance_label]
                for label, distance_label in zip(labels, distance_labels)
            )
            # Normalise by the sum of weights.
            weighted_means[f"{team_name}_Weighted_avg_{axis_name}"] = weighted_sum / total_weight

        position_labels += x_labels + y_labels

    # drop() returns a new frame, so the new columns are written to the result only.
    result = df.drop(labels=position_labels, axis=1)
    for label, values in weighted_means.items():
        result[label] = values
    return result

#this is ok, tested
def get_average_distances(df: pd.DataFrame):
    """Add one ``distance_<player>`` column per player to ``df`` and return it.

    The values come from ``calculate_distances``, called once for the Radiant
    position columns (``r1_x`` .. ``r5_y``) and once for the Dire ones. The
    frame is modified in place; the new columns are ``distance_r1`` ..
    ``distance_r5`` followed by ``distance_d1`` .. ``distance_d5``.
    """
    slots = range(1, 6)
    team_labels = [
        ([f"{prefix}{slot}_x" for slot in slots], [f"{prefix}{slot}_y" for slot in slots])
        for prefix in ("r", "d")
    ]

    # Both teams are computed before any column is written to df.
    per_team = [calculate_distances(df, xs, ys) for xs, ys in team_labels]

    for team_distances in per_team:
        for x_label, mean_distance in team_distances.items():
            player = x_label[:-2]  # "r1_x" -> "r1"
            df[f'distance_{player}'] = mean_distance

    return df
#this is ok, tested
def calculate_distances(df: pd.DataFrame, x_labels, y_labels):
    """Return, per player, the mean Euclidean distance to every other player.

    ``x_labels[i]`` and ``y_labels[i]`` name the coordinate columns of player
    ``i``. The result maps each x label to an array holding, for every row of
    ``df``, the mean of the distances from that player to all the others.
    """
    collected = {}
    for i, x_i in enumerate(x_labels):
        bucket = collected.setdefault(x_i, [])
        for j, x_j in enumerate(x_labels):
            if i == j:
                continue
            dx = df[x_i] - df[x_j]
            dy = df[y_labels[i]] - df[y_labels[j]]
            bucket.append(np.sqrt(dx**2 + dy**2))
    return {label: np.mean(pairwise, axis=0) for label, pairwise in collected.items()}


def feature_selection_transform(df: pd.DataFrame,target: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """Keep the columns whose random-forest importance exceeds ``threshold``.

    A ``RandomForestClassifier`` (``max_depth=10``, seeded with the module
    level ``seed``) is fitted on ``df`` / ``target``. The returned frame holds
    the selected columns ordered from most to least important.
    """
    forest = RandomForestClassifier(max_depth=10,random_state=seed)
    forest.fit(df,target)

    importance_by_name = dict(zip(forest.feature_names_in_, forest.feature_importances_))

    # Ascending sort, then reversed: most important feature first.
    ranked = sorted(importance_by_name.items(), key=lambda pair: pair[1])[::-1]

    n_selected_features = sum(1 for _, importance in ranked if importance > threshold)
    selected_names = [name for name, _ in ranked[:n_selected_features]]

    df_reduced = df[selected_names]
    print("Shape Tranformation:\n",df.shape,"->", df_reduced.shape)

    return df_reduced

def pca_tranform(df:pd.DataFrame, variance_ratio:float):
    """Standardise ``df`` and project it with PCA.

    ``variance_ratio`` is passed to ``PCA`` as ``n_components``; the PCA uses
    ``random_state=42``. Returns the output of ``PCA.fit_transform``.
    """
    standardized = StandardScaler().fit_transform(df)
    reducer = PCA(n_components=variance_ratio,random_state=42)
    return reducer.fit_transform(standardized)

In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import GridSearchCV,StratifiedKFold, HalvingGridSearchCV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

#setup svm

In [None]:
# Load the filtered feature frame and the target rows that go with it.
df, target = get_dataset()
# Label used as the classification target by every search below.
win = target['radiant_win']

Filtering Df:  (game_mode == 2 or game_mode == 22) and game_time > 0 

Dropped:  ['lobby_type', 'chat_len', 'game_mode', 'match_id_hash'] 

Dataframe Shape:  (32153, 242) 

Target shape: (32153, 6)


In [None]:
# Three candidate feature sets, all derived from the team-level frame:
#   df_tt  - team stats + team hero indicators (per-player positions kept)
#   df_mp  - df_tt with positions replaced by the team mean position
#   df_wmp - df_tt with positions replaced by the distance-weighted mean position
df_tt = teamstats_teamheros_transform(df.copy())
df_mp = team_mean_position_transform(df_tt.copy())
df_wmp = team_weighted_mean_position_transform(df_tt.copy())


def _select_and_scale(features):
    """Keep the features with forest importance above 0.01, then standardise them."""
    selected = feature_selection_transform(features, win, 0.01)
    return StandardScaler().fit_transform(selected)


# NOTE(review): selection and scaling are fitted on all rows before the
# cross-validated searches below, so their CV scores may be optimistic.
df_tt_selected = _select_and_scale(df_tt)
df_mp_selected = _select_and_scale(df_mp)
df_wmp_selected = _select_and_scale(df_wmp)

Hero Id Labels: ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id'] 

Numbers of Heros:  115 

NaN Count:  0 

Single Player Labels: ['r1_kills', 'r1_deaths', 'r1_assists', 'r1_denies', 'r1_gold', 'r1_lh', 'r1_xp', 'r1_health', 'r1_max_health', 'r1_max_mana', 'r1_level', 'r1_x', 'r1_y', 'r1_stuns', 'r1_creeps_stacked', 'r1_camps_stacked', 'r1_rune_pickups', 'r1_firstblood_claimed', 'r1_teamfight_participation', 'r1_towers_killed', 'r1_roshans_killed', 'r1_obs_placed', 'r1_sen_placed', 'r2_kills', 'r2_deaths', 'r2_assists', 'r2_denies', 'r2_gold', 'r2_lh', 'r2_xp', 'r2_health', 'r2_max_health', 'r2_max_mana', 'r2_level', 'r2_x', 'r2_y', 'r2_stuns', 'r2_creeps_stacked', 'r2_camps_stacked', 'r2_rune_pickups', 'r2_firstblood_claimed', 'r2_teamfight_participation', 'r2_towers_killed', 'r2_roshans_killed', 'r2_obs_placed', 'r2_sen_placed', 'r3_kills', 'r3_deaths', 'r3_assists', 'r3_denies', 'r3_gold', 'r

In [None]:
# Search space for the RBF-kernel SVM: C and gamma are powers of two.
params = {
    'kernel': ["rbf"],
    'C': [2.0 ** exponent for exponent in (-4, -2, -1)],
    'gamma': [2.0 ** exponent for exponent in (-3, -2, -1)],
}

model = SVC(random_state=42)

# Single metric used both to rank candidates and to pick the refitted model.
scorings = "roc_auc"

# Successive-halving grid search; `factor` controls the proportion of
# candidates that are selected for each subsequent iteration.
best_model = HalvingGridSearchCV(
    estimator=model,
    param_grid=params,
    factor=3,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    scoring=scorings,
    refit=True,
    return_train_score=True,
    verbose=1,
)

# Team Stats + Team Heroes Dataset

In [None]:
# Run the halving grid search on the selected and scaled team-stats / team-heroes features.
best_model.fit(df_tt_selected,win)

n_iterations: 3
n_required_iterations: 3
n_possible_iterations: 3
min_resources_: 3572
max_resources_: 32153
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 9
n_resources: 3572
Fitting 5 folds for each of 9 candidates, totalling 45 fits
----------
iter: 1
n_candidates: 3
n_resources: 10716
Fitting 5 folds for each of 3 candidates, totalling 15 fits
----------
iter: 2
n_candidates: 1
n_resources: 32148
Fitting 5 folds for each of 1 candidates, totalling 5 fits


In [None]:
# Display the raw cross-validation results (one entry per candidate and iteration) of the last fit.
best_model.cv_results_

{'iter': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2]),
 'n_resources': array([ 3572,  3572,  3572,  3572,  3572,  3572,  3572,  3572,  3572,
        10716, 10716, 10716, 32148]),
 'mean_fit_time': array([ 0.61010022,  0.70792618,  0.70186806,  0.7168849 ,  0.7552125 ,
         0.70802002,  0.61624603,  0.74087434,  0.70285339,  5.81427093,
         5.69469457,  6.55231075, 94.2154479 ]),
 'std_fit_time': array([0.1081773 , 0.12712719, 0.01803711, 0.23909807, 0.14732611,
        0.00457278, 0.09093591, 0.14162157, 0.01093992, 0.36121744,
        0.33722968, 0.08625725, 4.15473379]),
 'mean_score_time': array([ 0.19901729,  0.19990664,  0.1774941 ,  0.37957339,  0.20345268,
         0.17821412,  0.19193268,  0.20204043,  0.17851915,  1.46745119,
         1.87052865,  1.36653647, 12.87454567]),
 'std_score_time': array([0.04886464, 0.05415776, 0.00687255, 0.28681734, 0.05855127,
        0.01394628, 0.0455222 , 0.05188474, 0.00576133, 0.02143463,
        0.36952965, 0.00792766, 0.0999432

In [None]:
# Summarise the search: best parameters plus the mean CV test/train score of
# that same candidate.
cv_results:dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

mean_test_score_list = cv_results["mean_test_score"]
mean_train_score_list = cv_results["mean_train_score"]

# cv_results_ of a halving search holds the rows of every iteration, and
# rank_test_score ranks them all together, so argmin(rank) can point at an
# early candidate scored on a small subsample. best_index_ is the row that
# belongs to best_params_ (chosen from the last iteration).
best_test_position = best_model.best_index_

results = [{
    "Scoring": scorings,
    "Mean Test Score": mean_test_score_list[best_test_position],
    "Mean Train Score": mean_train_score_list[best_test_position],
}]

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'C': 0.5, 'gamma': 0.125, 'kernel': 'rbf'} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.769465,0.945839


# Mean Position Dataset

In [None]:
# Re-run the same halving grid search on the mean-position feature set.
best_model.fit(df_mp_selected,win)

n_iterations: 3
n_required_iterations: 3
n_possible_iterations: 3
min_resources_: 3572
max_resources_: 32153
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 9
n_resources: 3572
Fitting 5 folds for each of 9 candidates, totalling 45 fits
----------
iter: 1
n_candidates: 3
n_resources: 10716
Fitting 5 folds for each of 3 candidates, totalling 15 fits
----------
iter: 2
n_candidates: 1
n_resources: 32148
Fitting 5 folds for each of 1 candidates, totalling 5 fits


In [None]:
# Display the raw cross-validation results (one entry per candidate and iteration) of the last fit.
best_model.cv_results_

{'iter': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2]),
 'n_resources': array([ 3572,  3572,  3572,  3572,  3572,  3572,  3572,  3572,  3572,
        10716, 10716, 10716, 32148]),
 'mean_fit_time': array([ 0.49672146,  0.55686059,  0.49060178,  0.51034079,  0.48031883,
         0.54136271,  0.43912621,  0.55059848,  0.49116545,  5.11534605,
         4.75626931,  4.8147234 , 44.34846053]),
 'std_fit_time': array([0.01429085, 0.10317801, 0.00637672, 0.09561289, 0.00581511,
        0.1084145 , 0.00655783, 0.09053895, 0.01472579, 0.35521908,
        0.35006879, 0.47539303, 0.65003982]),
 'mean_score_time': array([0.14905338, 0.17790909, 0.15688872, 0.14830518, 0.1471981 ,
        0.18045602, 0.13133736, 0.16055627, 0.15789766, 1.34998212,
        1.14074645, 1.02866292, 9.48817949]),
 'std_score_time': array([0.00617245, 0.0459332 , 0.00383184, 0.0369018 , 0.00435471,
        0.04897472, 0.00670497, 0.04292227, 0.01006507, 0.32113538,
        0.19720008, 0.01233538, 0.48808876]),
 'param_

In [None]:
# Summarise the search: best parameters plus the mean CV test/train score of
# that same candidate.
cv_results:dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

mean_test_score_list = cv_results["mean_test_score"]
mean_train_score_list = cv_results["mean_train_score"]

# cv_results_ of a halving search holds the rows of every iteration, and
# rank_test_score ranks them all together, so argmin(rank) can point at an
# early candidate scored on a small subsample. best_index_ is the row that
# belongs to best_params_ (chosen from the last iteration).
best_test_position = best_model.best_index_

results = [{
    "Scoring": scorings,
    "Mean Test Score": mean_test_score_list[best_test_position],
    "Mean Train Score": mean_train_score_list[best_test_position],
}]

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'C': 0.5, 'gamma': 0.125, 'kernel': 'rbf'} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.798335,0.844824


# Weighted Mean Position Dataset


In [None]:
# Re-run the same halving grid search on the weighted-mean-position feature set.
best_model.fit(df_wmp_selected,win)

n_iterations: 3
n_required_iterations: 3
n_possible_iterations: 3
min_resources_: 3572
max_resources_: 32153
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 9
n_resources: 3572
Fitting 5 folds for each of 9 candidates, totalling 45 fits
----------
iter: 1
n_candidates: 3
n_resources: 10716
Fitting 5 folds for each of 3 candidates, totalling 15 fits
----------
iter: 2
n_candidates: 1
n_resources: 32148
Fitting 5 folds for each of 1 candidates, totalling 5 fits


In [None]:
# Summarise the search: best parameters plus the mean CV test/train score of
# that same candidate.
cv_results:dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

mean_test_score_list = cv_results["mean_test_score"]
mean_train_score_list = cv_results["mean_train_score"]

# cv_results_ of a halving search holds the rows of every iteration, and
# rank_test_score ranks them all together, so argmin(rank) can point at an
# early candidate scored on a small subsample. best_index_ is the row that
# belongs to best_params_ (chosen from the last iteration).
best_test_position = best_model.best_index_

results = [{
    "Scoring": scorings,
    "Mean Test Score": mean_test_score_list[best_test_position],
    "Mean Train Score": mean_train_score_list[best_test_position],
}]

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'C': 0.5, 'gamma': 0.125, 'kernel': 'rbf'} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.79822,0.844748


# F1 of the best-performing model

In [None]:
# Cross-validate, with F1 scoring, the configuration reported as best above
# (C = 0.5, gamma = 0.125) on the mean-position feature set.
params = {
    'kernel': ["rbf"],
    'C' : [2**(-1)],
    'gamma' : [2**(-3)]
}

model = SVC(random_state=42)
# With a single candidate the halving search runs one iteration on all samples.
f1_model = HalvingGridSearchCV(
    estimator=model,
    param_grid=params,
    scoring='f1',
    refit=True,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    return_train_score=True,
    verbose=1,
    factor=3  # proportion of candidates that are selected for each subsequent iteration
)
f1_model.fit(df_mp_selected,win)
cv_results:dict = f1_model.cv_results_

print("Best Parameters:\n",f1_model.best_params_,'\n')

mean_test_score_list = cv_results["mean_test_score"]
mean_train_score_list = cv_results["mean_train_score"]

# best_index_ is the cv_results_ row that belongs to best_params_; it stays
# correct if more candidates (and therefore more halving iterations) are added.
best_test_position = f1_model.best_index_

results = [{
    "Scoring": "f1",
    "Mean Test Score": mean_test_score_list[best_test_position],
    "Mean Train Score": mean_train_score_list[best_test_position],
}]

df_results = pd.DataFrame(results)

df_results

n_iterations: 1
n_required_iterations: 1
n_possible_iterations: 1
min_resources_: 32153
max_resources_: 32153
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 1
n_resources: 32153
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Best Parameters:
 {'C': 0.5, 'gamma': 0.125, 'kernel': 'rbf'} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,f1,0.751812,0.783029
