## Load the dataset

In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import balanced_accuracy_score, f1_score, accuracy_score
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import train_test_split, StratifiedKFold
from skopt import BayesSearchCV

save_in_test_folder = True
if save_in_test_folder:
    filepath = "../TestModule"
else:
    filepath = "."

seed = 42
FILENAME = "dataset/train_dataset.csv"

#Prepare train data
df1 = pd.read_csv(FILENAME, sep=",", low_memory=False)

# get features names
features = list(df1.columns)
features_to_remove = ["label", "ts", "src_ip", "dst_ip", "dns_query", "ssl_subject", "ssl_issuer", "http_uri", "type", "http_referrer", "http_user_agent"]
features = [feature for feature in features if feature not in features_to_remove]
df1 = df1[features + ["type"]]

# Converte i valori in numeri, sostituendo quelli non validi con NaN
df1["src_bytes"] = pd.to_numeric(df1["src_bytes"], errors='coerce')
# Filtra le righe con NaN (valori non convertibili)
df1 = df1.dropna(subset=["src_bytes"])
# Converte i valori rimasti in interi
df1.loc[:, "src_bytes"] = df1["src_bytes"].astype(int)

print("#Righe: " + str(df1.shape[0]) + " #Colonne: " + str(df1.shape[1]))
df1 = df1.dropna()
print("#Righe: " + str(df1.shape[0]) + " #Colonne: " + str(df1.shape[1]))

X = df1[features]
y = df1["type"]

le = preprocessing.LabelEncoder()
le.fit(y)
with open(f"{filepath}/transformer/target_encoder.save", "wb") as f:
    pickle.dump(le, f)

y = le.transform(y)

indices = np.arange(X.shape[0])
train_idx, val_idx = train_test_split(indices, test_size=0.2, stratify=y, random_state=seed)

# fold = np.zeros(X.shape[0])
# fold[train_idx] = -1

n_splits = 10
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

fold = np.full(len(y), -1)  # Inizializza tutto con -1 (default: train)

# Assegna i fold ai campioni
for fold_number, (_, val_idx) in enumerate(skf.split(X, y)):
    fold[val_idx] = fold_number  # Assegna il numero del fold ai campioni di validazione

ps = PredefinedSplit(fold)
ps.get_n_splits()

# for i, (train_index, test_index) in enumerate(ps.split()):
#     print(f"Fold {i}:")
#     print(f"  Train: index={train_index}")
#     print(f"  Test:  index={test_index}")

# take only x with index in val_idx
X_val = X.iloc[val_idx]
y_val = y[val_idx]
X_train = X.iloc[train_idx]
y_train = y[train_idx]

#Righe: 616983 #Colonne: 36
#Righe: 616983 #Colonne: 36


In [3]:
df1

Unnamed: 0,src_port,dst_port,proto,service,duration,src_bytes,dst_bytes,conn_state,missed_bytes,src_pkts,...,http_version,http_request_body_len,http_response_body_len,http_status_code,http_orig_mime_types,http_resp_mime_types,weird_name,weird_addl,weird_notice,type
0,53972,10502,tcp,-,0.000000,0.0,0,OTH,0,0,...,-,0,0,0,-,-,-,-,-,normal
1,37513,53,udp,dns,0.163608,47.0,423,SF,0,1,...,-,0,0,0,-,-,-,-,-,normal
2,2077,2077,tcp,-,0.208218,0.0,0,S0,0,120,...,-,0,0,0,-,-,-,-,-,normal
3,53972,10502,tcp,-,0.000000,0.0,0,OTH,0,0,...,-,0,0,0,-,-,-,-,-,normal
4,1880,47979,tcp,-,0.000000,0.0,0,OTH,0,1,...,-,0,0,0,-,-,-,-,-,normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
616997,53116,53,udp,dns,0.044893,84.0,424,SF,0,2,...,-,0,0,0,-,-,-,-,-,mitm
616998,57669,53,udp,dns,0.002957,84.0,436,SF,0,2,...,-,0,0,0,-,-,-,-,-,mitm
616999,54730,53,udp,dns,0.016624,58.0,178,SF,0,2,...,-,0,0,0,-,-,-,-,-,mitm
617000,59846,443,tcp,ssl,48.271568,3219.0,1212,SF,0,26,...,-,0,0,0,-,-,-,-,-,mitm


## Preprocess the dataset

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.compose import ColumnTransformer

import pickle

categorical_columns = X_train.select_dtypes(include=["object"]).columns.tolist()
numeric_columns = X_train.select_dtypes(include=["int64", "float64"]).columns.tolist()
# boolean_columns = X_train.select_dtypes(include=["bool"]).columns.tolist()

ct = ColumnTransformer(
    [
        ("cat", OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), categorical_columns),  # Trasforma le colonne categoriche
        # ("ordinal", OneHotEncoder(handle_unknown='infrequent_if_exist', sparse_output=False), categorical_columns),  # Trasforma le colonne categoriche
        ("scale", StandardScaler(), numeric_columns)  # Normalizza le colonne numeriche
    ],
    remainder="passthrough"  # Mantieni le altre colonne invariate
)
ct.set_output(transform="pandas")

ct = ct.fit(X_train)
with open(f"{filepath}/transformer/transformer.save", "wb") as f:
    pickle.dump(ct, f)

# train set
X_train = ct.transform(X_train)

# validation set
X_val = ct.transform(X_val)

# X
X = ct.transform(X)

In [3]:
rf = RandomForestClassifier(n_estimators=3, random_state=seed)
sfs = SequentialFeatureSelector(estimator=rf, direction="backward", n_features_to_select="auto", scoring="balanced_accuracy", n_jobs=12)

# sfs.fit(X_train, y_train)
# with open(f"{filepath}/transformer/sfs.save", "wb") as f:
#     pickle.dump(sfs, f)


# rename "scale__" to all numeric columns in x_train
X_train.columns = [col.replace("remainder__", "scale__") for col in X_train.columns]
X_val.columns = [col.replace("remainder__", "scale__") for col in X_val.columns]
X.columns = [col.replace("remainder__", "scale__") for col in X.columns]

with open("../TestModule/transformer/sfs.save", "rb") as f:
    sfs: SequentialFeatureSelector = pickle.load(f)

# train set
X_train = sfs.transform(X_train)

# validation set
X_val = sfs.transform(X_val)

# X
X = sfs.transform(X)

In [5]:
print(sfs.get_feature_names_out())
print(sfs.get_support())
with open(f"{filepath}/transformer/sfs.save", "wb") as f:
    pickle.dump(sfs, f)

['cat__service' 'cat__dns_AA' 'cat__dns_RA' 'cat__dns_rejected'
 'cat__ssl_established' 'cat__http_method' 'cat__http_resp_mime_types'
 'scale__src_port' 'scale__dst_port' 'scale__duration' 'scale__src_bytes'
 'scale__dst_bytes' 'scale__missed_bytes' 'scale__src_pkts'
 'scale__src_ip_bytes' 'scale__dst_ip_bytes'
 'scale__http_response_body_len' 'scale__http_status_code']
[False  True False  True False  True  True False False False  True False
  True False False  True False False False  True  True  True  True  True
  True  True  True False  True False False False False  True  True]


NotFittedError: This RandomForestClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [None]:

from sklearn import discriminant_analysis

# pca = decomposition.PCA(n_components=0.95, random_state=seed)
pca = discriminant_analysis.LinearDiscriminantAnalysis(n_components=9)

pca.set_output(transform="pandas")

pca.fit(X_train, y_train)
with open(f"{filepath}/transformer/pca.save", "wb") as f:
    pickle.dump(pca, f)

X_train_pca = pca.transform(X_train)
X_val_pca = pca.transform(X_val)
X_pca = pca.transform(X)

# X_train = X_train_pca
# X_val = X_val_pca
# X = X_pca

## Apply K-Nearest Neighbour

In [6]:
from sklearn.neighbors import KNeighborsClassifier
import os

os.environ['OMP_NUM_THREADS'] = '12'
from sklearn.model_selection import RandomizedSearchCV

param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],  # Number of neighbors to consider
    'weights': ['uniform', 'distance'],  # Weight function
    'metric': ['euclidean', 'manhattan', 'minkowski'],  # Distance metrics
    'p': [1, 2],  # Minkowski parameter (1 for Manhattan, 2 for Euclidean)
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],  # Algorithm to compute neighbors
    'leaf_size': [30, 50, 70, 90, 100]
}

grid = RandomizedSearchCV(KNeighborsClassifier(), param_grid, cv=ps, n_iter=10, random_state=seed, n_jobs=12, verbose=2, scoring="balanced_accuracy")
grid.fit(X, y)

print("Best parameters:", grid.best_params_)
print("Best score:", grid.best_score_)
best_knn = grid.best_estimator_
with open(f"{filepath}/models/knn.save", "wb") as file:
    pickle.dump(best_knn, file)

y_pred = best_knn.predict(X_val)
print("Accuracy:", accuracy_score(y_val, y_pred))
print("Balanced accuracy:", balanced_accuracy_score(y_val, y_pred))
print("F1 score:", f1_score(y_val, y_pred, average="weighted"))
pd.DataFrame(grid.cv_results_)

# Accuracy: 0.9745696781095011
# Balanced accuracy: 0.8729493259671391
# F1 score: 0.9743348139040916
# knn = KNeighborsClassifier(n_jobs=12).fit(X_train, y_train)
# y_pred = knn.predict(X_val)
# print("Accuracy:", accuracy_score(y_val, y_pred))
# print("Balanced accuracy:", balanced_accuracy_score(y_val, y_pred))
# print("F1 score:", f1_score(y_val, y_pred, average="weighted"))
# with open( f"{filepath}/models/knn.save", "wb") as file:
#     pickle.dump(knn, file)

Fitting 10 folds for each of 10 candidates, totalling 100 fits
Best parameters: {'weights': 'distance', 'p': 1, 'n_neighbors': 3, 'metric': 'euclidean', 'leaf_size': 70, 'algorithm': 'auto'}
Best score: 0.8710343179322546
Accuracy: 0.999935168076761
Balanced accuracy: 0.9988783200648552
F1 score: 0.9999351242823835


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_weights,param_p,param_n_neighbors,param_metric,param_leaf_size,param_algorithm,...,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
0,0.392307,0.195572,321.808554,1.838529,uniform,2,5,minkowski,90,brute,...,0.837736,0.852339,0.845148,0.838031,0.840989,0.846554,0.845665,0.842366,0.005793,8
1,18.95868,16.936123,347.359505,113.404161,uniform,1,3,manhattan,100,kd_tree,...,0.853666,0.869219,0.858933,0.853286,0.851006,0.873041,0.866258,0.860481,0.007406,4
2,0.435112,0.219423,234.91455,79.347833,uniform,2,7,minkowski,90,brute,...,0.82992,0.839869,0.823889,0.832664,0.821712,0.829931,0.837411,0.82906,0.005785,10
3,1.02531,0.366506,403.875754,28.357348,distance,2,9,euclidean,90,brute,...,0.850477,0.853828,0.843179,0.848685,0.84509,0.849945,0.856705,0.847888,0.006436,5
4,1.018411,0.536654,875.38747,83.074616,uniform,1,5,manhattan,70,brute,...,0.840114,0.854929,0.843169,0.841705,0.838886,0.851533,0.851962,0.845496,0.006274,6
5,0.731444,0.473222,243.398184,97.898992,distance,1,3,euclidean,70,auto,...,0.866382,0.869658,0.877265,0.874733,0.86848,0.873085,0.879386,0.871034,0.00665,1
6,14.582977,6.177091,796.969173,62.721913,uniform,2,5,minkowski,70,ball_tree,...,0.837766,0.852339,0.845148,0.838026,0.840989,0.846554,0.845654,0.84237,0.005791,7
7,5.21567,0.254044,593.510718,303.167388,uniform,2,7,manhattan,30,ball_tree,...,0.82982,0.844652,0.823399,0.832923,0.828139,0.83434,0.838154,0.832669,0.005426,9
8,0.546016,0.271394,620.023638,55.486762,distance,2,5,manhattan,50,auto,...,0.863147,0.870928,0.868032,0.869098,0.863508,0.869513,0.87497,0.867138,0.00616,3
9,0.744108,0.341919,242.055618,83.124472,distance,2,3,minkowski,90,brute,...,0.866382,0.869658,0.877265,0.874733,0.86848,0.873085,0.879386,0.871034,0.00665,1


- Performance: 0.9187280941672238 con minmax
- Performance: 0.9549626207986386 senza minmax
- Accuracy: 0.9745696781095011 Balanced accuracy: 0.8729493259671391 F1 score: 0.9743348139040916

## Apply Random Forest

In [5]:
from sklearn.ensemble import RandomForestClassifier

param_grid = {
    'n_estimators': [100, 200, 300, 500],  # Number of trees in the forest
    'max_depth': [None, 10, 20, 30, 50],  # Maximum depth of the tree
    'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split a node
    'min_samples_leaf': [1, 2, 4],  # Minimum number of samples required at a leaf node
    'max_features': ['sqrt', 'log2', None],  # Number of features to consider when looking for the best split
    'bootstrap': [True, False],  # Whether bootstrap samples are used when building trees
    'criterion': ['gini', 'entropy', 'log_loss'],  # Split quality measure
    # 'class_weight': ['balanced', 'balanced_subsample', None]  # Weights associated with classes
}

grid = BayesSearchCV(RandomForestClassifier(random_state=seed, class_weight='balanced'), param_grid, cv=ps, n_iter=10, random_state=seed, verbose=2, n_jobs=12, scoring="balanced_accuracy")
grid.fit(X, y)

print("Best parameters:", grid.best_params_)
print("Best score:", grid.best_score_)
best_rf = grid.best_estimator_
with open(f"{filepath}/models/rf.save", "wb") as file:
    pickle.dump(best_rf, file)

y_pred = best_rf.predict(X_val)
print("Accuracy:", accuracy_score(y_val, y_pred))
print("Balanced accuracy:", balanced_accuracy_score(y_val, y_pred))
print("F1 score:", f1_score(y_val, y_pred, average="weighted"))

pd.DataFrame(grid.cv_results_)

# rf = RandomForestClassifier(random_state=seed, class_weight="balanced", n_jobs=12).fit(X_train, y_train)
# y_pred = rf.predict(X_val)
# print("Accuracy:", accuracy_score(y_val, y_pred))
# print("Balanced accuracy:", balanced_accuracy_score(y_val, y_pred))
# print("F1 score:", f1_score(y_val, y_pred, average="weighted"))
#
# with open(f"{filepath}/models/rf.save", "wb") as file:
#     pickle.dump(rf, file)


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Best parameters: OrderedDict({'bootstrap': False, 'criterion': 'log_loss', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 5, 'n_estimators': 200})
Best score: 0.9851496603902916
Accuracy: 0.9954617653732698
Balanced accuracy: 0.995593218348715
F1 score: 0.9955201289356593


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_criterion,param_max_depth,param_max_features,param_min_samples_leaf,param_min_samples_split,...,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
0,312.488653,1.672881,1.732622,0.27917,True,log_loss,50.0,sqrt,4,5,...,0.983113,0.982468,0.987439,0.9865,0.986031,0.979565,0.990377,0.984528,0.003087,2
1,565.094697,2.936796,0.616845,0.090349,False,log_loss,10.0,,4,2,...,0.971783,0.972529,0.975702,0.979299,0.974846,0.969277,0.979752,0.97453,0.003087,7
2,163.944228,1.230193,0.884031,0.163866,True,log_loss,,log2,1,5,...,0.977243,0.981224,0.985134,0.980366,0.979913,0.974154,0.98927,0.980366,0.004474,5
3,2721.355328,16.223837,3.879635,0.57594,False,gini,20.0,,2,2,...,0.961872,0.965735,0.966008,0.968651,0.971914,0.967117,0.980976,0.969547,0.005327,9
4,1304.434891,11.690157,1.442917,0.142656,False,entropy,20.0,,4,10,...,0.972133,0.971047,0.976643,0.978842,0.97323,0.972873,0.97997,0.975183,0.00283,6
5,421.761908,2.89746,1.57979,0.243875,False,log_loss,,sqrt,4,5,...,0.985171,0.985519,0.98623,0.986805,0.98601,0.978365,0.990792,0.98515,0.003188,1
6,1705.193035,9.654948,1.821003,0.280079,False,log_loss,10.0,,2,5,...,0.971808,0.972589,0.975789,0.979363,0.974982,0.969514,0.976282,0.974432,0.002546,8
7,1950.144876,15.673215,2.156522,0.291111,False,log_loss,20.0,,1,2,...,0.967591,0.956188,0.972189,0.967176,0.964706,0.962873,0.965042,0.964528,0.004983,10
8,252.737854,1.651852,0.854056,0.131026,False,log_loss,50.0,log2,2,2,...,0.978856,0.984269,0.983868,0.980269,0.978966,0.976791,0.989063,0.981738,0.003279,4
9,313.1063,1.823094,1.724825,0.247816,True,log_loss,30.0,sqrt,2,2,...,0.983308,0.982759,0.986345,0.98544,0.984602,0.97838,0.989361,0.98402,0.002722,3


0.9989123986553292 senza scaling

0.9990112715048448 con scaling

## Apply Support Vector Classifier with HP tuning

In [4]:
from sklearn.linear_model import SGDClassifier

# param_grid = {'C': [0.1, 1, 10, 100, 1000],
#               'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
#               'kernel': ['rbf']}

param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1],
    'loss': ["hinge", "log_loss", "modified_huber", "squared_hinge", "perceptron"],
    'penalty': ["l2", "l1", "elasticnet"],
    'learning_rate': ["optimal", "invscaling", "adaptive"],
    'eta0': [0.01, 0.1, 1, 10],
    'power_t': [0.1, 0.5, 1, 5],
    'average': [True, False]
}

# param_grid = {
#     'C': (0.1, 100, 'log-uniform'),
#     'gamma': (0.001, 10, 'log-uniform'),
#     'kernel': ["rbf"]
# }

grid = BayesSearchCV(
    SGDClassifier(random_state=seed, class_weight="balanced", verbose=0, n_jobs=12),
    param_grid,
    n_iter=10,  # Numero massimo di iterazioni
    cv=ps,
    verbose=1,
    random_state=seed,
    scoring="balanced_accuracy"
)
grid.fit(X, y)


print("Best parameters:", grid.best_params_)
print("Best score:", grid.best_score_)
best_svm = grid.best_estimator_
with open(f"{filepath}/models/svm.save", "wb") as file:
    pickle.dump(best_svm, file)
y_pred = best_svm.predict(X_val)
print("Accuracy:", accuracy_score(y_val, y_pred))
print("Balanced accuracy:", balanced_accuracy_score(y_val, y_pred))
print("F1 score:", f1_score(y_val, y_pred, average="weighted"))

pd.DataFrame(grid.cv_results_)


# svm = SVC(random_state=seed, class_weight="balanced", verbose=1, C=100).fit(X_train[:10000], y_train[:10000])
# svm = SGDClassifier(random_state=seed, class_weight="balanced", verbose=2, n_jobs=12).fit(X_train, y_train)
# y_pred = svm.predict(X_val)
# print("Accuracy:", accuracy_score(y_val, y_pred))
# print("Balanced accuracy:", balanced_accuracy_score(y_val, y_pred))
# print("F1 score:", f1_score(y_val, y_pred, average="weighted"))
#
# with open(f"{filepath}/models/svm.save", "wb") as file:
#     pickle.dump(svm, file)



Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Best parameters: OrderedDict({'alpha': 0.0001, 'average': False, 'eta0': 1, 'learning_rate': 'optimal', 'loss': 'modified_huber', 'penalty': 'l2', 'power_t': 0.5})
Best score: 0.6896217820090957
Accuracy: 0.6394372589062854
Balanced accuracy: 0.5549949558990029
F1 score: 0.6872223688164277


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_average,param_eta0,param_learning_rate,param_loss,param_penalty,...,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
0,265.175159,19.581271,0.010093,0.000831,0.01,False,10.0,optimal,squared_hinge,l1,...,0.353302,0.229067,0.207981,0.266864,0.480048,0.618529,0.191627,0.341619,0.133579,6
1,12.501902,0.930198,0.010171,0.001094,1.0,False,0.1,adaptive,perceptron,l2,...,0.697267,0.529499,0.661853,0.717868,0.709812,0.59164,0.651383,0.629937,0.067504,3
2,3.485439,0.557518,0.009635,0.000862,0.01,False,0.01,invscaling,hinge,l1,...,0.619842,0.695728,0.617492,0.656893,0.573519,0.625531,0.609339,0.594834,0.061251,4
3,28.110854,1.598656,0.009873,0.00131,1.0,True,1.0,adaptive,modified_huber,l2,...,0.002123,0.003096,0.002869,0.002626,0.002593,0.002804,0.002755,0.002551,0.000564,10
4,24.963539,2.173775,0.011439,0.003067,0.1,True,1.0,adaptive,perceptron,elasticnet,...,0.030763,0.034134,0.031136,0.033048,0.030649,0.029774,0.035949,0.031991,0.001989,9
5,3.406497,3.480001,0.011394,0.002534,0.1,False,0.01,optimal,perceptron,l1,...,0.001572,0.031217,0.02086,0.001572,0.001556,0.019984,0.001556,0.047422,0.099491,8
6,20.167309,1.075677,0.009573,0.000723,0.1,False,0.1,adaptive,modified_huber,l1,...,0.549029,0.566064,0.564573,0.560261,0.548543,0.557571,0.527424,0.555352,0.011657,5
7,18.337396,1.31213,0.009704,0.000781,0.01,False,0.1,adaptive,log_loss,l2,...,0.645402,0.643684,0.640912,0.642744,0.647444,0.639308,0.639729,0.64317,0.002671,2
8,17.128403,1.893942,0.0095,0.001284,1.0,False,10.0,invscaling,log_loss,l2,...,0.1325,0.024571,0.023891,0.181546,0.024588,0.001021,0.355927,0.130368,0.118101,7
9,8.357687,0.781089,0.009604,0.000704,0.0001,False,1.0,optimal,modified_huber,l2,...,0.692356,0.655289,0.728581,0.635628,0.74453,0.703297,0.583601,0.689622,0.048224,1
