In [8]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [9]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

def prepare_data(file='ready_file_for_ML_April.csv', drop_unchanged=False):
    """
    Builds one modelling DataFrame per condition from the raw CSV.

    Processing steps:
      1. Load the CSV and keep only the rows where at least one column other than
         'Geneid', 'shScramble', 'shDDX41', 'DMSO' and 'CX5461' is non-zero.
      2. One-hot encode 'Geneid' into integer ``Geneid_<id>`` columns.
      3. Derive the targets as the sign (-1, 0 or +1) of the differences
         ``condition1 = shScramble - shDDX41`` and ``condition2 = DMSO - CX5461``.
      4. Drop the four raw measurement columns the targets were derived from.

    Parameters:
    file (str): Path to the CSV file containing the initial data.
    drop_unchanged (bool): If True, rows whose target equals 0 (no difference
        between the two measurements) are removed from the corresponding frame.
        Defaults to False, which keeps those rows.

    Returns:
    tuple: ``(df1, df2)``. ``df1`` holds the features plus 'condition1';
        ``df2`` holds the same features plus 'condition2'.
    """
    measurement_columns = ['shScramble', 'shDDX41', 'DMSO', 'CX5461']

    df = pd.read_csv(file)

    # Keep a row only if at least one feature column (everything except the gene
    # ID and the four measurement columns) carries a non-zero value.
    feature_view = df.drop(columns=['Geneid'] + measurement_columns)
    result = df[feature_view.any(axis=1)]

    # One binary indicator column per distinct gene ID.
    one_hot_encoded_df = pd.get_dummies(result, columns=['Geneid'], dtype=int)

    # The sign of each difference is the class label: -1, 0 or +1.
    one_hot_encoded_df['condition1'] = np.sign(
        one_hot_encoded_df['shScramble'] - one_hot_encoded_df['shDDX41']
    )
    one_hot_encoded_df['condition2'] = np.sign(
        one_hot_encoded_df['DMSO'] - one_hot_encoded_df['CX5461']
    )

    # The raw measurements define the targets, so they must not remain as features.
    one_hot_encoded_df = one_hot_encoded_df.drop(columns=measurement_columns)

    # One frame per target; each frame drops the other condition's label.
    df1 = one_hot_encoded_df.drop(columns=['condition2'])
    df2 = one_hot_encoded_df.drop(columns=['condition1'])

    if drop_unchanged:
        df1 = df1[df1['condition1'] != 0]
        df2 = df2[df2['condition2'] != 0]

    return df1, df2


In [8]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report 

def model_data(df, target, model, param_dist, n_class=2, convert_to_binary=False, n_iter=2, actual_value=False):
    """
    Fit a classifier on ``df``, optionally tune it with a randomized search,
    and print its hold-out metrics.

    The caller's ``df`` is left untouched: the target is re-encoded on a copy,
    so the same frame can be reused for 2-class and 3-class runs in any order.

    Parameters:
    - df: DataFrame with the feature columns plus the ``target`` column.
    - target: Name of the target column.
    - model: Estimator exposing ``fit``/``predict``. When ``n_iter <= 1`` this
      very instance is fitted (no clone is made).
    - param_dist: Mapping of hyperparameter name to either a list of candidate
      values or an object exposing ``rvs`` (e.g. a frozen scipy.stats
      distribution); forwarded to RandomizedSearchCV.
    - n_class: With 2 (default) the label -1 is re-encoded as 0, so it shares a
      class with any existing 0 labels. With any other value -1 is re-encoded
      as 2, giving the labels 0, 1 and 2.
    - convert_to_binary: If True (and ``actual_value`` is False) every FEATURE
      value is replaced by 1 when it is > 0 and by 0 otherwise (default False).
    - n_iter: Number of candidates for the randomized search. Values <= 1 skip
      tuning and fit ``model`` exactly as passed in (default 2).
    - actual_value: If True the features are used as-is, rounded to 2 decimals,
      and ``convert_to_binary`` is ignored (default False).

    Outputs (printed; the function returns None):
    - Model accuracy and classification report on a 20% hold-out split.
    - Best parameters if tuned.
    """
    print("*********************************************************")
    print("Running the model task with the following parameters:")
    print("target:", target)
    print("model:", model)

    # Echo the search space. For distributions, n_iter values are drawn purely
    # for display; the randomized search below samples its own candidates with
    # random_state=42, so these printed values are not the ones evaluated.
    for param, dist in param_dist.items():
        sampled_values = dist.rvs(n_iter) if hasattr(dist, "rvs") else dist
        print(f"{param}: {sampled_values}")
    print("n_class:", n_class)
    print("n_iter:", n_iter)
    print("actual_value:", actual_value, "\n\n")

    # Re-encode the target without writing back into the caller's frame:
    # 2-class: -1 -> 0; otherwise: -1 -> 2 (0 and 1 are kept as they are).
    negative_label = 0 if n_class == 2 else 2
    y = df[target].replace({-1: negative_label})
    X = df.drop(columns=[target])

    if actual_value:
        X = X.round(2)
    elif convert_to_binary:
        # Vectorised form of "1 if value > 0 else 0" for every feature cell.
        X = (X > 0).astype(int)

    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    tuned = n_iter > 1
    if tuned:
        # Hyperparameter optimization using RandomizedSearchCV (3-fold CV, accuracy)
        randomized_search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=n_iter, scoring='accuracy', cv=3, verbose=1, random_state=42)
        randomized_search.fit(X_train, y_train)
        best_model = randomized_search.best_estimator_
    else:
        # No tuning: fit the estimator with the parameters it was created with
        model.fit(X_train, y_train)
        best_model = model

    predictions = best_model.predict(X_test)
    if n_class == 2:
        # Leaves 0/1 class labels unchanged; if the estimator emits continuous
        # scores instead, they are cut at 0.5.
        predictions = [1 if prob > 0.5 else 0 for prob in predictions]

    # Evaluation
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)
    print("\nClassification Report:\n", classification_report(y_test, predictions))

    if tuned:
        print("\nBest Parameters:\n", randomized_search.best_params_)


In [10]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report 

def model_data(df, target, model, param_dist, n_class=2, convert_to_binary=False, n_iter=2, actual_value=False):
    """
    Fit a classifier on ``df``, optionally tune it with a randomized search,
    and print its hold-out metrics.

    The caller's ``df`` is left untouched: the target is re-encoded on a copy,
    so the same frame can be reused for 2-class and 3-class runs in any order.

    Parameters:
    - df: DataFrame with the feature columns plus the ``target`` column.
    - target: Name of the target column.
    - model: Estimator exposing ``fit``/``predict``. When ``n_iter <= 1`` this
      very instance is fitted (no clone is made).
    - param_dist: Mapping of hyperparameter name to either a list of candidate
      values or an object exposing ``rvs`` (e.g. a frozen scipy.stats
      distribution); forwarded to RandomizedSearchCV.
    - n_class: With 2 (default) the label -1 is re-encoded as 0, so it shares a
      class with any existing 0 labels. With any other value -1 is re-encoded
      as 2, giving the labels 0, 1 and 2.
    - convert_to_binary: If True (and ``actual_value`` is False) every FEATURE
      value is replaced by 1 when it is > 0 and by 0 otherwise (default False).
    - n_iter: Number of candidates for the randomized search. Values <= 1 skip
      tuning and fit ``model`` exactly as passed in (default 2).
    - actual_value: If True the features are used as-is, rounded to 2 decimals,
      and ``convert_to_binary`` is ignored (default False).

    Outputs (printed; the function returns None):
    - Model accuracy and classification report on a 20% hold-out split.
    - Best parameters if tuned.
    """
    print("*********************************************************")
    print("Running the model task with the following parameters:")
    print("target:", target)
    print("model:", model)

    # Echo the search space. For distributions, n_iter values are drawn purely
    # for display; the randomized search below samples its own candidates with
    # random_state=42, so these printed values are not the ones evaluated.
    for param, dist in param_dist.items():
        sampled_values = dist.rvs(n_iter) if hasattr(dist, "rvs") else dist
        print(f"{param}: {sampled_values}")
    print("n_class:", n_class)
    print("n_iter:", n_iter)
    print("actual_value:", actual_value, "\n\n")

    # Re-encode the target without writing back into the caller's frame:
    # 2-class: -1 -> 0; otherwise: -1 -> 2 (0 and 1 are kept as they are).
    negative_label = 0 if n_class == 2 else 2
    y = df[target].replace({-1: negative_label})
    X = df.drop(columns=[target])

    if actual_value:
        X = X.round(2)
    elif convert_to_binary:
        # Vectorised form of "1 if value > 0 else 0" for every feature cell.
        X = (X > 0).astype(int)

    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    tuned = n_iter > 1
    if tuned:
        # Hyperparameter optimization using RandomizedSearchCV (3-fold CV, accuracy)
        randomized_search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=n_iter, scoring='accuracy', cv=3, verbose=1, random_state=42)
        randomized_search.fit(X_train, y_train)
        best_model = randomized_search.best_estimator_
    else:
        # No tuning: fit the estimator with the parameters it was created with
        model.fit(X_train, y_train)
        best_model = model

    predictions = best_model.predict(X_test)
    if n_class == 2:
        # Leaves 0/1 class labels unchanged; if the estimator emits continuous
        # scores instead, they are cut at 0.5.
        predictions = [1 if prob > 0.5 else 0 for prob in predictions]

    # Evaluation
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)
    print("\nClassification Report:\n", classification_report(y_test, predictions))

    if tuned:
        print("\nBest Parameters:\n", randomized_search.best_params_)


In [11]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from scipy.stats import randint, uniform

# Search spaces for the randomized hyperparameter search, one per model.
# scipy's randint(low, high) draws integers from [low, high);
# uniform(loc, scale) draws floats from [loc, loc + scale].
param_grid_xgb = dict(
    max_depth=randint(3, 10),
    learning_rate=uniform(0.01, 0.3),
    n_estimators=randint(50, 200),
    subsample=uniform(0.6, 0.4),
    colsample_bytree=uniform(0.5, 0.5),
    gamma=uniform(0, 0.5),
)

param_grid_rf = dict(
    n_estimators=randint(50, 200),
    max_depth=randint(3, 10),
    min_samples_split=randint(2, 10),
    min_samples_leaf=randint(1, 10),
)

param_grid_ab = dict(
    n_estimators=randint(50, 200),
    learning_rate=uniform(0.01, 1),
)

param_grid_sgd = dict(
    alpha=[0.0001, 0.001, 0.01, 0.1],
    max_iter=randint(1000, 10000),
    tol=[1e-3],
    penalty=['l2', 'l1', 'elasticnet'],
)

param_grid_knn = dict(
    n_neighbors=randint(3, 30),
    weights=['uniform', 'distance'],
    algorithm=['ball_tree', 'kd_tree', 'brute'],
)

# Naive Bayes usually does not require hyperparameter tuning, hence the empty space.
param_grid_nb = dict()

# Example usage
#model_data(your_dataframe, 'target_column_name', RandomForestClassifier(), param_grid_rf)



In [12]:
# Build the two modelling frames from the default CSV:
# df1 carries the 'condition1' target, df2 carries the 'condition2' target.
df1, df2 = prepare_data()

# Display df1 to inspect the resulting feature and target columns.
df1

In [13]:
# Display the condition1 frame (features, one-hot gene columns, target last).
df1

Unnamed: 0,DDX1,XRCC6,GEMIN5,DROSHA,HNRNPUL1,FTO,MORC2,SSB,U2AF2,CSTF2T,...,Geneid_ENSG00000290315.1,Geneid_ENSG00000290318.1,Geneid_ENSG00000291237.1,Geneid_ENSG00000291307.1,Geneid_ENSG00000291313.1,Geneid_ENSG00000291316.1,Geneid_ENSG00000291317.1,Geneid_ENSG00000293552.1,Geneid_ENSG00000293553.1,condition1
4,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,-1.0
5,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,-1.0
8,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,1.0
9,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,-1.0
10,3.744698,0.0,0.0,0.0,0.0,0.0,3.401821,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20044,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
20045,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
20046,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
20047,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0


# Actual values: models trained on the rounded, non-binarised feature values

In [None]:
# condition1, actual feature values, 10-candidate randomized search:
# every candidate classifier is run in turn with its own search space.
for _estimator, _search_space in [
    (xgb.XGBClassifier(), param_grid_xgb),
    (RandomForestClassifier(), param_grid_rf),
    (AdaBoostClassifier(), param_grid_ab),
    (SGDClassifier(), param_grid_sgd),
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df1, target='condition1', model=_estimator,
               param_dist=_search_space, n_iter=10, actual_value=True)


*********************************************************
Running the model task with the following parameters:
target: condition1
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [5 3 3 8 9 7 9 7 3 3]
learning_rate: [0.10131738 0.03262122 0.25762722 0.12869784 0.01431766 

In [5]:
# condition1, actual feature values, 10-candidate search: KNN and Naive Bayes only.
for _estimator, _search_space in [
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df1, target='condition1', model=_estimator,
               param_dist=_search_space, n_iter=10, actual_value=True)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: KNeighborsClassifier()
n_neighbors: [ 9 14 12 13 23 26 22 22 24 18]
weights: ['uniform', 'distance']
algorithm: ['ball_tree', 'kd_tree', 'brute']
n_class: 2
n_iter: 10
actual_value: True 


Fitting 3 folds for each of 10 candidates, totalling 30 fits
Accuracy: 0.579002624671916

Classification Report:
               precision    recall  f1-score   support

         0.0       0.59      0.75      0.66      1051
         1.0       0.54      0.37      0.44       854

    accuracy                           0.58      1905
   macro avg       0.57      0.56      0.55      1905
weighted avg       0.57      0.58      0.56      1905


Best Parameters:
 {'algorithm': 'ball_tree', 'n_neighbors': 23, 'weights': 'uniform'}
*********************************************************
Running the model task with the following parameters:
target: condition1
model: Gaussi

In [None]:
# condition1, actual feature values, 100-candidate randomized search for every model.
for _estimator, _search_space in [
    (xgb.XGBClassifier(), param_grid_xgb),
    (RandomForestClassifier(), param_grid_rf),
    (AdaBoostClassifier(), param_grid_ab),
    (SGDClassifier(), param_grid_sgd),
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df1, target='condition1', model=_estimator,
               param_dist=_search_space, n_iter=100, actual_value=True)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [6 3 4 3 5 3 9 7 9 9 9 5 7 8 7 4 8 8 4 7 9 8 6 5 9 6 3 4 7 7 8 5 5 3 3 6 4
 7 4 8 8 9 8 7 5 5

In [14]:
# AdaBoost only: condition1, actual feature values, 100-candidate search.
model_data(
    df=df1,
    target='condition1',
    model=AdaBoostClassifier(),
    param_dist=param_grid_ab,
    n_iter=100,
    actual_value=True,
)


*********************************************************
Running the model task with the following parameters:
target: condition1
model: AdaBoostClassifier()
n_estimators: [196 141  95  82 195 153 110  72 106 133 151  64  65 119 177 105  80 181
 150 174  97  65 108 101 138  73  90 179  77 124 107  58 127 114 119  54
  80  53 169 140  53  99 144  91  55 138  50  67 182 194 119 125 121 179
 159 128 191  87 150  94 133  60 112 184  88 158  50 164  90 190 130  54
  73 155  95  60 199 151  74 165  73  87 108 120 120 166  92  66  85 137
  63 192 187 108 156  72 124 123  77 138]
learning_rate: [0.58706463 0.70621173 0.09709517 0.67997284 0.81852426 0.935102
 0.57951902 0.15033405 0.89348993 0.52002428 0.22395024 0.57309572
 0.57548008 0.83909905 0.84728036 0.68473517 0.97741898 0.89708361
 0.57920961 0.16192169 0.40896822 0.56375265 0.18654354 0.67858637
 0.65881409 0.01606243 0.77876512 0.15686986 1.00417377 0.31020238
 0.30490643 0.53809306 0.27955219 0.24394203 0.35280317 0.43462197
 0.41

In [None]:
# SGD only: condition1, actual feature values, 100-candidate search.
model_data(
    df=df1,
    target='condition1',
    model=SGDClassifier(),
    param_dist=param_grid_sgd,
    n_iter=100,
    actual_value=True,
)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: SGDClassifier()
alpha: [0.0001, 0.001, 0.01, 0.1]
max_iter: [6925 3727 5699 3315 5671 8994 3006 8944 7462 5071 7277 6265 7412 7843
 7768 7869 4685 9523 2627 5235 5348 2525 7526 8810 7684 9391 8940 5540
 4910 2672 6011 7862 1196 5615 9965 8040 2562 4275 2493 5495 7847 9142
 1877 5974 6161 7811 8060 8575 8729 4600 2641 6090 4276 4431 1521 7312
 8118 8833 5840 2556 1752 9534 1301 9277 1841 5474 6714 6066 6526 5041
 3816 5897 6410 4651 4977 5018 3189 3855 4232 2348 9321 2430 5912 4370
 7599 8994 7659 3178 5104 4075 6929 8904 9430 1276 9084 6292 7693 3788
 8372 4709]
tol: [0.001]
penalty: ['l2', 'l1', 'elasticnet']
n_class: 2
n_iter: 100
actual_value: True 


Fitting 3 folds for each of 100 candidates, totalling 300 fits
Accuracy: 0.5921259842519685

Classification Report:
               precision    recall  f1-score   support

         0.0       0.59    

In [None]:
# KNN only: condition1, actual feature values, 100-candidate search.
model_data(
    df=df1,
    target='condition1',
    model=KNeighborsClassifier(),
    param_dist=param_grid_knn,
    n_iter=100,
    actual_value=True,
)

In [25]:
# condition2, actual feature values, no tuning (n_iter=1 fits each model as constructed).
for _estimator, _search_space in [
    (xgb.XGBClassifier(), param_grid_xgb),
    (RandomForestClassifier(), param_grid_rf),
    (AdaBoostClassifier(), param_grid_ab),
    (SGDClassifier(), param_grid_sgd),
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df2, target='condition2', model=_estimator,
               param_dist=_search_space, n_iter=1, actual_value=True)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [6]
learning_rate: [0.08812315]
n_estimators: [193]
subsample: [0.80896294]
colsample_bytree:

In [5]:
# condition2, actual feature values, 10-candidate randomized search for every model.
for _estimator, _search_space in [
    (xgb.XGBClassifier(), param_grid_xgb),
    (RandomForestClassifier(), param_grid_rf),
    (AdaBoostClassifier(), param_grid_ab),
    (SGDClassifier(), param_grid_sgd),
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df2, target='condition2', model=_estimator,
               param_dist=_search_space, n_iter=10, actual_value=True)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [3 5 9 8 4 6 7 5 3 6]
learning_rate: [0.11020404 0.0769275  0.26901342 0.17304578 0.17951997 

In [33]:
# List the distinct target labels currently present in df1 before the 3-class runs.
df1['condition1'].unique()

array([-1.,  1.,  0.])

# 3-class model

In [34]:
# 3-class runs on condition1 with actual feature values and no tuning (n_iter=1).
for _estimator, _search_space in [
    (xgb.XGBClassifier(), param_grid_xgb),
    (RandomForestClassifier(), param_grid_rf),
    (AdaBoostClassifier(), param_grid_ab),
    (SGDClassifier(), param_grid_sgd),
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df1, target='condition1', model=_estimator,
               param_dist=_search_space, n_iter=1, actual_value=True, n_class=3)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [3, 4, 5]
learning_rate: [0.01, 0.05, 0.1]
n_class: 3
n_iter: 1
actual_value: True 


Accurac

In [35]:
# 3-class runs on condition1 with actual_value=False (features passed through
# unrounded, since convert_to_binary keeps its default) and no tuning.
for _estimator, _search_space in [
    (xgb.XGBClassifier(), param_grid_xgb),
    (RandomForestClassifier(), param_grid_rf),
    (AdaBoostClassifier(), param_grid_ab),
    (SGDClassifier(), param_grid_sgd),
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df1, target='condition1', model=_estimator,
               param_dist=_search_space, n_iter=1, actual_value=False, n_class=3)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [3, 4, 5]
learning_rate: [0.01, 0.05, 0.1]
n_class: 3
n_iter: 1
actual_value: False 


Accura

In [6]:
# 3-class runs on condition2 with actual feature values and no tuning (n_iter=1).
for _estimator, _search_space in [
    (xgb.XGBClassifier(), param_grid_xgb),
    (RandomForestClassifier(), param_grid_rf),
    (AdaBoostClassifier(), param_grid_ab),
    (SGDClassifier(), param_grid_sgd),
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df2, target='condition2', model=_estimator,
               param_dist=_search_space, n_iter=1, actual_value=True, n_class=3)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [8]
learning_rate: [0.07170901]
n_estimators: [135]
subsample: [0.71166701]
colsample_bytree:

In [37]:
# 2-class runs on condition2 with actual_value=False and no tuning (n_iter=1).
for _estimator, _search_space in [
    (xgb.XGBClassifier(), param_grid_xgb),
    (RandomForestClassifier(), param_grid_rf),
    (AdaBoostClassifier(), param_grid_ab),
    (SGDClassifier(), param_grid_sgd),
    (KNeighborsClassifier(), param_grid_knn),
    (GaussianNB(), param_grid_nb),
]:
    model_data(df=df2, target='condition2', model=_estimator,
               param_dist=_search_space, n_iter=1, actual_value=False)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [3, 4, 5]
learning_rate: [0.01, 0.05, 0.1]
n_class: 2
n_iter: 1
actual_value: False 


Accura

# Baseline fit vs. hyperparameter optimisation

In [26]:
# XGBoost on condition1: untuned baseline (n_iter=1), then a 10-candidate search.
# A fresh estimator is created for each run.
for _n_candidates in (1, 10):
    model_data(df=df1, target='condition1', model=xgb.XGBClassifier(),
               param_dist=param_grid_xgb, n_iter=_n_candidates)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [9]
learning_rate: [0.09129576]
n_estimators: [100]
subsample: [0.88929258]
colsample_bytree:

In [27]:
# Random forest on condition1: untuned baseline (n_iter=1), then a 10-candidate search.
# A fresh estimator is created for each run.
for _n_candidates in (1, 10):
    model_data(df=df1, target='condition1', model=RandomForestClassifier(),
               param_dist=param_grid_rf, n_iter=_n_candidates)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: RandomForestClassifier()
n_estimators: [108]
max_depth: [6]
min_samples_split: [7]
min_samples_leaf: [7]
n_class: 2
n_iter: 1 


Accuracy: 0.588008800880088

Classification Report:
               precision    recall  f1-score   support

         0.0       0.60      0.71      0.65       981
         1.0       0.57      0.44      0.50       837

    accuracy                           0.59      1818
   macro avg       0.58      0.58      0.57      1818
weighted avg       0.58      0.59      0.58      1818

*********************************************************
Running the model task with the following parameters:
target: condition1
model: RandomForestClassifier()
n_estimators: [ 66 109  73  84 108 165 197 189  93  97]
max_depth: [7 5 9 8 7 8 7 7 8 6]
min_samples_split: [8 9 4 6 2 9 7 8 8 6]
min_samples_leaf: [1 5 2 2 7 9 7 9 7 5]
n_class: 2
n_iter: 1

In [28]:
# AdaBoost on df1 / condition1: one run with n_iter=1, then one with
# n_iter=10. Each call receives its own newly constructed estimator.
model_data(
    df=df1,
    target='condition1',
    model=AdaBoostClassifier(),
    param_dist=param_grid_ab,
    n_iter=1,
)
model_data(
    df=df1,
    target='condition1',
    model=AdaBoostClassifier(),
    param_dist=param_grid_ab,
    n_iter=10,
)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: AdaBoostClassifier()
n_estimators: [199]
learning_rate: [0.32051402]
n_class: 2
n_iter: 1 


Accuracy: 0.6056105610561056

Classification Report:
               precision    recall  f1-score   support

         0.0       0.61      0.75      0.67       981
         1.0       0.60      0.43      0.50       837

    accuracy                           0.61      1818
   macro avg       0.60      0.59      0.59      1818
weighted avg       0.60      0.61      0.59      1818

*********************************************************
Running the model task with the following parameters:
target: condition1
model: AdaBoostClassifier()
n_estimators: [178  71  66 176  75  95 100 152 141  76]
learning_rate: [0.22497884 0.37645835 0.75698977 0.76657604 0.79601236 0.54218522
 0.11612226 0.42038203 0.32578356 0.28051177]
n_class: 2
n_iter: 10 


Fitting 3 folds for 

In [29]:
# Linear SGD classifier on df1 / condition1: one run with n_iter=1, then one
# with n_iter=10. Each call receives its own newly constructed estimator.
model_data(
    df=df1,
    target='condition1',
    model=SGDClassifier(),
    param_dist=param_grid_sgd,
    n_iter=1,
)
model_data(
    df=df1,
    target='condition1',
    model=SGDClassifier(),
    param_dist=param_grid_sgd,
    n_iter=10,
)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: SGDClassifier()
alpha: [0.0001, 0.001, 0.01, 0.1]
max_iter: [2035]
tol: [0.001]
penalty: ['l2', 'l1', 'elasticnet']
n_class: 2
n_iter: 1 


Accuracy: 0.5473047304730473

Classification Report:
               precision    recall  f1-score   support

         0.0       0.56      0.81      0.66       981
         1.0       0.52      0.24      0.33       837

    accuracy                           0.55      1818
   macro avg       0.54      0.52      0.49      1818
weighted avg       0.54      0.55      0.51      1818

*********************************************************
Running the model task with the following parameters:
target: condition1
model: SGDClassifier()
alpha: [0.0001, 0.001, 0.01, 0.1]
max_iter: [2766 6024 7429 7402 5445 2064 8843 6929 1210 9808]
tol: [0.001]
penalty: ['l2', 'l1', 'elasticnet']
n_class: 2
n_iter: 10 


Fitting 3 folds f

In [30]:
# k-nearest neighbours on df1 / condition1: one run with n_iter=1, then one
# with n_iter=10. Each call receives its own newly constructed estimator.
model_data(
    df=df1,
    target='condition1',
    model=KNeighborsClassifier(),
    param_dist=param_grid_knn,
    n_iter=1,
)
model_data(
    df=df1,
    target='condition1',
    model=KNeighborsClassifier(),
    param_dist=param_grid_knn,
    n_iter=10,
)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: KNeighborsClassifier()
n_neighbors: [17]
weights: ['uniform', 'distance']
algorithm: ['ball_tree', 'kd_tree', 'brute']
n_class: 2
n_iter: 1 


Accuracy: 0.5484048404840484

Classification Report:
               precision    recall  f1-score   support

         0.0       0.58      0.62      0.60       981
         1.0       0.51      0.46      0.49       837

    accuracy                           0.55      1818
   macro avg       0.54      0.54      0.54      1818
weighted avg       0.55      0.55      0.55      1818

*********************************************************
Running the model task with the following parameters:
target: condition1
model: KNeighborsClassifier()
n_neighbors: [25 14 12  7 18  8  5 15  6  7]
weights: ['uniform', 'distance']
algorithm: ['ball_tree', 'kd_tree', 'brute']
n_class: 2
n_iter: 10 


Fitting 3 folds for each of 1

In [31]:
# Gaussian naive Bayes on df1 / condition1: one run with n_iter=1, then one
# with n_iter=10.
# NOTE(review): the recorded output reports the same accuracy for both runs
# and "1 candidates" for the n_iter=10 search, so the second call looks
# redundant -- confirm that param_grid_nb is intentionally empty.
model_data(
    df=df1,
    target='condition1',
    model=GaussianNB(),
    param_dist=param_grid_nb,
    n_iter=1,
)
model_data(
    df=df1,
    target='condition1',
    model=GaussianNB(),
    param_dist=param_grid_nb,
    n_iter=10,
)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: GaussianNB()
n_class: 2
n_iter: 1 


Accuracy: 0.45764576457645767

Classification Report:
               precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       981
         1.0       0.46      0.99      0.63       837

    accuracy                           0.46      1818
   macro avg       0.23      0.50      0.31      1818
weighted avg       0.21      0.46      0.29      1818

*********************************************************
Running the model task with the following parameters:
target: condition1
model: GaussianNB()
n_class: 2
n_iter: 10 


Fitting 3 folds for each of 1 candidates, totalling 3 fits
Accuracy: 0.45764576457645767

Classification Report:
               precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       981
         1.0       0.46      0.99      0.63 

In [32]:
# XGBoost on df2 / condition2: the same setup is run twice, first with
# n_iter=1 and then with n_iter=20. A fresh estimator is built for each call.
# NOTE(review): n_iter looks like the search budget handled inside
# model_data -- confirm against its definition.
model_data(
    df=df2,
    target='condition2',
    model=xgb.XGBClassifier(),
    param_dist=param_grid_xgb,
    n_iter=1,
)
model_data(
    df=df2,
    target='condition2',
    model=xgb.XGBClassifier(),
    param_dist=param_grid_xgb,
    n_iter=20,
)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [5]
learning_rate: [0.20755617]
n_estimators: [120]
subsample: [0.81864631]
colsample_bytree:

In [33]:
# Random forest on df2 / condition2: one run with n_iter=1, then one with
# n_iter=20. Each call receives its own newly constructed estimator.
model_data(
    df=df2,
    target='condition2',
    model=RandomForestClassifier(),
    param_dist=param_grid_rf,
    n_iter=1,
)
model_data(
    df=df2,
    target='condition2',
    model=RandomForestClassifier(),
    param_dist=param_grid_rf,
    n_iter=20,
)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: RandomForestClassifier()
n_estimators: [170]
max_depth: [6]
min_samples_split: [2]
min_samples_leaf: [3]
n_class: 2
n_iter: 1 


Accuracy: 0.5670391061452514

Classification Report:
               precision    recall  f1-score   support

         0.0       0.59      0.74      0.66       995
         1.0       0.52      0.35      0.42       795

    accuracy                           0.57      1790
   macro avg       0.55      0.55      0.54      1790
weighted avg       0.56      0.57      0.55      1790

*********************************************************
Running the model task with the following parameters:
target: condition2
model: RandomForestClassifier()
n_estimators: [188 142 185 198 167 112 116 137 182  51  94 128  85 131 198 133 136 137
 177 109]
max_depth: [9 6 6 7 8 4 8 5 6 8 6 7 9 6 3 5 5 8 5 9]
min_samples_split: [9 2 4 4 7 8 9 5 6 8

In [34]:
# AdaBoost on df2 / condition2: one run with n_iter=1, then one with
# n_iter=20. Each call receives its own newly constructed estimator.
model_data(
    df=df2,
    target='condition2',
    model=AdaBoostClassifier(),
    param_dist=param_grid_ab,
    n_iter=1,
)
model_data(
    df=df2,
    target='condition2',
    model=AdaBoostClassifier(),
    param_dist=param_grid_ab,
    n_iter=20,
)


*********************************************************
Running the model task with the following parameters:
target: condition2
model: AdaBoostClassifier()
n_estimators: [128]
learning_rate: [0.44334881]
n_class: 2
n_iter: 1 


Accuracy: 0.5581005586592179

Classification Report:
               precision    recall  f1-score   support

         0.0       0.59      0.68      0.63       995
         1.0       0.50      0.41      0.45       795

    accuracy                           0.56      1790
   macro avg       0.55      0.54      0.54      1790
weighted avg       0.55      0.56      0.55      1790

*********************************************************
Running the model task with the following parameters:
target: condition2
model: AdaBoostClassifier()
n_estimators: [138 181 198 142 159  96 173 132  82 114 142 104  72 118  86 104 145 185
  69 180]
learning_rate: [0.0481674  0.74673022 0.73468902 0.79119016 0.97752093 0.91556906
 0.80633824 0.12429105 0.37839472 0.63272452 0.783

In [35]:
# Linear SGD classifier on df2 / condition2: one run with n_iter=1, then one
# with n_iter=20. Each call receives its own newly constructed estimator.
model_data(
    df=df2,
    target='condition2',
    model=SGDClassifier(),
    param_dist=param_grid_sgd,
    n_iter=1,
)
model_data(
    df=df2,
    target='condition2',
    model=SGDClassifier(),
    param_dist=param_grid_sgd,
    n_iter=20,
)


*********************************************************
Running the model task with the following parameters:
target: condition2
model: SGDClassifier()
alpha: [0.0001, 0.001, 0.01, 0.1]
max_iter: [2384]
tol: [0.001]
penalty: ['l2', 'l1', 'elasticnet']
n_class: 2
n_iter: 1 


Accuracy: 0.523463687150838

Classification Report:
               precision    recall  f1-score   support

         0.0       0.57      0.61      0.59       995
         1.0       0.46      0.42      0.44       795

    accuracy                           0.52      1790
   macro avg       0.51      0.51      0.51      1790
weighted avg       0.52      0.52      0.52      1790

*********************************************************
Running the model task with the following parameters:
target: condition2
model: SGDClassifier()
alpha: [0.0001, 0.001, 0.01, 0.1]
max_iter: [4776 9171 6665 7471 7548 6797 8341 5777 6355 2468 9838 9575 2214 9210
 8117 1164 8353 2561 1432 9227]
tol: [0.001]
penalty: ['l2', 'l1', 'elast

In [None]:
# k-nearest neighbours on df2 / condition2: one run with n_iter=1, then one
# with n_iter=20. Each call receives its own newly constructed estimator.
model_data(
    df=df2,
    target='condition2',
    model=KNeighborsClassifier(),
    param_dist=param_grid_knn,
    n_iter=1,
)
model_data(
    df=df2,
    target='condition2',
    model=KNeighborsClassifier(),
    param_dist=param_grid_knn,
    n_iter=20,
)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: KNeighborsClassifier()
n_neighbors: [16]
weights: ['uniform', 'distance']
algorithm: ['ball_tree', 'kd_tree', 'brute']
n_class: 2
n_iter: 1 


Accuracy: 0.523463687150838

Classification Report:
               precision    recall  f1-score   support

         0.0       0.57      0.58      0.57       995
         1.0       0.46      0.46      0.46       795

    accuracy                           0.52      1790
   macro avg       0.52      0.52      0.52      1790
weighted avg       0.52      0.52      0.52      1790

*********************************************************
Running the model task with the following parameters:
target: condition2
model: KNeighborsClassifier()
n_neighbors: [15  9 21  7  9 27 25 11  9 16  6 24 27 29 22 23 17 10  7 25]
weights: ['uniform', 'distance']
algorithm: ['ball_tree', 'kd_tree', 'brute']
n_class: 2
n_iter: 20 




In [None]:
# Gaussian naive Bayes on df2 / condition2: one run with n_iter=1, then one
# with n_iter=20.
# NOTE(review): the recorded output reports the same accuracy for both runs
# and "1 candidates" for the n_iter=20 search, so the second call looks
# redundant -- confirm that param_grid_nb is intentionally empty.
model_data(
    df=df2,
    target='condition2',
    model=GaussianNB(),
    param_dist=param_grid_nb,
    n_iter=1,
)
model_data(
    df=df2,
    target='condition2',
    model=GaussianNB(),
    param_dist=param_grid_nb,
    n_iter=20,
)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: GaussianNB()
n_class: 2
n_iter: 1 


Accuracy: 0.4480446927374302

Classification Report:
               precision    recall  f1-score   support

         0.0       0.77      0.01      0.02       995
         1.0       0.45      1.00      0.62       795

    accuracy                           0.45      1790
   macro avg       0.61      0.50      0.32      1790
weighted avg       0.63      0.45      0.28      1790

*********************************************************
Running the model task with the following parameters:
target: condition2
model: GaussianNB()
n_class: 2
n_iter: 20 


Fitting 3 folds for each of 1 candidates, totalling 3 fits
Accuracy: 0.4480446927374302

Classification Report:
               precision    recall  f1-score   support

         0.0       0.77      0.01      0.02       995
         1.0       0.45      1.00      0.62   

In [None]:
# Gaussian naive Bayes with n_iter=100 on both datasets (df1 / condition1,
# then df2 / condition2).
# NOTE(review): this cell carries no execution count, and the same two calls
# appear again in their own cells further down the notebook -- consider
# keeping only one copy.
model_data(
    df=df1,
    target='condition1',
    model=GaussianNB(),
    param_dist=param_grid_nb,
    n_iter=100,
)

model_data(
    df=df2,
    target='condition2',
    model=GaussianNB(),
    param_dist=param_grid_nb,
    n_iter=100,
)

In [6]:
# XGBoost on df1 / condition1 with the larger budget of n_iter=100.
model_data(
    df=df1,
    target='condition1',
    model=xgb.XGBClassifier(),
    param_dist=param_grid_xgb,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition1
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [5 6 9 9 9 5 4 4 5 6 7 4 6 6 6 4 8 3 8 9 7 4 3 5 8 4 6 4 6 4 8 8 4 4 4 7 6
 3 9 3 6 8 9 8 5 8

In [7]:
# Random forest on df1 / condition1 with the larger budget of n_iter=100.
model_data(
    df=df1,
    target='condition1',
    model=RandomForestClassifier(),
    param_dist=param_grid_rf,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition1
model: RandomForestClassifier()
n_estimators: [153 181  54 193 186 151  86 120  89  72 128  83  66 113 195  84  77 123
 197 164 121  86 191  73  68  80 193 158 157  73  61 187 153 127 181  52
 101 116 187  59 193 192 160 177 126 117  81 173 177  83 163 185 144 196
 106  60 104  74 165 112  57  98 109 151 122 124  92 106 177  61  75  66
  83 185 143 150  63  64  85  95  80  92 184 118  88 101 148  74 152  57
 107 135  94 102 197  75 186 147 113  53]
max_depth: [6 9 3 8 3 8 7 6 5 6 4 3 4 6 3 9 9 4 8 9 7 3 9 5 5 4 4 6 5 5 9 9 3 9 6 6 9
 7 7 7 9 5 5 7 4 6 7 4 9 3 4 3 9 9 4 8 9 9 9 5 4 9 8 9 5 4 3 8 6 3 5 5 4 6
 7 6 9 9 9 5 3 4 5 3 3 9 6 4 4 5 3 7 9 5 5 7 7 8 8 6]
min_samples_split: [6 5 8 8 9 4 9 5 5 7 4 4 2 6 8 7 7 8 9 5 2 8 5 5 3 9 3 7 8 3 3 3 6 2 7 4 9
 3 7 4 9 3 5 5 4 6 6 2 5 5 2 3 7 5 5 8 3 2 6 8 9 8 8 7 9 4 3 4 3 8 4 9 4 2
 5 2 7 7 8 9 2 6 7 9 8 8 4 4 7 6

In [8]:
# AdaBoost on df1 / condition1 with the larger budget of n_iter=100.
model_data(
    df=df1,
    target='condition1',
    model=AdaBoostClassifier(),
    param_dist=param_grid_ab,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition1
model: AdaBoostClassifier()
n_estimators: [ 72 117  93 193  91 104 106 194 102 115  60 146 174 159  91 113  87 142
 104 116  51 169  87 145 102 135 169  52  84 193 136 184 109  75 115 120
 107  55  70  54 181 187 177 172  57  69 175 125  65  83  90 142 175 142
 135 155 119 131 148 166 199  87  65  99 196 108  96 134  81  58 117  93
  73 156  65 139 149 121  61 168 144  54 163 102 106  91  69  96 124 155
  71  58 136  52 106 191 140 199 129 180]
learning_rate: [0.20636272 0.44173392 0.16736606 0.98030113 0.36545323 0.17749219
 0.87369731 0.50438042 0.92872831 0.91649096 0.66060408 0.75431055
 0.05741713 0.58092712 0.38963839 0.25348176 0.51971293 0.79665165
 0.16585512 0.06332597 0.99799928 0.17602281 0.55201054 0.31159025
 0.80759166 0.55439478 0.7466375  0.13737021 0.30361912 0.11873342
 0.08532153 0.90645571 0.52621401 0.28016723 0.99927449 0.44093058
 0.

In [9]:
# Linear SGD classifier on df1 / condition1 with n_iter=100; the recorded log
# for this run reports 100 candidates evaluated over 3 folds (300 fits).
model_data(
    df=df1,
    target='condition1',
    model=SGDClassifier(),
    param_dist=param_grid_sgd,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition1
model: SGDClassifier()
alpha: [0.0001, 0.001, 0.01, 0.1]
max_iter: [8339 6782 8878 4234 4854 1985 7622 9400 3690 2821 1266 5698 8863 6797
 6635 7165 6099 4237 1816 3476 4726 5505 4795 9772 5339 3330 7709 7556
 8559 1614 1827 4641 9708 6206 8746 6286 8719 6419 2636 8343 1581 6255
 5953 8202 5450 1870 9248 2622 8021 2814 5561 3965 6982 5728 1637 2322
 8093 9432 6376 8702 2141 6475 5111 7284 3901 3825 5070 5981 8687 5989
 7408 7437 6288 3855 6879 2207 1234 1743 8367 5409 4868 4098 5818 5394
 6660 4772 4574 6019 3096 2156 9298 5315 9595 1781 8142 2831 3929 3563
 3978 2890]
tol: [0.001]
penalty: ['l2', 'l1', 'elasticnet']
n_class: 2
n_iter: 100 


Fitting 3 folds for each of 100 candidates, totalling 300 fits
Accuracy: 0.5775577557755776

Classification Report:
               precision    recall  f1-score   support

         0.0       0.57      0.90      0.70   

In [10]:
# k-nearest neighbours on df1 / condition1 with n_iter=100; the recorded log
# for this run reports 100 candidates evaluated over 3 folds (300 fits).
model_data(
    df=df1,
    target='condition1',
    model=KNeighborsClassifier(),
    param_dist=param_grid_knn,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition1
model: KNeighborsClassifier()
n_neighbors: [11  7  7 18 17 27 25 18 19 24  6 19  3  7 16  4 13 14 10  6  9 11 11  9
 16 22  7 26 18 20  6  5  5 20  8  6 13 16  4 20 29  8 20 14  4 25 25 14
 27 17 26 19 27  8  3 19 23 10 12  8 23 26 24  5  7 18 26 25 27 21 21 10
  4 22  3 27 18 20 19 11 22 19 13  3 25  3 24 11 21 13 28 10 24 22 26 16
 10 27 19 29]
weights: ['uniform', 'distance']
algorithm: ['ball_tree', 'kd_tree', 'brute']
n_class: 2
n_iter: 100 


Fitting 3 folds for each of 100 candidates, totalling 300 fits
Accuracy: 0.5841584158415841

Classification Report:
               precision    recall  f1-score   support

         0.0       0.59      0.72      0.65       981
         1.0       0.57      0.42      0.48       837

    accuracy                           0.58      1818
   macro avg       0.58      0.57      0.57      1818
weighted avg       0.58    

In [11]:
# Gaussian naive Bayes on df1 / condition1 with n_iter=100.
# NOTE(review): the recorded log shows only 1 candidate and empty best
# parameters despite n_iter=100 -- confirm that param_grid_nb is
# intentionally empty.
model_data(
    df=df1,
    target='condition1',
    model=GaussianNB(),
    param_dist=param_grid_nb,
    n_iter=100,
)

*********************************************************
Running the model task with the following parameters:
target: condition1
model: GaussianNB()
n_class: 2
n_iter: 100 


Fitting 3 folds for each of 1 candidates, totalling 3 fits
Accuracy: 0.45764576457645767

Classification Report:
               precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       981
         1.0       0.46      0.99      0.63       837

    accuracy                           0.46      1818
   macro avg       0.23      0.50      0.31      1818
weighted avg       0.21      0.46      0.29      1818


Best Parameters:
 {}


In [7]:
# XGBoost on df2 / condition2 with the larger budget of n_iter=100.
model_data(
    df=df2,
    target='condition2',
    model=xgb.XGBClassifier(),
    param_dist=param_grid_xgb,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition2
model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
max_depth: [9 3 7 3 6 3 6 5 3 4 9 8 7 5 9 8 4 7 3 9 7 4 7 9 6 6 8 3 3 8 7 6 8 3 3 7 4
 7 6 4 8 7 7 8 7 8

In [None]:
# Random forest on df2 / condition2 with the larger budget of n_iter=100.
model_data(
    df=df2,
    target='condition2',
    model=RandomForestClassifier(),
    param_dist=param_grid_rf,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition2
model: RandomForestClassifier()
n_estimators: [122  86 154 126 199 176 148 161  77 164 180 117  51 128 122 185 117  65
 158 130 106 115 129 150  96  56 166 154 126 130  94 113 108 170  54  66
  95 179 109 124  90 168  81 136  82 186 198 192 138 137 104 116  51  50
 184  65  88 101  65 114 170  66 153 117  58  83 168 184  70  50 159 157
  70 195  62 148 172 149  77 136 197 149  62 175 161 195  56 121 173  78
 105 138 134 125  67 199  97 195  58 103]
max_depth: [4 6 8 7 4 4 7 9 7 7 5 6 7 4 9 7 6 4 4 6 7 9 4 8 9 4 8 8 8 3 4 6 8 8 8 5 8
 8 7 6 3 9 5 3 6 9 4 8 6 5 5 8 9 8 4 9 9 4 6 3 6 9 7 8 8 4 4 8 3 6 7 7 7 6
 3 4 6 6 8 3 6 4 7 9 8 3 3 8 6 6 6 6 7 8 7 5 7 8 9 5]
min_samples_split: [3 7 9 7 7 8 8 6 2 5 8 8 8 3 8 8 6 4 8 9 5 6 4 2 6 2 4 7 6 3 7 5 4 7 6 6 9
 7 2 8 7 6 4 8 7 4 2 7 8 4 9 6 9 2 9 7 6 5 5 3 4 7 8 2 5 7 3 9 9 2 5 2 7 2
 4 8 7 4 3 5 7 9 9 4 2 3 9 3 9 2

In [None]:
# AdaBoost on df2 / condition2 with the larger budget of n_iter=100.
model_data(
    df=df2,
    target='condition2',
    model=AdaBoostClassifier(),
    param_dist=param_grid_ab,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition2
model: AdaBoostClassifier()
n_estimators: [ 56 141  50 180 142 185 142 198  80 140  76 108 154 175  88 155 123  65
 123 105 143 161  73 127 180 192 151 148 147 176 187 150 116 198 181 184
 122 149 156  84 177 168 184 147 167 133  80 186  56  52 105 138  90 170
 130  74 148  76  77  90  79 191  50 192  60 123  91 132 192  74  83 140
 148 171  84 192 171  74 104  95  73  76 133  75 112  52  62  81 136 170
 161 102  97  81 183  66 188 102 130  55]
learning_rate: [0.28951355 0.23118789 0.54570681 0.30515933 0.20422108 0.19115242
 0.17324367 0.08935469 0.57783163 0.69727626 0.80037385 0.03368296
 0.47202062 0.53119418 0.03440261 0.81018069 0.3212295  0.87690751
 0.81205479 0.75028091 0.82305478 0.82968568 0.64547027 0.55672778
 0.31254561 0.15502382 0.06504859 0.17394972 0.8396309  0.57657144
 0.09678005 0.46721225 0.01591027 0.50325289 0.5310654  0.66660708
 0.

In [None]:
# Linear SGD classifier on df2 / condition2 with n_iter=100; the recorded log
# for this run reports 100 candidates evaluated over 3 folds (300 fits).
model_data(
    df=df2,
    target='condition2',
    model=SGDClassifier(),
    param_dist=param_grid_sgd,
    n_iter=100,
)


*********************************************************
Running the model task with the following parameters:
target: condition2
model: SGDClassifier()
alpha: [0.0001, 0.001, 0.01, 0.1]
max_iter: [3062 3569 6203 1152 6066 9592 8655 6753 7779 7354 3219 8714 7910 9639
 7498 7649 7991 2024 5720 2736 5972 5069 5049 2898 1620 9691 4077 9067
 8613 1475 8821 5408 3780 5217 2776 2554 6362 7074 4924 4378 4522 3024
 5182 5442 5930 2358 4209 7050 8670 4754 4787 3805 8358 4654 8721 9149
 4691 5335 1743 7080 9951 4976 6957 4458 8432 6259 4798 8750 8201 1239
 3134 4355 7981 4495 5163 7875 3248 6183 2038 1398 3801 8933 5668 9148
 5419 3221 9874 9422 8751 3652 1069 8512 4012 9222 5205 9048 2698 6741
 6454 4925]
tol: [0.001]
penalty: ['l2', 'l1', 'elasticnet']
n_class: 2
n_iter: 100 


Fitting 3 folds for each of 100 candidates, totalling 300 fits
Accuracy: 0.5597765363128492

Classification Report:
               precision    recall  f1-score   support

         0.0       0.56      0.93      0.70   

In [None]:
# k-nearest neighbours on df2 / condition2 with n_iter=100; the recorded log
# for this run reports 100 candidates evaluated over 3 folds (300 fits).
model_data(
    df=df2,
    target='condition2',
    model=KNeighborsClassifier(),
    param_dist=param_grid_knn,
    n_iter=100,
)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: KNeighborsClassifier()
n_neighbors: [23 22  8  6 14 27 13 16 11 12 27 10 11 18  3 19 11 22  7  5 12  4 25 26
 11 12 18 14 25 17  3  9 15 25  7  8 18 15 11 27 12 11 22  3 13  5 14 23
  6  7  7 23 11 14 24 12 18 23 18 29 10  9  9 16 22 26 25  4 16  3  6 28
  9 15  5  5 24 14 13 18 10 29  9 19 12 14 28 27 17 13 11 12 11  5  6 13
 12 24  8 16]
weights: ['uniform', 'distance']
algorithm: ['ball_tree', 'kd_tree', 'brute']
n_class: 2
n_iter: 100 


Fitting 3 folds for each of 100 candidates, totalling 300 fits
Accuracy: 0.5536312849162012

Classification Report:
               precision    recall  f1-score   support

         0.0       0.58      0.68      0.63       995
         1.0       0.50      0.39      0.44       795

    accuracy                           0.55      1790
   macro avg       0.54      0.54      0.53      1790
weighted avg       0.55    

In [None]:
# Gaussian naive Bayes on df2 / condition2 with n_iter=100.
# NOTE(review): the recorded log shows only 1 candidate and empty best
# parameters despite n_iter=100 -- confirm that param_grid_nb is
# intentionally empty.
model_data(
    df=df2,
    target='condition2',
    model=GaussianNB(),
    param_dist=param_grid_nb,
    n_iter=100,
)

*********************************************************
Running the model task with the following parameters:
target: condition2
model: GaussianNB()
n_class: 2
n_iter: 100 


Fitting 3 folds for each of 1 candidates, totalling 3 fits
Accuracy: 0.4480446927374302

Classification Report:
               precision    recall  f1-score   support

         0.0       0.77      0.01      0.02       995
         1.0       0.45      1.00      0.62       795

    accuracy                           0.45      1790
   macro avg       0.61      0.50      0.32      1790
weighted avg       0.63      0.45      0.28      1790


Best Parameters:
 {}
