In [1]:
# Standard library
from datetime import datetime

# Scikit learn
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.decomposition import PCA



# External modules
from module_path import test_data_path, train_data_path, plots_data_path
from module_data import Dataset
from module_graph import graph_tree
from module_model import ModelEvaluation, ModelEvaluationXG, ModelSubmission
from model_params2 import Models

Plots directory found in  ../plots


In [2]:
# Record and log when this run started, formatted as YYYY-MM-DD HH:MM:SS.
start_datetime = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
print("Start date & time : ", start_datetime)

# Configure the project's Dataset loader.
# NOTE(review): going by the flag names, this requests imputed,
# non-standardized data without restricting to "relevant" features —
# confirm the exact semantics in module_data.
df = Dataset(data_imputed=True, data_standarized=False, relevant_data=False)

# process() yields the training frame, the test frame and the target labels.
df_train, df_test, labels = df.process()

Start date & time :  2025-04-30 16:25:13
Train data directory found in  ../data/TRAIN_NEW
Test data directory found in  ../data/TEST
NaN values processed for every dataset by kNNImputer algorithm considering 5 neighbors.


In [3]:
# Names of the two target columns the models are trained on.
targets = ['ADHD_Outcome', 'Sex_F']

# Hyperparameter search space for logistic regression (used by GridSearchCV).
param_grid_lr = dict(
    C=[0.1, 1.0, 10.0],
    solver=['lbfgs', 'liblinear'],
)

# Hyperparameter search space for the random forest (used by GridSearchCV).
param_grid_rf = dict(
    n_estimators=[100, 500, 1000],
    max_depth=[3, 5, 10, 20],
    bootstrap=[True, False],
    criterion=['gini', 'entropy'],
)

# Iteration cap ("iteraciones" = iterations).
# NOTE(review): presumably meant as the solver's max_iter — confirm usage.
iteraciones = 5000

# Name of the metric used to score models.
scoring = 'f1'

In [4]:
# Bundle the training features with both target columns in a single Models
# object; tag1/tag2 are the short labels attached to each target.
# Despite the `lr_` prefix, this same object is also used further down to
# fit the random forest.
lr_model = Models(
    X=df_train,
    y1=labels[targets[0]],
    y2=labels[targets[1]],
    tag1='adhd',
    tag2='sex_f',
)

In [8]:
# Fit one logistic regression per target (adhd, sex_f) with fixed
# hyperparameters and print the evaluation report for each.
#
# The scoring metric and the iteration cap come from the configuration cell
# (`scoring`, `iteraciones`) instead of being re-typed here, so that changing
# the configuration actually changes this run.
#
# NOTE(review): with gridsearch=False, param_grid / scoring / cv are
# presumably ignored by log_regression — confirm in model_params2.
lr_adhd, lr_sex_f = lr_model.log_regression(
    gridsearch=False,
    param_grid=param_grid_lr,
    scoring=scoring,
    cv=5,
    model_evaluation=True,
    max_iter=iteraciones,
    solver='lbfgs',
)

Mlruns directory found in  ../mlruns

Model evaluation: LogisticRegression - adhd

Confusion matrix (adhd):
[[ 68  53]
 [ 22 221]]

F1_score  : 0.85
Accuracy  : 0.79
Precision : 0.81
Recall    : 0.91
Mlruns directory found in  ../mlruns

Model evaluation: LogisticRegression - sex_f

Confusion matrix (sex_f):
[[214  40]
 [ 73  37]]

F1_score  : 0.40
Accuracy  : 0.69
Precision : 0.48
Recall    : 0.34


In [9]:
# Fit one random forest per target (adhd, sex_f), tuning hyperparameters with
# a 5-fold grid search over param_grid_rf, then print the evaluation report.
#
# The scoring metric comes from the configuration cell (`scoring`) instead of
# being re-typed here, so that changing the configuration changes this run.
#
# NOTE(review): the explicit n_estimators / criterion / max_depth / bootstrap
# values overlap with keys in param_grid_rf; with gridsearch=True they are
# presumably only fallbacks that the search overrides — confirm in
# model_params2.
rf_adhd, rf_sex_f = lr_model.random_forest(
    gridsearch=True,
    param_grid=param_grid_rf,
    scoring=scoring,
    cv=5,
    model_evaluation=True,
    n_estimators=1000,
    criterion="gini",
    max_depth=10,
    random_state=42,  # fixed seed for reproducible forests
    bootstrap=True,
)

Ejecutando GridSearchCV para RandomForestClassifier - adhd

Mejores hiperparámetros para adhd:
  bootstrap: True
  criterion: entropy
  max_depth: 5
  n_estimators: 1000
Mlruns directory found in  ../mlruns

Model evaluation: RandomForestClassifier - adhd

Confusion matrix (adhd):
[[ 59  62]
 [ 18 225]]

F1_score  : 0.85
Accuracy  : 0.78
Precision : 0.78
Recall    : 0.93
Ejecutando GridSearchCV para RandomForestClassifier - sex_f

Mejores hiperparámetros para sex_f:
  bootstrap: False
  criterion: gini
  max_depth: 20
  n_estimators: 1000
Mlruns directory found in  ../mlruns

Model evaluation: RandomForestClassifier - sex_f

Confusion matrix (sex_f):
[[219  35]
 [ 87  23]]

F1_score  : 0.27
Accuracy  : 0.66
Precision : 0.40
Recall    : 0.21
