In [None]:
import argparse
import os
import sys
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

# Make the directory two levels above the working directory importable; it is
# expected to contain the `src` package used by the imports below.
# NOTE: '../..' is resolved against the current working directory, so the
# kernel must be started from the notebook's own folder for this to work.
PROJECT_ROOT = os.path.abspath('../..')
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Project-local modules (resolved through the sys.path entry added above)
from src.data.preprocess import preprocess
from src.data.make_dataset import create_dataset
from src.models.train_model import train_and_save, train_study_and_save, tune_and_save
from src.visualization.visualize import visualize_study, visualize_evaluate, visualize_train
from src.models.predict_model import evaluate_model, evaluate_model_opt_threshold, predict_model


In [5]:
# Run the project's preprocessing step on the raw file "raw_data".
# Per the log output, the result is written to
# data/processed/processed_data.csv.
preprocess(
    file_name_raw="raw_data",
    file_name_processed="processed_data",
)

2025-03-16 00:22:02,899 - INFO - Data successfully loaded.
  df[yes_no_columns] = df[yes_no_columns].replace({"Yes": 1, "No": 0})
2025-03-16 00:22:02,956 - INFO - Data preprocessing completed successfully.
2025-03-16 00:22:03,010 - INFO - Processed data saved to c:\Users\duyhu\Downloads\ML_Beginners\src\data\..\..\data\processed\processed_data.csv.


In [6]:
# Load the processed data and obtain the train / validation / test features
# and targets (the splitting itself is implemented in src.data.make_dataset).
dataset_splits = create_dataset(file_name="processed_data")
X_train, X_val, X_test, y_train, y_val, y_test = dataset_splits

2025-03-16 00:22:13,157 - INFO - Loading data: processed_data


In [None]:
# Baseline run with hand-picked hyperparameters. Per the log output, the
# training results and the model are stored under the name
# "ne_neural_network_manual".
manual_hyperparams = {
    "lr": 0.001,
    "batch_size": 32,
    "epochs": 20,
    "hidden_size": 15,
}

train_and_save(
    model_type="neural_network",
    model_name="neural_network_manual",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    **manual_hyperparams,
)

Training neural_network with the following hyperparameters:
lr: 0.001
batch_size: 32
epochs: 20
hidden_size: 15


2025-03-16 00:23:36,149 - INFO - Epoch [10/20], Train Loss: 0.4143, Validation Loss: 0.4307
2025-03-16 00:23:38,327 - INFO - Epoch [20/20], Train Loss: 0.4054, Validation Loss: 0.4334
2025-03-16 00:23:38,330 - INFO - Training results saved to c:\Users\duyhu\Downloads\ML_Beginners\src\..\storage\trainings\ne_neural_network_manual.pkl.
2025-03-16 00:23:38,334 - INFO - Model saved to c:\Users\duyhu\Downloads\ML_Beginners\src\..\models\experiments\ne_neural_network_manual.pth


In [14]:
# Hyperparameter search for the neural network (an Optuna study, judging by
# the log output), minimising the objective over a handful of trials.
# NOTE(review): 5 trials is very few for a search over lr, batch_size,
# hidden_size and epochs -- presumably a smoke-test setting; confirm.
# NOTE(review): the logged objective values (~4.9 to ~30.9) are on a different
# scale from the per-epoch losses logged during training (~0.4) -- verify what
# the objective actually measures.
search_settings = {
    "n_trials": 5,
    "direction": "minimize",
}

tune_and_save(
    model_type="neural_network",
    model_name="neural_network_study",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    **search_settings,
)

[I 2025-03-16 00:56:16,093] A new study created in memory with name: no-name-798bccf4-7c85-4b80-a4eb-c773a3a6b850
[I 2025-03-16 00:56:22,120] Trial 0 finished with value: 4.893602478317916 and parameters: {'lr': 0.037460266483547784, 'batch_size': 124, 'hidden_size': 190, 'epochs': 64}. Best is trial 0 with value: 4.893602478317916.
[I 2025-03-16 00:56:40,123] Trial 1 finished with value: 30.938045274127614 and parameters: {'lr': 0.01561030385783923, 'batch_size': 47, 'hidden_size': 22, 'epochs': 88}. Best is trial 0 with value: 4.893602478317916.
[I 2025-03-16 00:56:53,322] Trial 2 finished with value: 6.887500358479364 and parameters: {'lr': 0.06011549002420346, 'batch_size': 100, 'hidden_size': 13, 'epochs': 98}. Best is trial 0 with value: 4.893602478317916.
[I 2025-03-16 00:56:58,126] Trial 3 finished with value: 12.175545931435549 and parameters: {'lr': 0.08324593965363418, 'batch_size': 52, 'hidden_size': 53, 'epochs': 26}. Best is trial 0 with value: 4.893602478317916.
[I 2025-

In [15]:
# Plot the saved tuning study "ne_neural_network_study". Per the log output
# this produces the optimization history, slice plot and parameter importance
# figures, saved as PNGs under storage/plots/ne_neural_network_study/.
visualize_study("ne_neural_network_study")

2025-03-16 00:57:31,117 - INFO - Loading Optuna study from file: ne_neural_network_study
2025-03-16 00:57:31,120 - INFO - Study ne_neural_network_study loaded successfully.
2025-03-16 00:57:31,134 - INFO - Plotting optimization history for study.
  fig = optuna.visualization.matplotlib.plot_optimization_history(study)
2025-03-16 00:57:31,953 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\st_optimization_history.png
2025-03-16 00:57:31,954 - INFO - Plotting slice plot for study.
  fig = optuna.visualization.matplotlib.plot_slice(study)
2025-03-16 00:57:32,321 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\st_slice_plot.png
2025-03-16 00:57:32,324 - INFO - Plotting parameter importance for study.
  fig = optuna.visualization.matplotlib.plot_param_importances(study)
2025-03-16 00:57:32,636 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML

In [16]:
# Retrain the network from the saved study. Per the log output, the
# hyperparameters used (lr ~0.0375, batch_size 124, hidden_size 190,
# epochs 64) match the best trial (trial 0) of the tuning run.
# NOTE(review): in the logged run the validation loss rises steadily
# (0.4398 at epoch 10 to 0.5902 at epoch 60) while the training loss falls,
# which looks like overfitting -- consider fewer epochs or early stopping.
tuned_study_file = "ne_neural_network_study"

train_study_and_save(
    model_type="neural_network",
    model_name="neural_network_study",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    file_name=tuned_study_file,
)

2025-03-16 00:58:10,608 - INFO - Study ne_neural_network_study loaded successfully.


Training neural_network with the following hyperparameters:
lr: 0.037460266483547784
batch_size: 124
hidden_size: 190
epochs: 64


2025-03-16 00:58:11,692 - INFO - Epoch [10/64], Train Loss: 0.4106, Validation Loss: 0.4398
2025-03-16 00:58:12,847 - INFO - Epoch [20/64], Train Loss: 0.3953, Validation Loss: 0.4612
2025-03-16 00:58:14,037 - INFO - Epoch [30/64], Train Loss: 0.3761, Validation Loss: 0.4783
2025-03-16 00:58:15,161 - INFO - Epoch [40/64], Train Loss: 0.3618, Validation Loss: 0.5106
2025-03-16 00:58:16,375 - INFO - Epoch [50/64], Train Loss: 0.3448, Validation Loss: 0.5610
2025-03-16 00:58:17,386 - INFO - Epoch [60/64], Train Loss: 0.3463, Validation Loss: 0.5902
2025-03-16 00:58:17,778 - INFO - Training results saved to c:\Users\duyhu\Downloads\ML_Beginners\src\..\storage\trainings\ne_neural_network_study.pkl.
2025-03-16 00:58:17,782 - INFO - Model saved to c:\Users\duyhu\Downloads\ML_Beginners\src\..\models\experiments\ne_neural_network_study.pth


In [17]:
# Plot the loss curves from the saved training results of the tuned model.
# Per the log output, the figure is saved as
# storage/plots/ne_neural_network_study/tr_loss_curve.png.
visualize_train("ne_neural_network_study")

2025-03-16 00:58:40,521 - INFO - Loading training results from file: ne_neural_network_study
2025-03-16 00:58:40,521 - INFO - Training ne_neural_network_study loaded successfully.
2025-03-16 00:58:40,533 - INFO - Plotting loss curves.
2025-03-16 00:58:40,659 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\tr_loss_curve.png


In [21]:
# Evaluate the tuned model on the test split with a fixed threshold of 0.5.
# Per the log output this reports accuracy, precision, recall, F1 and ROC AUC
# and saves the results to storage/evaluations/ne_neural_network_study.pkl.
evaluate_model(
    file_name="ne_neural_network_study",
    X_test=X_test,
    y_test=y_test,
    threshold=0.5,
)

2025-03-16 01:09:27,149 - INFO - Model ne_neural_network_study loaded successfully.
2025-03-16 01:09:27,175 - INFO - Accuracy: 0.7882
2025-03-16 01:09:27,176 - INFO - Precision: 0.6000
2025-03-16 01:09:27,177 - INFO - Recall: 0.5304
2025-03-16 01:09:27,178 - INFO - F1: 0.5630
2025-03-16 01:09:27,179 - INFO - Roc_auc: 0.8100
2025-03-16 01:09:27,183 - INFO - Evaluation results saved to c:\Users\duyhu\Downloads\ML_Beginners\src\..\storage\evaluations\ne_neural_network_study.pkl.


In [22]:
# Plot the saved evaluation results of the tuned model. Per the log output
# this draws the confusion matrix, ROC curve and precision-recall curve and
# saves them under storage/plots/ne_neural_network_study/.
visualize_evaluate("ne_neural_network_study")

2025-03-16 01:09:57,065 - INFO - Loading evaluation results from file: ne_neural_network_study
2025-03-16 01:09:57,065 - INFO - Evaluation ne_neural_network_study loaded successfully.
2025-03-16 01:09:57,080 - INFO - Plotting confusion matrix.
2025-03-16 01:09:57,207 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\ev_confusion_matrix.png
2025-03-16 01:09:57,208 - INFO - Plotting ROC curve and AUC score.
2025-03-16 01:09:57,331 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\ev_roc_curve.png
2025-03-16 01:09:57,332 - INFO - Plotting Precision-Recall curve.
2025-03-16 01:09:57,429 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\ev_precision_recall_curve.png


In [23]:
# Evaluate the tuned model again, this time with a threshold chosen
# automatically (the log reports it as maximising Youden's J).
# NOTE(review): only the test split is passed in, so the threshold is
# presumably selected on the same data the metrics are reported on, which
# would make those metrics optimistic -- confirm, and prefer selecting the
# threshold on the validation split.
# NOTE(review): per the logged path, the results are saved to the same
# storage/evaluations/ne_neural_network_study.pkl file as the fixed-threshold
# evaluate_model run, so that earlier result is overwritten.
evaluate_model_opt_threshold(
    file_name="ne_neural_network_study",
    X_test=X_test,
    y_test=y_test,
)

2025-03-16 01:10:23,639 - INFO - Model ne_neural_network_study loaded successfully.
2025-03-16 01:10:23,654 - INFO - Optimal Threshold (Maximizing Youden's J): 0.3365
2025-03-16 01:10:23,669 - INFO - Accuracy: 0.7413
2025-03-16 01:10:23,670 - INFO - Precision: 0.4982
2025-03-16 01:10:23,671 - INFO - Recall: 0.7459
2025-03-16 01:10:23,672 - INFO - F1: 0.5973
2025-03-16 01:10:23,672 - INFO - Roc_auc: 0.8100
2025-03-16 01:10:23,676 - INFO - Evaluation results saved to c:\Users\duyhu\Downloads\ML_Beginners\src\..\storage\evaluations\ne_neural_network_study.pkl.


In [24]:
# Re-plot the evaluation figures (confusion matrix, ROC curve,
# precision-recall curve) from the saved evaluation results.
# NOTE(review): per the logged paths, these PNGs have the same file names as
# those from the earlier visualize_evaluate call, so the earlier figures are
# replaced on disk.
visualize_evaluate("ne_neural_network_study")

2025-03-16 01:10:33,614 - INFO - Loading evaluation results from file: ne_neural_network_study
2025-03-16 01:10:33,615 - INFO - Evaluation ne_neural_network_study loaded successfully.
2025-03-16 01:10:33,633 - INFO - Plotting confusion matrix.
2025-03-16 01:10:33,749 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\ev_confusion_matrix.png
2025-03-16 01:10:33,750 - INFO - Plotting ROC curve and AUC score.
2025-03-16 01:10:33,876 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\ev_roc_curve.png
2025-03-16 01:10:33,877 - INFO - Plotting Precision-Recall curve.
2025-03-16 01:10:33,981 - INFO - Plot saved as c:\Users\duyhu\Downloads\ML_Beginners\src\visualization\..\..\storage\plots\ne_neural_network_study\ev_precision_recall_curve.png
