In [1]:
import os
import sys
import joblib
sys.path.append("../")

from sklearn.preprocessing import MaxAbsScaler
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from scripts.sampling import sampling, balanceSampling
from scripts.features_extraction import load_features
import scripts.logger as log
import time

# Wall-clock reference point; the elapsed run time is derived from it later.
start_time = time.time()

# Feature sets to read from the .npz files.
# Alternative single-set run: names = ["symptoms"]
names = [
    "normalized/ks_level_2_norm",
    "normalized/community_count_norm",
    "normalized/betweenness_norm",
]

features, labels = load_features(names)

# Rebalance the classes with the project helper (threshold=35) before any
# splitting takes place.
features, labels = balanceSampling(features, labels, threshold=35)

# Show the resulting array shapes on stdout (one per line) and in the log.
print(features.shape, labels.shape, sep="\n")
log.simple_logger(f'Features size: {features.shape}')
log.simple_logger(f'Labels size: {labels.shape}')

# Hold out 20% of the samples for testing, stratified on the labels, with a
# fixed seed so the split is repeatable.
# NOTE(review): the split runs after balanceSampling; if that helper
# duplicates samples, copies could end up on both sides of the split - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# MaxAbsScaler normalization is currently disabled:
# scaler = MaxAbsScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# Persist the exact split next to the features.
# Other output names used previously:
# np.savez_compressed("../prediction_model/features/random_sample_symptoms_norm", Xtest=X_test, Xtrain=X_train, Ytest=y_test, Ytrain=y_train)
# np.savez_compressed("../prediction_model/features/MLP_mix_full_data_norm", Xtest=X_test, Xtrain=X_train, Ytest=y_test, Ytrain=y_train)
np.savez_compressed(
    "../prediction_model/features/MLP_mix_data_norm",
    Xtest=X_test,
    Xtrain=X_train,
    Ytest=y_test,
    Ytrain=y_train,
)

# Hyper-parameter grid handed to GridSearchCV. Every entry lists a single
# value, so the search evaluates exactly one candidate; with cv=3 it amounts
# to a 3-fold cross-validation of that one configuration.
param_grid = {
    "max_iter": [100],
    # One hidden layer with 80 units. The trailing comma matters: `(80)` is
    # just the integer 80, whereas `(80,)` is the one-element tuple that
    # matches the array-like form documented for `hidden_layer_sizes`.
    'hidden_layer_sizes': [(80,)],
    'alpha': [0.0001],
    'activation': ['relu'],
}

# Base MLP classifier with a fixed seed so repeated runs are comparable.
mlp = MLPClassifier(random_state=42)

# Accuracy-scored 3-fold search; n_jobs=-1 lets the fits run in parallel and
# verbose=3 reports progress per fit.
grid_search = GridSearchCV(mlp, param_grid, verbose=3, cv=3, scoring="accuracy", n_jobs=-1)

# Run the search on the training split only; the test split stays untouched.
grid_search.fit(X_train, y_train)

# Winning hyper-parameters and the estimator the grid search exposes for them.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Fetch the estimator's full parameter set once, echo it to stdout, then log
# one "name: value" line per parameter.
all_params = best_model.get_params()
print(all_params)
for param, value in all_params.items():
    log.simple_logger(f'{param}: {value}')

# Number of iterations the fitted model reports having run.
print(best_model.n_iter_)

# Score the model on the held-out test split.
predictions = best_model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)

log.simple_logger(f'Accuracy: {accuracy}')
print(f'Accuracy: {accuracy}')
# The per-class report is computed above but not displayed; uncomment to show:
# print('Classification Report:\n', report)

# Elapsed wall-clock time since start_time was captured at the top of the run.
end_time = time.time()
execution_time = end_time - start_time

# Split the elapsed seconds into hours, minutes and seconds. Rounding to the
# displayed precision first prevents a value such as 59.99996 s from being
# rendered as "60.0000" in the seconds field.
hours, remainder = divmod(round(execution_time, 4), 3600)
minutes, seconds = divmod(remainder, 60)

# Zero-pad every field (seconds as SS.ffff, total width 7) so the result is a
# well-formed hh:mm:ss string, e.g. 00:01:05.1234 rather than 00:01:5.1234.
formatted_execution_time = f'{int(hours):02}:{int(minutes):02}:{seconds:07.4f}'

# The value is a clock-style duration, so label it as hh:mm:ss instead of
# calling it a number of seconds.
log.simple_logger(f'The code took {formatted_execution_time} (hh:mm:ss) to execute.')
# Blank lines separate this run from the next one in the log.
log.simple_logger("\n\n")


# Models are written to a "models" folder inside the current working directory.
current_directory = os.getcwd()
relative_save_directory = 'models'
absolute_save_directory = os.path.join(current_directory, relative_save_directory)

# exist_ok=True makes this a no-op when the folder is already present.
os.makedirs(absolute_save_directory, exist_ok=True)

# Output file name. The variable names mention logistic regression, but the
# object written below is best_model, the MLP estimator from the grid search.
# File names used for other runs:
# logistic_regression_model_filename = 'MLP_model_symptoms_full.joblib'
# logistic_regression_model_filename = 'MLP_mix_full_data_norm.joblib'
logistic_regression_model_filename = 'MLP_mix_norm.joblib'
logistic_regression_model_path = os.path.join(
    absolute_save_directory,
    logistic_regression_model_filename,
)

# Serialize the fitted model to disk. Kept as the final expression so the
# notebook still displays joblib's return value.
joblib.dump(best_model, logistic_regression_model_path)


(26180, 653)
(26180,)
Fitting 3 folds for each of 1 candidates, totalling 3 fits
{'activation': 'relu', 'alpha': 0.0001, 'batch_size': 'auto', 'beta_1': 0.9, 'beta_2': 0.999, 'early_stopping': False, 'epsilon': 1e-08, 'hidden_layer_sizes': 80, 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'max_fun': 15000, 'max_iter': 100, 'momentum': 0.9, 'n_iter_no_change': 10, 'nesterovs_momentum': True, 'power_t': 0.5, 'random_state': 42, 'shuffle': True, 'solver': 'adam', 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': False, 'warm_start': False}
100
Accuracy: 0.8831168831168831




['c:\\Users\\Cristian\\Documents\\GitHub\\financial-project\\code\\prediction_model\\models\\MLP_mix_norm.joblib']