# Explaining ANN (Classification) to identify depression

## Import all necessary libraries

Of the following imports, the most important libraries are: sklearn.neural_network and sklearn.metrics.

In [8]:
import joblib
import dice_ml
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from explainerdashboard import ClassifierExplainer, ExplainerDashboard
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from imblearn.over_sampling import SMOTE
from alibi.explainers.ale import ALE, plot_ale

## Defining global variables

The variables defined below can be adjusted to suit the chosen approach.

In [9]:
## Global variables designated for threshold control.
## The positive and negative cut-offs are kept as two separate values so
## that each one can be tuned on its own.
positive_threshold, negative_threshold = 0.01, -0.01

## Global variable designated for data loading.
## The CSV path is relative, so it is resolved against the current working directory.
df = pd.read_csv(
    "Dataset_MO_ENG.csv",
)

## Defining private variables

The variables defined below must not be modified, since they are an integral part of the notebook's operation.

In [10]:
## Private variables for neural network generation.
## Seed reused wherever a reproducible random state is needed for the network.
seedbase = 1

## Private variables for oversample.
random_state = 13
oversample = SMOTE(random_state=random_state)

## Private variables for the neural network by classification.
## Maps the five original target levels onto three grouped classes; the
## position of each name in target_namesbase matches the grouped class index.
class_dic = {
    1: 0,
    2: 0,
    3: 1,
    4: 2,
    5: 2,
}
target_namesbase = [
    "Low",
    "Medium",
    "High",
]

## Preparing the data for the neural network

The data is prepared in order to be processed by the neural network.

In [11]:
## Eliminating physical-related questions in dataset.
## Columns from position 102 up to (but not including) the last one are
## dropped; the last column is kept because it is used as the label below.
## The frame loaded into `df` is used throughout: no `df_base` is ever
## defined, and the feature columns must be taken from the frame *after*
## the drop so that every selected column still exists.
df = df.drop(df.columns[102:-1], axis=1)

## Grouping of the target according to the defined dictionary.
## NOTE: values that are not keys of class_dic become NaN, so re-running this
## cell on an already-mapped frame corrupts the target; reload the data first.
df['Target'] = df['Target'].map(class_dic)

## Assignment of local variables according to the data necessary for the neural network.
## Every column except the last is a feature; the last column is the label
## (presumably 'Target' -- confirm against the dataset layout).
train_colsbase = df.columns[0:-1]
labelbase = df.columns[-1]
X = df[train_colsbase]
y = df[labelbase]

## Oversample application

Oversampling (SMOTE) is applied to balance the number of samples in each class produced by the grouping.

In [None]:
## Resample features and labels together; both X and y are replaced by
## their resampled versions.
X, y = oversample.fit_resample(X=X, y=y)

## Preparation of the parameters of the neural network

We proceed to define the parameters necessary for the training of the neural network.

In [13]:
## Hold out 33% of the samples for testing; the fixed seed makes the split
## reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=seedbase,
)

## Neural network training

The alpha parameter and the initial learning rate are adjusted to obtain the best performance with the model, using cross-validation.

In [None]:
## Local variables are defined for the storage of the scores obtained by the training.
## Entry i of each list corresponds to entry i of regul_param_range.
cv_scores_mean = []
cv_scores_std = []

## Training and validation of different configurations.
## Candidate alpha values: powers of ten from 10^2 down to 10^-6.
regul_param_range = 10.0 ** -np.arange(-2, 7)

for regul_param in regul_param_range:

    ## Increase the max_iter parameter until it converges.
    ## The seed is `seedbase` (defined with the private variables); the
    ## previously referenced name `seed` does not exist and raised NameError.
    mlp = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic', solver='adam', alpha=regul_param,
             learning_rate='constant', learning_rate_init=0.0001, max_iter=100000, random_state=seedbase)

    ## 5-fold cross-validation scored with the macro-averaged F1.
    scores = cross_val_score(mlp, X, y, cv=5, scoring='f1_macro')

    cv_scores_mean.append(scores.mean())
    cv_scores_std.append(scores.std())

## The results obtained during the training are printed.
cv_scores_mean, cv_scores_std