## Implementation of a Neural Network

In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from joblib import load


# Load the saved feature matrices and the target variable.
# NOTE: joblib.load unpickles its input, which can execute arbitrary code,
# so only trusted .pkl files should be loaded here.

# Training labels, shared by the TF-IDF and Count Vectorizer splits below
# (loaded once; the original cell read this same file twice).
target = load('../pkl_files/target.pkl')

# TF-IDF feature matrices for the training and test data
model_train_tfidf = load('../pkl_files/model_train_tfidf.pkl')
model_test_tfidf = load('../pkl_files/model_test_tfidf.pkl')

# Count Vectorizer feature matrices for the training and test data
model_train_count = load('../pkl_files/model_train_count.pkl')
model_test_count = load('../pkl_files/model_test_count.pkl')

In [8]:
# Hold out 20% of the TF-IDF training data as a validation split
# (fixed random_state so the split is reproducible).
(
    X_validation_train_tfidf,
    X_validation_test_tfidf,
    y_validation_train_tfidf,
    y_validation_test_tfidf,
) = train_test_split(
    model_train_tfidf,
    target,
    test_size=0.2,
    random_state=42,
)

Perform a cross-validated grid search to determine the optimal hyperparameters for the neural network.

In [None]:
# Base estimator for the hyperparameter search. hidden_layer_sizes, alpha and
# learning_rate_init are overridden by each candidate in param_grid.
# The SGD-only options (learning_rate schedule, momentum, nesterovs_momentum)
# are intentionally not set: scikit-learn ignores them when solver='adam'.
mlp_tfidf_cv = MLPClassifier(hidden_layer_sizes=(64, 32),  # Two hidden layers, with 64 and 32 neurons respectively
                    solver='adam',               # Adam optimizer
                    alpha=0.0001,                # L2 regularization strength (scikit-learn default)
                    max_iter=50,                 # Maximum number of epochs
                    random_state=42,             # Random seed for reproducibility
                    batch_size=32,               # Minibatch size
                    learning_rate_init=0.01,     # Initial step size
                    early_stopping=True,         # Stop when the internal validation score stops improving
                    warm_start=False,            # Do not reuse the previous solution
                    tol=1e-4,                    # Tolerance for stopping
                    verbose=True)


param_grid = {
    'hidden_layer_sizes': [(64, 32), (128,), (64, 64)],  # Different architectures
    'alpha': [0.0001, 0.001],                            # Regularization
    'learning_rate_init': [0.001, 0.01],                 # Learning rates
}

# 3-fold cross-validated search scored by weighted F1, using all CPU cores
grid_search = GridSearchCV(estimator=mlp_tfidf_cv, param_grid=param_grid,
                           scoring='f1_weighted', cv=3, verbose=1, n_jobs=-1)

# Fit the Grid Search model on the training split
grid_search.fit(X_validation_train_tfidf, y_validation_train_tfidf)

# Output the best parameters and F1 score found
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validated F1 score: {grid_search.best_score_:.4f}")

# GridSearchCV (refit=True by default) has already refit the best
# configuration on the whole training split; no extra fit is needed.
best_model = grid_search.best_estimator_

# Make predictions on the held-out validation split
y_val_pred_tfidf = best_model.predict(X_validation_test_tfidf)

# Evaluate the model performance with the weighted F1 score
f1score = f1_score(y_validation_test_tfidf, y_val_pred_tfidf, average='weighted')
print(f"F1 on test set: {f1score:.4f}")

# To persist the tuned model, import `dump` from joblib and call
# dump(best_model, '../pkl_files/mlp_model.pkl').

Fitting 3 folds for each of 12 candidates, totalling 36 fits
Iteration 1, loss = 0.54861118
Validation score: 0.731527
Iteration 1, loss = 0.56342031
Validation score: 0.758621
Iteration 1, loss = 0.64111006
Iteration 1, loss = 0.56514255
Validation score: 0.714286
Validation score: 0.788177
Iteration 1, loss = 0.63394168
Validation score: 0.795567
Iteration 1, loss = 0.64416335
Validation score: 0.751232
Iteration 2, loss = 0.23879959
Validation score: 0.682266
Iteration 2, loss = 0.26917050
Validation score: 0.657635
Iteration 2, loss = 0.26044986
Validation score: 0.642857
Iteration 2, loss = 0.44511697
Validation score: 0.783251
Iteration 2, loss = 0.45080041
Validation score: 0.736453
Iteration 2, loss = 0.45547688
Validation score: 0.780788
Iteration 1, loss = 0.61818959
Validation score: 0.743842
Iteration 1, loss = 0.62126417
Validation score: 0.780788
Iteration 3, loss = 0.07651488
Validation score: 0.756158
Iteration 3, loss = 0.10282978
Validation score: 0.674877
Iteration 3

In [None]:
# Final network configuration: one hidden layer of 128 units trained with Adam.
# The SGD-only options (learning_rate schedule, momentum, nesterovs_momentum)
# are intentionally not set: scikit-learn ignores them when solver='adam'.
mlp_model = MLPClassifier(hidden_layer_sizes=(128,),  # One hidden layer
                    solver='adam',               # Adam optimizer
                    alpha=0.0001,                # L2 regularization strength (scikit-learn default)
                    max_iter=50,                 # Maximum number of epochs
                    random_state=42,             # Random seed for reproducibility
                    batch_size=32,               # Minibatch size
                    learning_rate_init=0.001,    # Initial step size
                    early_stopping=True,         # Stop when the internal validation score stops improving
                    warm_start=False,            # Every .fit() call retrains from scratch
                    tol=1e-4,                    # Tolerance for stopping
                    verbose=True)

# Train on the TF-IDF training split
mlp_model.fit(X_validation_train_tfidf, y_validation_train_tfidf)

# Make predictions on the held-out validation split with the model fitted
# above (previously this used `best_model` from the grid-search cell, so the
# reported score did not belong to `mlp_model`).
y_val_pred_tfidf = mlp_model.predict(X_validation_test_tfidf)

# Evaluate the model performance with the weighted F1 score
f1score = f1_score(y_validation_test_tfidf, y_val_pred_tfidf, average='weighted')
print(f"F1 on test set: {f1score:.4f}")

Iteration 1, loss = 0.58250530
Iteration 2, loss = 0.39547893
Iteration 3, loss = 0.25933650
Iteration 4, loss = 0.16802419
Iteration 5, loss = 0.11227565
Iteration 6, loss = 0.07675395
Iteration 7, loss = 0.05621246
Iteration 8, loss = 0.04562142
Iteration 9, loss = 0.03706747
Iteration 10, loss = 0.03219829
Iteration 11, loss = 0.02744225
Iteration 12, loss = 0.02615657
Iteration 13, loss = 0.02369998
Iteration 14, loss = 0.02126173
Iteration 15, loss = 0.02139052
Iteration 16, loss = 0.01975514
Iteration 17, loss = 0.01905451
Iteration 18, loss = 0.01956984
Iteration 19, loss = 0.01915120
Iteration 20, loss = 0.02142935
Iteration 21, loss = 0.01856879
Iteration 22, loss = 0.01644559
Iteration 23, loss = 0.01615572
Iteration 24, loss = 0.01652447
Iteration 25, loss = 0.01512943
Iteration 26, loss = 0.01517099
Iteration 27, loss = 0.01766475
Iteration 28, loss = 0.01577301
Iteration 29, loss = 0.01633920
Iteration 30, loss = 0.01515577
Iteration 31, loss = 0.01742626
Iteration 32, los

In [14]:
# Refit the chosen configuration on the full TF-IDF training data
mlp_model.fit(model_train_tfidf, target)

# Predict the test set with the model that was just refit (previously this
# used `best_model`, which discarded the refit above and depended on the
# grid-search cell having been run).
y_nn_pred_tfidf = mlp_model.predict(model_test_tfidf)

neural_network_tfidf_submission = pd.read_csv("../Data/sample_submission.csv")

# Replace the empty target column with the predicted values
neural_network_tfidf_submission['target'] = y_nn_pred_tfidf

# Save results for submission
neural_network_tfidf_submission.to_csv('../csv_files/neural_network_tfidf_submission.csv', index=False)

Iteration 1, loss = 0.57174146
Iteration 2, loss = 0.38137917
Iteration 3, loss = 0.24825102
Iteration 4, loss = 0.15916433
Iteration 5, loss = 0.10640699
Iteration 6, loss = 0.07546437
Iteration 7, loss = 0.05728817
Iteration 8, loss = 0.04456114
Iteration 9, loss = 0.03965334
Iteration 10, loss = 0.03453431
Iteration 11, loss = 0.03189475
Iteration 12, loss = 0.02924954
Iteration 13, loss = 0.02737406
Iteration 14, loss = 0.02649217
Iteration 15, loss = 0.02566075
Iteration 16, loss = 0.02461917
Iteration 17, loss = 0.02476661
Iteration 18, loss = 0.02333141
Iteration 19, loss = 0.02363084
Iteration 20, loss = 0.02320705
Iteration 21, loss = 0.02150139
Iteration 22, loss = 0.02331961
Iteration 23, loss = 0.02391142
Iteration 24, loss = 0.02367958
Iteration 25, loss = 0.02197073
Iteration 26, loss = 0.02293009
Iteration 27, loss = 0.02401903
Iteration 28, loss = 0.02185660
Iteration 29, loss = 0.01918794
Iteration 30, loss = 0.02092213
Iteration 31, loss = 0.02068775
Iteration 32, los

## Fit on count-vectorized data and make predictions

In [15]:
# Hold out 20% of the Count Vectorizer training data as a validation split
# (fixed random_state so the split is reproducible).
(
    X_validation_train_count,
    X_validation_test_count,
    y_validation_train_count,
    y_validation_test_count,
) = train_test_split(
    model_train_count,
    target,
    test_size=0.2,
    random_state=42,
)

In [17]:
## fit count data

# Retrain the network on the Count Vectorizer training split
mlp_model.fit(X_validation_train_count, y_validation_train_count)

# Make predictions on the held-out validation split
y_val_pred_count = mlp_model.predict(X_validation_test_count)

# Evaluate the model performance with the weighted F1 score
# (f1_score comes from the notebook's import cell; no re-import needed here)
f1score = f1_score(y_validation_test_count, y_val_pred_count, average='weighted')

print(f"F1: {f1score:.4f}")

Iteration 1, loss = 0.52233737
Iteration 2, loss = 0.27095740
Iteration 3, loss = 0.13755575
Iteration 4, loss = 0.07729568
Iteration 5, loss = 0.05022810
Iteration 6, loss = 0.03324872
Iteration 7, loss = 0.02323554
Iteration 8, loss = 0.02167002
Iteration 9, loss = 0.01965517
Iteration 10, loss = 0.01889555
Iteration 11, loss = 0.01372819
Iteration 12, loss = 0.01350051
Iteration 13, loss = 0.01303919
Iteration 14, loss = 0.01204631
Iteration 15, loss = 0.01384831
Iteration 16, loss = 0.01155426
Iteration 17, loss = 0.01423066
Iteration 18, loss = 0.01395587
Iteration 19, loss = 0.01423809
Iteration 20, loss = 0.01384358
Iteration 21, loss = 0.01340790
Iteration 22, loss = 0.01128727
Iteration 23, loss = 0.01188176
Iteration 24, loss = 0.01329917
Iteration 25, loss = 0.01199416
Iteration 26, loss = 0.01064152
Iteration 27, loss = 0.01237469
Iteration 28, loss = 0.01112669
Iteration 29, loss = 0.01414105
Iteration 30, loss = 0.01170316
Iteration 31, loss = 0.01121986
Iteration 32, los

In [18]:
# Refit on the complete Count Vectorizer training data before predicting
mlp_model.fit(model_train_count, target)

# Predicted labels for the competition test set
y_nn_pred_count = mlp_model.predict(model_test_count)

# Start from the sample submission and fill its target column with the predictions
neural_network_count_submission = pd.read_csv(
    "../Data/sample_submission.csv"
).assign(target=y_nn_pred_count)

# Write the submission file without the index column
neural_network_count_submission.to_csv(
    '../csv_files/neural_network_count_submission.csv',
    index=False,
)

Iteration 1, loss = 0.51309920
Iteration 2, loss = 0.27165443
Iteration 3, loss = 0.14166102
Iteration 4, loss = 0.08116558
Iteration 5, loss = 0.05247939
Iteration 6, loss = 0.03807743
Iteration 7, loss = 0.03014583
Iteration 8, loss = 0.02452322
Iteration 9, loss = 0.02339924
Iteration 10, loss = 0.02080437
Iteration 11, loss = 0.02203789
Iteration 12, loss = 0.02006962
Iteration 13, loss = 0.02061589
Iteration 14, loss = 0.02134817
Iteration 15, loss = 0.02080122
Iteration 16, loss = 0.02077087
Iteration 17, loss = 0.01834361
Iteration 18, loss = 0.01928815
Iteration 19, loss = 0.02047938
Iteration 20, loss = 0.02038682
Iteration 21, loss = 0.01778848
Iteration 22, loss = 0.01805455
Iteration 23, loss = 0.01796438
Iteration 24, loss = 0.01728696
Iteration 25, loss = 0.02012459
Iteration 26, loss = 0.02244958
Iteration 27, loss = 0.01951059
Iteration 28, loss = 0.01848613
Iteration 29, loss = 0.01606726
Iteration 30, loss = 0.01738664
Iteration 31, loss = 0.01571252
Iteration 32, los