## Implementation of a Neural Network

In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
from pickle import load
from joblib import load

# joblib is imported under its own name so the calls below do not depend on
# which `load` (pickle's or joblib's) happened to be imported last at the top
# of the file: pickle.load expects an open file object, not a path, so only
# joblib's loader works with the path strings used here.
import joblib

# NOTE: joblib.load unpickles its input; only load artifact files you trust.

# The target is independent of the vectorizer, so it is loaded once and
# reused for both feature representations.
target = joblib.load('target.pkl')

# Saved TF-IDF artifacts (presumably the train / test feature matrices
# produced by an earlier preprocessing step -- confirm against that step).
model_train_tfidf = joblib.load('model_train_tfidf.joblib')
model_test_tfidf = joblib.load('model_test_tfidf.joblib')

# Saved Count Vectorizer artifacts (same train / test layout as above).
model_train_count = joblib.load('model_train_count.joblib')
model_test_count = joblib.load('model_test_count.joblib')

In [None]:
# Hold out 20% of the TF-IDF training data as a validation set; the fixed
# seed keeps the split reproducible between runs.
(
    X_validation_train_tfidf,
    X_validation_test_tfidf,
    y_validation_train_tfidf,
    y_validation_test_tfidf,
) = train_test_split(
    model_train_tfidf,
    target,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.metrics import f1_score

# Multi-layer perceptron trained on the TF-IDF training split.
mlp_tfidf = MLPClassifier(
    # Architecture: two hidden layers with 112 and 24 neurons respectively.
    hidden_layer_sizes=(112, 24),
    # Optimiser: mini-batch SGD with Nesterov momentum and an adaptive
    # learning-rate schedule starting from 0.01.
    solver='sgd',
    batch_size=64,
    learning_rate='adaptive',
    learning_rate_init=0.01,
    momentum=0.9,
    nesterovs_momentum=True,
    # Regularisation: L2 penalty strength (the library default).
    alpha=0.0001,
    # Stopping criteria: iteration cap, improvement tolerance, and early
    # stopping (the training log reports a validation score per iteration).
    max_iter=200,
    tol=1e-4,
    early_stopping=True,
    # Start from scratch on every fit, with a fixed seed for reproducibility,
    # and print per-iteration progress.
    warm_start=False,
    random_state=42,
    verbose=True,
)

mlp_tfidf.fit(X_validation_train_tfidf, y_validation_train_tfidf)

# Predict labels for the held-out validation split (not the real test set).
y_val_pred_tfidf = mlp_tfidf.predict(X_validation_test_tfidf)

# Score the validation predictions with the weighted-average F1.
f1score = f1_score(
    y_true=y_validation_test_tfidf,
    y_pred=y_val_pred_tfidf,
    average='weighted',
)

print(f"F1: {f1score:.4f}")

# To persist the fitted model, `import joblib` and then run:
#     joblib.dump(mlp_tfidf, 'mlp_model.pkl')
# (the fitted estimator is named `mlp_tfidf`; there is no `mlp` variable).

Iteration 1, loss = 0.68409021
Validation score: 0.569787
Iteration 2, loss = 0.68098069
Validation score: 0.569787
Iteration 3, loss = 0.67682423
Validation score: 0.569787
Iteration 4, loss = 0.67179675
Validation score: 0.569787
Iteration 5, loss = 0.66428722
Validation score: 0.569787
Iteration 6, loss = 0.65120345
Validation score: 0.604269
Iteration 7, loss = 0.63107876
Validation score: 0.715928
Iteration 8, loss = 0.59697963
Validation score: 0.802956
Iteration 9, loss = 0.54869654
Validation score: 0.793103
Iteration 10, loss = 0.48811557
Validation score: 0.817734
Iteration 11, loss = 0.43371778
Validation score: 0.817734
Iteration 12, loss = 0.38831655
Validation score: 0.809524
Iteration 13, loss = 0.35004480
Validation score: 0.816092
Iteration 14, loss = 0.31513672
Validation score: 0.817734
Iteration 15, loss = 0.27917737
Validation score: 0.809524
Iteration 16, loss = 0.24460711
Validation score: 0.789819
Iteration 17, loss = 0.21294929
Validation score: 0.816092
Iterat

NameError: name 'mlp' is not defined