In [None]:
import sys

from pathlib import Path

# Name of the project's top-level folder; paths below are resolved relative to it.
PROJECT_DIR_NAME = "Fake_Users_Movies_Classifier"


def find_project_root(start, marker=PROJECT_DIR_NAME):
    """Return the path from the root of ``start`` down to the first folder named ``marker``.

    The components of ``start`` are scanned from the outermost one inwards and
    the path is cut right after the first component equal to ``marker``.
    Splitting is delegated to ``pathlib``, so the platform's own separator is
    honoured instead of a hard-coded backslash.

    Parameters
    ----------
    start : str or os.PathLike
        Folder to start from (typically where this notebook lives).
    marker : str, optional
        Folder name that identifies the project root.

    Returns
    -------
    pathlib.Path
        The truncated path, or ``start`` unchanged (as a ``Path``) when no
        component matches ``marker``.
    """
    start_path = Path(start)
    parts = start_path.parts
    for depth, part in enumerate(parts, start=1):
        if part == marker:
            return Path(*parts[:depth])
    # Marker not found: fall back to the full starting path.
    return start_path


# Components of the folder taken from sys.path[0]
# (presumably the folder this notebook runs from -- confirm for your kernel).
main_path = list(Path(sys.path[0]).parts)

# Assign path to parent folder
# path_to_parent allows access to any folder from within the parent folder, no matter
# where this file sits inside it, i.e. there is no need to chain "../" several times.
# It is kept as a plain string so it can be concatenated with relative paths.
path_to_parent = str(find_project_root(sys.path[0]))

# Add path to feature generation folder (only once, so re-running this cell
# does not keep growing sys.path)
feature_generation_dir = str(Path(path_to_parent) / "feature_generation")
if feature_generation_dir not in sys.path:
    sys.path.append(feature_generation_dir)

In [None]:
# Import feature generator for week 1
from feature_gen_wk1 import feature_gen

# Location of the labelled first batch, expressed relative to the project root
labelled_batch_suffix = "/data/labelled_data/first_batch_with_labels_likes.npz"
path_to_file = path_to_parent + labelled_batch_suffix

# Instantiate the week-1 generator, then have it read the file and generate the features
week1_generator = feature_gen()
df_final = week1_generator.retrieveAndGenerate(path_to_file)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Predictors are every column except 'user' and 'label'; the target is 'label'
features = df_final.columns.difference(['user', 'label'])
X, y = df_final[features], df_final['label']

# Hold out 20% of the rows for validation, stratified on the label, with a fixed seed
split_options = dict(test_size=0.2, random_state=11, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

# Standardise the predictors: the scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Define the hyperparameters for a neural network with two hidden layers
hidden_layer_sizes = (50, 10)  # The number of neurons in each hidden layer
activation = 'tanh'  # Activation function for the hidden layers ('logistic', 'tanh', 'relu', etc.)
solver = 'adam'  # The optimization algorithm ('adam', 'sgd', 'lbfgs', etc.)
alpha = 0.0001  # L2 regularization parameter
# Learning-rate schedule ('constant', 'invscaling', 'adaptive').
# NOTE: scikit-learn only applies this schedule when solver='sgd'; with
# solver='adam' the value is accepted but has no effect on training.
learning_rate = 'adaptive'
max_iter = 2000  # Maximum number of iterations (epochs for the stochastic solvers)
random_state = 44  # Seed for random initialization
batch_size = 410  # Mini-batch size used by the stochastic solvers
beta_1 = 0.7  # adam: exponential decay rate for the first-moment estimates
beta_2 = 0.994  # adam: exponential decay rate for the second-moment estimates

# Construct the neural network from the hyperparameters above
mlp = MLPClassifier(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate,
    max_iter=max_iter,
    random_state=random_state,
    batch_size=batch_size,
    beta_1=beta_1,
    beta_2=beta_2,
)
# Train the neural network on the scaled training split
mlp.fit(X_train_scaled, y_train)

# Validation-set probabilities taken from column 1 of predict_proba,
# plus the ROC AUC computed from those probabilities
mlp_probs = mlp.predict_proba(X_val_scaled)[:, 1]
mlp_auc = roc_auc_score(y_val, mlp_probs)

# Binary predictions: 1 wherever the probability reaches the 0.5 threshold, else 0
mlp_preds = (mlp_probs >= 0.50).astype(int)

# Precision, recall and F1-score of the thresholded predictions
precision, recall, f1 = (
    metric(y_val, mlp_preds)
    for metric in (precision_score, recall_score, f1_score)
)

# Report every metric, one per line
for caption, value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1),
    ("ROC AUC for 2-layer Neural Network:", mlp_auc),
):
    print(caption, value)