In [10]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load datasets
train_data = pd.read_csv('datasets/Video_games_esrb_rating.csv')  # Adjust to your file path for training data
test_data = pd.read_csv('datasets/test_esrb.csv')  # Adjust to your file path for test data

# Preprocess datasets
# Assuming 'esrb_rating' is the target for both datasets. Adjust as necessary.
X_train = train_data.drop(columns=['esrb_rating', 'title'])
y_train = train_data['esrb_rating']
X_test = test_data.drop(columns=['esrb_rating', 'title'])
y_test = test_data['esrb_rating']

# Encode target labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the neural network
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def initialize_weights(input_size, hidden_size, output_size):
    weights_input_hidden = np.random.uniform(-1, 1, (input_size, hidden_size))
    weights_hidden_output = np.random.uniform(-1, 1, (hidden_size, output_size))
    return weights_input_hidden, weights_hidden_output

def forward_backward_propagation(X, y, weights_input_hidden, weights_hidden_output, learning_rate):
    # Forward pass
    hidden_layer_input = np.dot(X, weights_input_hidden)
    hidden_layer_output = sigmoid(hidden_layer_input)
    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output)
    predicted_output = sigmoid(output_layer_input)
    
    # Calculate error
    error = y - predicted_output
    
    # Backward pass
    d_predicted_output = error * sigmoid_derivative(predicted_output)
    error_hidden_layer = d_predicted_output.dot(weights_hidden_output.T)
    d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_layer_output)
    
    # Update weights
    weights_hidden_output += hidden_layer_output.T.dot(d_predicted_output) * learning_rate
    weights_input_hidden += X.T.dot(d_hidden_layer) * learning_rate
    
    return weights_input_hidden, weights_hidden_output, np.mean(np.abs(error))

# Neural network parameters
input_size = X_train_scaled.shape[1]
hidden_size = 10  # Adjust based on dataset complexity
output_size = len(np.unique(y_train_encoded))
learning_rate = 0.01
epochs = 1000  # Adjust based on convergence

# Initialize weights
weights_input_hidden, weights_hidden_output = initialize_weights(input_size, hidden_size, output_size)

# Train neural network
for epoch in range(epochs):
    weights_input_hidden, weights_hidden_output, avg_error = forward_backward_propagation(X_train_scaled, y_train_encoded.reshape(-1, 1), weights_input_hidden, weights_hidden_output, learning_rate)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Error: {avg_error}")

# Evaluate neural network
hidden_layer_input_test = np.dot(X_test_scaled, weights_input_hidden)
hidden_layer_output_test = sigmoid(hidden_layer_input_test)
output_layer_input_test = np.dot(hidden_layer_output_test, weights_hidden_output)
predicted_output_test = sigmoid(output_layer_input_test)

# Get class predictions from the probabilities
predictions = np.argmax(predicted_output_test, axis=1)

# Print shapes for debugging
print("Shape of y_test_encoded:", y_test_encoded.shape)
print("Shape of predictions:", predictions.shape)

# Assert that the shapes match
assert y_test_encoded.shape == predictions.shape, "Mismatch in shape between y_test_encoded and predictions"

# Calculate metrics
accuracy = accuracy_score(y_test_encoded, predictions)
precision = precision_score(y_test_encoded, predictions, average='macro', zero_division=0)
recall = recall_score(y_test_encoded, predictions, average='macro', zero_division=0)
f1 = f1_score(y_test_encoded, predictions, average='macro', zero_division=0)

print(f"Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}")


Epoch 0, Error: 1.4978422159294185
Epoch 100, Error: 1.1509453183076181
Epoch 200, Error: 1.1508913910610474
Epoch 300, Error: 1.1491424240716632
Epoch 400, Error: 0.9826396927341013
Epoch 500, Error: 0.956928711335942
Epoch 600, Error: 0.9532990703746113
Epoch 700, Error: 0.9513502934364741
Epoch 800, Error: 0.9503375750604631
Epoch 900, Error: 0.9497744187936084
Shape of y_test_encoded: (500,)
Shape of predictions: (500,)
Accuracy: 0.344, Precision: 0.40945125591220094, Recall: 0.42205486542443066, F1 Score: 0.32561595515459407
