In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load merged dataset
df = pd.read_csv('merged_steam_data.csv')

# Create binary target: popular if ccu > 5000
df['popular'] = (df['ccu'] > 5000).astype(int)

# Encode the 'name' column into numeric labels
le = LabelEncoder()
df['name_encoded'] = le.fit_transform(df['name'])

# Select features for training
feature_columns = ['price_x', 'name_encoded']  # You can add more features here
X = df[feature_columns].values
y = df['popular'].values

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into train/test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Define and train the ANN using scikit-learn
model = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    alpha=0.01,                # L2 regularization (penalty term)
    batch_size=32,
    learning_rate_init=0.001,
    max_iter=200,
    random_state=42
)

print("Training the MLPClassifier...")
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"\nTest Accuracy: {accuracy:.4f}")


KeyError: 'name'