In [None]:
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## Load and Prepare Data

In [None]:
# Read the pickled training samples from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only point this at a file from a trusted source.
with open('data/training_data.pkl', 'rb') as pkl_file:
    training_data = pickle.load(pkl_file)

# Quick sanity check on how many samples were loaded.
print(f"Total samples: {len(training_data)}")

In [None]:
# Extract features and labels
def _flatten_features(feature_dict):
    """Flatten one feature dict into a flat list of scalar values.

    Values are visited in the dict's iteration order. NumPy arrays of any
    number of dimensions (including 0-d) are unrolled element by element;
    every other value is appended unchanged.
    """
    flat = []
    for value in feature_dict.values():
        if isinstance(value, np.ndarray):
            # ravel() first so 0-d and multi-dimensional arrays also become
            # a flat run of scalars instead of breaking extend() or nesting.
            flat.extend(value.ravel().tolist())
        else:
            flat.append(value)
    return flat


def _to_matrix(rows, name):
    """Stack per-sample feature rows into a float32 array.

    Raises:
        ValueError: if the rows do not all have the same length, which
            happens when samples disagree on their feature keys or on the
            size of an array-valued feature.
    """
    widths = {len(row) for row in rows}
    if len(widths) > 1:
        raise ValueError(
            f"{name} rows have inconsistent lengths: {sorted(widths)}"
        )
    return np.array(rows, dtype=np.float32)


user_rows = []
game_rows = []
cross_rows = []
labels = []

# Single pass over the samples. Column positions follow each dict's iteration
# order, so this relies on every sample listing its feature keys in the same
# order.
for sample in training_data:
    user_rows.append(_flatten_features(sample['user_feature']))
    game_rows.append(_flatten_features(sample['game_feature']))
    # Cross features go through the same flattening as the other two groups,
    # so array-valued entries are handled consistently.
    cross_rows.append(_flatten_features(sample['cross_feature']))
    labels.append(sample['label'])

# Convert to numpy arrays
X_user = _to_matrix(user_rows, "user_feature")
X_game = _to_matrix(game_rows, "game_feature")
X_cross = _to_matrix(cross_rows, "cross_feature")
y = np.array(labels, dtype=np.float32)

# Concatenate all features column-wise: user | game | cross
X = np.concatenate([X_user, X_game, X_cross], axis=1)

print(f"Feature matrix shape: {X.shape}")
# bincount needs non-negative integers, hence the cast from float32.
print(f"Label distribution: {np.bincount(y.astype(int))}")

In [None]:
# Hold out 10% of the rows as a test set. The rows are shuffled before the
# split, and the fixed random_state keeps that shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, random_state=42, test_size=0.1)

# Report how many rows ended up on each side of the split.
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

## Train Random Forest Classifier

In [None]:
# Build the forest: 300 trees with no depth limit, a fixed seed for
# reproducibility, n_jobs=-1 for parallel fitting, and progress logging on.
rf_classifier = RandomForestClassifier(n_estimators=300, max_depth=None, n_jobs=-1, random_state=42, verbose=1)

# Fit on the training split only; the test split stays untouched until evaluation.
print("Training Random Forest...")
rf_classifier.fit(X_train, y_train)
print("Training complete!")

## Evaluate Model Performance

In [None]:
# Predict a label for every held-out row with the trained forest.
y_pred = rf_classifier.predict(X_test)

# Accuracy on the test split, printed both as a fraction and as a percentage.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Accuracy (%): {test_accuracy * 100:.2f}%")

In [None]:
# Detailed classification report
# Pin the label set to every class present in the full label array `y`.
# Without this the labels are inferred from y_test/y_pred alone, so a class
# missing from the small unstratified test split would leave the two
# target_names mismatched and make classification_report raise.
print("\nClassification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=np.unique(y),
    target_names=['Not Recommend', 'Recommend'],
))

In [None]:
# Confusion matrix
# Pin the label set to every class present in the full label array `y`, so
# the matrix has one row/column per class even when a class is missing from
# both y_test and y_pred. With inferred labels the matrix could collapse to
# 1x1 and the indexing below would raise IndexError.
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred, labels=np.unique(y))
print(cm)
# Rows are true labels and columns are predicted labels, both in sorted label
# order; the names below therefore assume a binary problem whose smaller
# label is the negative class.
print(f"\nTrue Negatives: {cm[0,0]}")
print(f"False Positives: {cm[0,1]}")
print(f"False Negatives: {cm[1,0]}")
print(f"True Positives: {cm[1,1]}")