# 📊 Train Difficulty Predictor
This notebook trains a model to classify game difficulty (easy/medium/hard) based on gameplay data.

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [None]:
# Load the gameplay dataset from game_stats.csv (a path relative to the
# notebook's working directory) into a DataFrame.
df = pd.read_csv("game_stats.csv")
# Preview the first rows as the cell output to sanity-check the load.
df.head()

In [None]:
# Rule-based difficulty labelling from time_taken and guesses_used
def label_difficulty(row):
    """Return the difficulty label for a single game record.

    ``row`` is anything indexable by column name that provides
    ``time_taken`` and, for quick games, ``guesses_used``.

    Rules, checked in order:
      * 'easy'   -- time_taken <= 30 and guesses_used <= 5
      * 'medium' -- any remaining game with time_taken <= 60
      * 'hard'   -- everything else
    """
    elapsed = row['time_taken']
    # guesses_used is only consulted for games that were quick enough.
    if elapsed <= 30 and row['guesses_used'] <= 5:
        return 'easy'
    return 'medium' if elapsed <= 60 else 'hard'

# Label every row with label_difficulty (axis=1 passes one row at a time)
# and store the result in a new 'difficulty_label' column.
df['difficulty_label'] = df.apply(label_difficulty, axis=1)
# Show the two rule inputs next to the derived label to spot-check the rule.
df[['time_taken', 'guesses_used', 'difficulty_label']].head()

In [None]:
# Features and target
# NOTE(review): 'difficulty_label' is derived from 'time_taken' and
# 'guesses_used', which are also model inputs, so the classifier can simply
# re-learn the labelling rule -- confirm this leakage is intended.
features = ['time_taken', 'guesses_used', 'hints_used', 'code_length', 'allow_duplicates']
# Take an explicit copy so the column conversion below modifies X only and
# does not trigger pandas' SettingWithCopyWarning on a slice of df.
X = df[features].copy()
y = df['difficulty_label']

# Convert boolean to int
X['allow_duplicates'] = X['allow_duplicates'].astype(int)

In [None]:
# Train/test split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
# NOTE(review): the split is not stratified -- consider stratify=y if the
# difficulty classes turn out to be imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train classifier
# Random forest with 100 trees; the fixed random_state makes training repeatable.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Score the classifier on the held-out test split
predictions = model.predict(X_test)

# Overall fraction of test rows whose predicted label matches the true one.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

# Per-class text report for the same predictions.
report = classification_report(y_test, predictions)
print("Classification Report:\n", report)

In [None]:
# Save model
# Persist the fitted classifier to difficulty_predictor.pkl (a path relative
# to the working directory) so it can be reloaded later with joblib.load.
# NOTE(review): joblib files are pickle-based -- only load this file from a
# trusted location, since unpickling can execute arbitrary code.
joblib.dump(model, "difficulty_predictor.pkl")
print("Model saved as difficulty_predictor.pkl")