# Online Gaming Engagement Prediction using XGBoost

In [3]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix


In [4]:
# Location of the dataset CSV. The default is the original author's local
# path; set the GAMING_DATA_CSV environment variable to point the notebook at
# a copy stored elsewhere instead of editing this cell.
file_path = os.environ.get(
    "GAMING_DATA_CSV",
    "C:/Users/Micha/Desktop/Data/online_gaming_behavior_dataset.csv",
)

# Load the whole file into `df`; later cells transform this frame.
df = pd.read_csv(file_path)

# Preview the first rows as the cell output.
df.head()


Unnamed: 0,PlayerID,Age,Gender,Location,GameGenre,PlayTimeHours,InGamePurchases,GameDifficulty,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,EngagementLevel
0,9000,43,Male,Other,Strategy,16.271119,0,Medium,6,108,79,25,Medium
1,9001,29,Female,USA,Strategy,5.525961,0,Medium,5,144,11,10,Medium
2,9002,22,Female,USA,Sports,8.223755,0,Easy,16,142,35,41,High
3,9003,35,Male,USA,Action,5.265351,1,Easy,9,85,57,47,Medium
4,9004,33,Male,Europe,Action,15.531945,0,Medium,2,131,95,37,Medium


In [5]:
# Quick schema check: the dtype of every column, then the number of missing
# values per column.
column_types = df.dtypes
missing_per_column = df.isnull().sum()

print(column_types)
print("\nMissing values in each column:")
print(missing_per_column)


PlayerID                       int64
Age                            int64
Gender                        object
Location                      object
GameGenre                     object
PlayTimeHours                float64
InGamePurchases                int64
GameDifficulty                object
SessionsPerWeek                int64
AvgSessionDurationMinutes      int64
PlayerLevel                    int64
AchievementsUnlocked           int64
EngagementLevel               object
dtype: object

Missing values in each column:
PlayerID                     0
Age                          0
Gender                       0
Location                     0
GameGenre                    0
PlayTimeHours                0
InGamePurchases              0
GameDifficulty               0
SessionsPerWeek              0
AvgSessionDurationMinutes    0
PlayerLevel                  0
AchievementsUnlocked         0
EngagementLevel              0
dtype: int64


In [6]:
# Columns to one-hot encode. With drop_first=True the first level of each
# column is dropped and becomes the implicit reference (all of that column's
# indicator flags are False), e.g. gender is carried by `Gender_Male` alone.
categorical_features = [
    'Gender',
    'Location',
    'GameGenre',
    'InGamePurchases',
    'GameDifficulty',
]

# NOTE: this rebinds `df`; the raw categorical columns no longer exist after
# this cell has run, so running it a second time without reloading will fail.
df = pd.get_dummies(data=df, columns=categorical_features, drop_first=True)

df.head()


Unnamed: 0,PlayerID,Age,PlayTimeHours,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,EngagementLevel,Gender_Male,Location_Europe,Location_Other,Location_USA,GameGenre_RPG,GameGenre_Simulation,GameGenre_Sports,GameGenre_Strategy,InGamePurchases_1,GameDifficulty_Hard,GameDifficulty_Medium
0,9000,43,16.271119,6,108,79,25,Medium,True,False,True,False,False,False,False,True,False,False,True
1,9001,29,5.525961,5,144,11,10,Medium,False,False,False,True,False,False,False,True,False,False,True
2,9002,22,8.223755,16,142,35,41,High,False,False,False,True,False,False,True,False,False,False,False
3,9003,35,5.265351,9,85,57,47,Medium,True,False,False,True,False,False,False,False,True,False,False
4,9004,33,15.531945,2,131,95,37,Medium,True,True,False,False,False,False,False,False,False,False,True


In [7]:
# Turn the string target into integer class codes for the classifier.
# The fitted encoder is kept so predictions can be mapped back to their
# original labels later (`label_encoder.classes_` lists them in code order).
label_encoder = LabelEncoder()
label_encoder.fit(df['EngagementLevel'])
df['EngagementLevel_encoded'] = label_encoder.transform(df['EngagementLevel'])


In [8]:
# Columns that must not be fed to the model: the player identifier and both
# forms (string and integer-coded) of the target.
non_feature_columns = ['PlayerID', 'EngagementLevel', 'EngagementLevel_encoded']

# Feature matrix = everything else; target = the integer-coded engagement level.
X = df.drop(columns=non_feature_columns)
y = df['EngagementLevel_encoded']


In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [10]:
# Gradient-boosted tree classifier with library-default hyperparameters;
# only the evaluation metric is set explicitly.
xgb_settings = {'eval_metric': 'mlogloss'}
model = XGBClassifier(**xgb_settings)

# Fit on the training split (the fitted estimator is the cell's output).
model.fit(X_train, y_train)


In [11]:
# Predict the integer class codes for the held-out rows.
y_pred = model.predict(X_test)

# Compute the three summaries first, then print them.
# Rows/labels in the matrix and report are the integer codes produced by
# `label_encoder`; `label_encoder.classes_` gives the matching label names.
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)

print("Confusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)
print("Accuracy:", test_accuracy)


Confusion Matrix:
 [[1795   68  172]
 [  62 1846  185]
 [ 105  119 3655]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.88      0.90      2035
           1       0.91      0.88      0.89      2093
           2       0.91      0.94      0.93      3879

    accuracy                           0.91      8007
   macro avg       0.91      0.90      0.91      8007
weighted avg       0.91      0.91      0.91      8007

Accuracy: 0.9112026976395654


In [12]:
# Interactive single-player prediction.
#
# Collects one value per model feature from the keyboard, builds a one-row
# frame with exactly the training columns (`X.columns`), and prints the
# predicted engagement label. Relies on `X`, `model` and `label_encoder`
# from the earlier cells.


def _prompt_number(prompt, cast, allowed=None):
    """Ask until the reply parses with `cast` and, if given, is in `allowed`.

    prompt  -- text shown to the user
    cast    -- `int` or `float`, used to parse the stripped reply
    allowed -- optional set of accepted values (e.g. {0, 1} for yes/no flags)

    Returns the parsed value.
    """
    while True:
        reply = input(prompt).strip()
        try:
            value = cast(reply)
        except ValueError:
            print(f"  '{reply}' is not a valid {cast.__name__}; please try again.")
            continue
        if allowed is not None and value not in allowed:
            print(f"  Please enter one of {sorted(allowed)}.")
            continue
        return value


def _prompt_category(prompt, prefix, baseline=None):
    """Ask for a category and return its one-hot flags for the model.

    The flags are derived from the `<prefix>_*` columns actually present in
    `X.columns`, so they cannot drift from what the model was trained on.
    A reply that matches none of those levels yields all zeros, i.e. the
    reference category that `drop_first=True` removed during encoding. Unless
    that reply equals `baseline` (lower-case name of the known reference
    level), a notice is printed so a typo does not pass unnoticed.

    Returns a dict {column_name: 0 or 1}.
    """
    dummy_columns = [col for col in X.columns if col.startswith(prefix + '_')]
    reply = input(prompt).strip().lower()
    flags = {
        col: int(col[len(prefix) + 1:].lower() == reply)
        for col in dummy_columns
    }
    if not any(flags.values()) and reply != baseline:
        print(
            f"  Note: '{reply}' is not an encoded {prefix} level; "
            f"treating it as the reference category (all {prefix}_* = 0)."
        )
    return flags


print("\n--- Predict Engagement Level ---")

# Numeric and yes/no features (a dict literal evaluates top to bottom, so the
# questions are asked in the order written here).
input_data = {
    'PlayTimeHours': _prompt_number("Enter PlayTimeHours: ", float),
    'SessionsPerWeek': _prompt_number("Enter SessionsPerWeek: ", int),
    'AvgSessionDurationMinutes': _prompt_number("Enter AvgSessionDurationMinutes: ", float),
    'PlayerLevel': _prompt_number("Enter PlayerLevel: ", int),
    'AchievementsUnlocked': _prompt_number("Enter AchievementsUnlocked: ", int),
    'Age': _prompt_number("Enter Age: ", int),
    'InGamePurchases_1': _prompt_number(
        "Did the player make in-game purchases? (1=yes, 0=no): ", int, allowed={0, 1}
    ),
    'Gender_Male': _prompt_number(
        "Is the player Male? (1=yes, 0=no): ", int, allowed={0, 1}
    ),
}

# One-hot encoded features.
# NOTE(review): the Location level removed by drop_first is not visible in
# this notebook, so any reply other than Europe/Other/USA is mapped to it
# (with a notice). Confirm the reference level against the raw data.
input_data.update(
    _prompt_category("Where is the player from? (Europe/Other/USA): ", 'Location')
)
# "Action" is the genre dropped by drop_first (there is no "Other" genre
# column), so it is offered explicitly as the reference choice.
input_data.update(
    _prompt_category(
        "What is the game genre? (Action/RPG/Simulation/Sports/Strategy): ",
        'GameGenre',
        baseline='action',
    )
)
# "Easy" is the reference difficulty: only the Medium/Hard flags exist.
input_data.update(
    _prompt_category(
        "What is the game difficulty? (Easy/Medium/Hard): ",
        'GameDifficulty',
        baseline='easy',
    )
)

# Fail loudly if the model expects a feature nothing above supplied; otherwise
# the DataFrame constructor would quietly fill it with NaN.
uncovered = [col for col in X.columns if col not in input_data]
if uncovered:
    raise ValueError(f"No value collected for model features: {uncovered}")

# `columns=X.columns` puts the features in the training order.
user_df = pd.DataFrame([input_data], columns=X.columns)

user_pred = model.predict(user_df)
predicted_label = label_encoder.inverse_transform(user_pred)[0]

print(f"\n✅ Predicted Engagement Level: {predicted_label}")



--- Predict Engagement Level ---

✅ Predicted Engagement Level: Low
