In [24]:
# import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [4]:
# Read the player-behaviour CSV into the working DataFrame `df`
dataset_path = r"C:\Users\mibra\OneDrive\Desktop\ML Project\online_gaming_behavior_dataset.csv"
df = pd.read_csv(dataset_path)

In [5]:
# Do necessary data exploration
# Preview the first rows to check column names and how values are formatted
df.head()

Unnamed: 0,PlayerID,Age,Gender,Location,GameGenre,PlayTimeHours,InGamePurchases,GameDifficulty,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,EngagementLevel
0,9000,43,Male,Other,Strategy,16.271119,0,Medium,6,108,79,25,Medium
1,9001,29,Female,USA,Strategy,5.525961,0,Medium,5,144,11,10,Medium
2,9002,22,Female,USA,Sports,8.223755,0,Easy,16,142,35,41,High
3,9003,35,Male,USA,Action,5.265351,1,Easy,9,85,57,47,Medium
4,9004,33,Male,Europe,Action,15.531945,0,Medium,2,131,95,37,Medium


In [6]:
# Column dtypes and non-null counts, to spot missing values and text columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40034 entries, 0 to 40033
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   PlayerID                   40034 non-null  int64  
 1   Age                        40034 non-null  int64  
 2   Gender                     40034 non-null  object 
 3   Location                   40034 non-null  object 
 4   GameGenre                  40034 non-null  object 
 5   PlayTimeHours              40034 non-null  float64
 6   InGamePurchases            40034 non-null  int64  
 7   GameDifficulty             40034 non-null  object 
 8   SessionsPerWeek            40034 non-null  int64  
 9   AvgSessionDurationMinutes  40034 non-null  int64  
 10  PlayerLevel                40034 non-null  int64  
 11  AchievementsUnlocked       40034 non-null  int64  
 12  EngagementLevel            40034 non-null  object 
dtypes: float64(1), int64(7), object(5)
memory usag

In [7]:
# (rows, columns) of the loaded frame
df.shape

(40034, 13)

In [8]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
df.describe()

Unnamed: 0,PlayerID,Age,PlayTimeHours,InGamePurchases,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked
count,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0
mean,29016.5,31.992531,12.024365,0.200854,9.471774,94.792252,49.655568,24.526477
std,11556.964675,10.043227,6.914638,0.400644,5.763667,49.011375,28.588379,14.430726
min,9000.0,15.0,0.000115,0.0,0.0,10.0,1.0,0.0
25%,19008.25,23.0,6.067501,0.0,4.0,52.0,25.0,12.0
50%,29016.5,32.0,12.008002,0.0,9.0,95.0,49.0,25.0
75%,39024.75,41.0,17.963831,0.0,14.0,137.0,74.0,37.0
max,49033.0,49.0,23.999592,1.0,19.0,179.0,99.0,49.0


In [9]:
# List the column labels available for feature/target selection
df.columns

Index(['PlayerID', 'Age', 'Gender', 'Location', 'GameGenre', 'PlayTimeHours',
       'InGamePurchases', 'GameDifficulty', 'SessionsPerWeek',
       'AvgSessionDurationMinutes', 'PlayerLevel', 'AchievementsUnlocked',
       'EngagementLevel'],
      dtype='object')

In [10]:
# Derive the binary target from engagement level:
# rows whose EngagementLevel is 'Low' get ChurnRisk = 1, every other row gets 0.
is_low_engagement = df['EngagementLevel'] == 'Low'
df['ChurnRisk'] = is_low_engagement.astype('int64')

In [11]:
# Separate the input features from the target variable.
# EngagementLevel is excluded because ChurnRisk is computed directly from it;
# PlayerID is excluded as well (presumably a row identifier rather than a behavioural feature).
non_feature_columns = ['PlayerID', 'EngagementLevel', 'ChurnRisk']
X = df.drop(columns=non_feature_columns)
y = df['ChurnRisk']

In [12]:
# Converting categorical variables to dummy/indicator variables.
# drop_first=True omits the first level of every encoded column, so that level
# is represented by all of its sibling indicators being 0.
# NOTE: the encoded frame is bound to lowercase `x`; uppercase `X` stays the un-encoded feature frame.
x = pd.get_dummies(X, drop_first=True)

In [13]:
# Hold out 20% of the encoded rows for testing; the fixed random_state makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Feature standardizer; created unfitted here and fitted on the training split in the following cell
scaler = StandardScaler()

In [15]:
# Scaling the features.
# The scaler is fitted on the training split only and then applied, unchanged, to the test split,
# so no statistics from the test rows influence the transformation.
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [16]:
# Logistic-regression classifier constructed with the library's default settings
lr = LogisticRegression()

In [17]:
# Fitting the model on the scaled training features and the ChurnRisk labels
lr.fit(x_train_scaled,y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [18]:
# Making predictions: class labels for the scaled test features,
# reused by the evaluation cells below
predict = lr.predict(x_test_scaled)

In [19]:
# Evaluating the model: print the 2x2 confusion matrix as a small text table
# (rows are the actual class, columns the predicted class).
cm = confusion_matrix(y_test, predict)
(true_neg, false_pos), (false_neg, true_pos) = cm
print("Confusion Matrix")
print("----------------")
print("            Predicted 0   Predicted 1")
print(f"Actual 0        {true_neg}            {false_pos}")
print(f"Actual 1        {false_neg}            {true_pos}")

Confusion Matrix
----------------
            Predicted 0   Predicted 1
Actual 0        5571            343
Actual 1        638            1455


In [20]:
# Share of test rows whose predicted label equals the true label, shown as a percentage
acc = accuracy_score(y_test, predict)
print("Model Accuracy", "--------------", f"Accuracy: {acc:.2%}", sep="\n")

Model Accuracy
--------------
Accuracy: 87.75%


In [21]:
# Per-class precision / recall / F1; label 0 is displayed as 'No Churn' and label 1 as 'Churn'
report = classification_report(
    y_test,
    predict,
    target_names=['No Churn', 'Churn'],
)
print("Classification Report", "---------------------", report, sep="\n")

Classification Report
---------------------
              precision    recall  f1-score   support

    No Churn       0.90      0.94      0.92      5914
       Churn       0.81      0.70      0.75      2093

    accuracy                           0.88      8007
   macro avg       0.85      0.82      0.83      8007
weighted avg       0.87      0.88      0.87      8007



In [None]:
# Save the trained model
# NOTE(review): only the classifier `lr` is written to disk. The fitted `scaler` is not part of
# this file, so any code that reloads the model must scale its inputs with that same scaler separately.
import joblib as jb
jb.dump(lr, r"C:\Users\mibra\OneDrive\Desktop\ML Project\online_gaming_behavior_dataset.pkl") 

['C:\\Users\\mibra\\OneDrive\\Desktop\\ML Project\\online_gaming_behavior_dataset.pkl']

In [None]:
# Load the trained model back from the file written above.
# `model` is the object predict_churn calls for inference.
model = jb.load(r"C:\Users\mibra\OneDrive\Desktop\ML Project\online_gaming_behavior_dataset.pkl")

In [25]:
# Function to predict churn risk and suggest actions
def predict_churn(
    Age,Gender,Location, GameGenre, PlayTimeHours, InGamePurchases, GameDifficulty, SessionsPerWeek, AvgSessionDurationMinutes,
    PlayerLevel, AchievementsUnlocked):
    """Estimate a player's churn probability and suggest a retention action.

    The raw inputs are put through the same preparation the classifier was
    trained with: one-hot encoding aligned to the training columns, followed by
    the fitted ``scaler``. Relies on the notebook-level ``scaler`` (fitted
    StandardScaler) and ``model`` (trained classifier).

    Parameters
    ----------
    Age, PlayTimeHours, InGamePurchases, SessionsPerWeek,
    AvgSessionDurationMinutes, PlayerLevel, AchievementsUnlocked : number
        Numeric player attributes.
    Gender, Location, GameGenre, GameDifficulty : str
        Categorical player attributes, spelled as in the training data.

    Returns
    -------
    tuple[float, str]
        The churn probability rounded to 3 decimals and a recommendation text.

    Raises
    ------
    ValueError
        If any input is missing (``None``), e.g. an empty UI field.
    """
    player = {
        "Age": Age,
        "Gender": Gender,
        "Location": Location,
        "GameGenre": GameGenre,
        "PlayTimeHours": PlayTimeHours,
        "InGamePurchases": InGamePurchases,
        "GameDifficulty": GameDifficulty,
        "SessionsPerWeek": SessionsPerWeek,
        "AvgSessionDurationMinutes": AvgSessionDurationMinutes,
        "PlayerLevel": PlayerLevel,
        "AchievementsUnlocked": AchievementsUnlocked
    }

    # An empty form field arrives as None; fail with a clear message instead of
    # silently encoding it as a zero feature.
    missing = [name for name, value in player.items() if value is None]
    if missing:
        raise ValueError(f"Missing input value(s): {', '.join(missing)}")

    input_df = pd.DataFrame([player])

    # A single row only produces indicator columns for the categories it contains,
    # so the encoded frame is re-aligned to the exact columns (and order) the scaler
    # was fitted on. Indicators absent from this row become 0; indicators that were
    # dropped during training (the baseline levels) are discarded. A category the
    # training data never contained therefore falls back to the baseline level.
    training_columns = list(scaler.feature_names_in_)
    input_encoded = (
        pd.get_dummies(
            input_df,
            columns=["Gender", "Location", "GameGenre", "GameDifficulty"],
        )
        .reindex(columns=training_columns, fill_value=0)
        .astype(float)
    )

    # The classifier was fitted on standardized features, so the same scaling
    # must be applied before asking for probabilities.
    input_scaled = scaler.transform(input_encoded)
    churn_prob = float(model.predict_proba(input_scaled)[0][1])

    if churn_prob > 0.7:
        action = "High Risk: Offer discounts or free rewards"
    elif churn_prob > 0.4:
        action = "Medium Risk: Personalized missions & reminders"
    else:
        action = "Low Risk: Loyalty rewards"

    return round(churn_prob, 3), action


In [26]:
# Define the input and output components for Gradio.
# The order of `inputs` must match the parameter order of predict_churn.
# NOTE(review): dropdown choices have to use the exact category spellings found in the
# training data, otherwise the selection cannot be matched to a trained indicator column.
# The dataset preview shows Location values 'USA', 'Europe' and 'Other' (not "North America");
# confirm the complete lists with df[col].unique().
inputs = [
    gr.Number(label="Age"),
    gr.Dropdown(["Male", "Female"], label="Gender"),
    gr.Dropdown(["USA", "Europe", "Asia", "Other"], label="Location"),
    gr.Dropdown(["Action", "Strategy", "RPG", "Sports"], label="Game Genre"),
    gr.Number(label="Play Time (Hours)"),
    gr.Number(label="In-Game Purchases"),
    gr.Dropdown(["Easy", "Medium", "Hard"], label="Game Difficulty"),
    gr.Number(label="Sessions Per Week"),
    gr.Number(label="Avg Session Duration (Minutes)"),
    gr.Number(label="Player Level"),
    gr.Number(label="Achievements Unlocked")
]

# Two outputs, matching the (probability, recommendation) tuple returned by predict_churn
outputs = [
    gr.Number(label="Churn Risk Probability"),
    gr.Textbox(label="Retention Recommendation")
]

# Launch the interface
gr.Interface(
    fn=predict_churn,
    inputs=inputs,
    outputs=outputs,
    title="🎮 Player Churn Risk Prediction",
    description="Predicts the likelihood of a player churning based on engagement behavior."
).launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


