In [14]:
#Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

#Loading the dataset
# The CSV location is kept in one named constant so it is easy to spot and change.
DATASET_PATH = r"C:\Users\mibra\OneDrive\Desktop\ML Project\online_gaming_behavior_dataset.csv"
df = pd.read_csv(filepath_or_buffer=DATASET_PATH)


In [33]:
#Displaying the first few rows of the dataset
# (quick visual check of the column names and the kind of values each column holds)
df.head()

Unnamed: 0,PlayerID,Age,Gender,Location,GameGenre,PlayTimeHours,InGamePurchases,GameDifficulty,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,EngagementLevel,ChurnRisk
0,9000,43,Male,Other,Strategy,16.271119,0,Medium,6,108,79,25,Medium,0
1,9001,29,Female,USA,Strategy,5.525961,0,Medium,5,144,11,10,Medium,0
2,9002,22,Female,USA,Sports,8.223755,0,Easy,16,142,35,41,High,0
3,9003,35,Male,USA,Action,5.265351,1,Easy,9,85,57,47,Medium,0
4,9004,33,Male,Europe,Action,15.531945,0,Medium,2,131,95,37,Medium,0


In [32]:
#Exploratory Data Analysis
# Column dtypes and non-null counts are printed first; the per-column count of
# missing values is the cell's displayed result.
df.info()
missing_per_column = df.isnull().sum()
missing_per_column

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40034 entries, 0 to 40033
Data columns (total 14 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   PlayerID                   40034 non-null  int64  
 1   Age                        40034 non-null  int64  
 2   Gender                     40034 non-null  object 
 3   Location                   40034 non-null  object 
 4   GameGenre                  40034 non-null  object 
 5   PlayTimeHours              40034 non-null  float64
 6   InGamePurchases            40034 non-null  int64  
 7   GameDifficulty             40034 non-null  object 
 8   SessionsPerWeek            40034 non-null  int64  
 9   AvgSessionDurationMinutes  40034 non-null  int64  
 10  PlayerLevel                40034 non-null  int64  
 11  AchievementsUnlocked       40034 non-null  int64  
 12  EngagementLevel            40034 non-null  object 
 13  ChurnRisk                  40034 non-null  int

PlayerID                     0
Age                          0
Gender                       0
Location                     0
GameGenre                    0
PlayTimeHours                0
InGamePurchases              0
GameDifficulty               0
SessionsPerWeek              0
AvgSessionDurationMinutes    0
PlayerLevel                  0
AchievementsUnlocked         0
EngagementLevel              0
ChurnRisk                    0
dtype: int64

In [35]:
#Statistical summary of the dataset
# (count, mean, std, min, quartiles and max for each numeric column)
df.describe()

Unnamed: 0,PlayerID,Age,PlayTimeHours,InGamePurchases,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,ChurnRisk
count,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0
mean,29016.5,31.992531,12.024365,0.200854,9.471774,94.792252,49.655568,24.526477,0.257881
std,11556.964675,10.043227,6.914638,0.400644,5.763667,49.011375,28.588379,14.430726,0.437474
min,9000.0,15.0,0.000115,0.0,0.0,10.0,1.0,0.0,0.0
25%,19008.25,23.0,6.067501,0.0,4.0,52.0,25.0,12.0,0.0
50%,29016.5,32.0,12.008002,0.0,9.0,95.0,49.0,25.0,0.0
75%,39024.75,41.0,17.963831,0.0,14.0,137.0,74.0,37.0,1.0
max,49033.0,49.0,23.999592,1.0,19.0,179.0,99.0,49.0,1.0


In [15]:
#Preparing the data
# Target: a player counts as a churn risk (1) when their engagement level is 'Low', else 0.
df['ChurnRisk'] = (df['EngagementLevel'] == 'Low').astype('int64')
y = df['ChurnRisk']

# Features: everything except the identifier, the column the target is derived
# from, and the target itself.
X = df.drop(columns=['PlayerID', 'EngagementLevel', 'ChurnRisk'])

In [16]:
# Encoding categorical variables
# Remember which columns are numeric *before* encoding, so that only the
# original numeric features are scaled later (not the dummy indicators).
numerical_cols = list(X.select_dtypes(include=np.number).columns)

# One-hot encode the categorical columns, dropping the first level of each.
X_encoded = pd.get_dummies(data=X, drop_first=True)

In [17]:
# Splitting the dataset
# 70/30 split, stratified on the target so both parts keep the same churn ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [18]:
# Scaling numerical features
# Re-derive the unscaled rows from X_encoded (matched by index) as explicit copies.
# This keeps the cell idempotent: re-running it no longer refits the scaler on
# already-standardised data (which would turn the saved scaler into a near-identity
# transform), and it avoids assigning into a frame pandas may treat as a slice.
# Relies on X_encoded having a unique index and still holding the unscaled values.
X_train = X_encoded.loc[X_train.index].copy()
X_test = X_encoded.loc[X_test.index].copy()

# Fit on the training rows only; the test rows are transformed with the
# training statistics.
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

In [19]:
# Training the Logistic Regression model
# Fit on the scaled training split, then predict labels for the held-out test split.
log_reg_model = LogisticRegression(solver='liblinear', random_state=42)
log_reg_model = log_reg_model.fit(X_train, y_train)
y_pred = log_reg_model.predict(X_test)


In [20]:
# Evaluating the model
# Rows of the matrix are the actual classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_pred)
(actual0_pred0, actual0_pred1), (actual1_pred0, actual1_pred1) = cm

print("Confusion Matrix")
print("----------------")
print("            Predicted 0   Predicted 1")
print(f"Actual 0        {actual0_pred0}            {actual0_pred1}")
print(f"Actual 1        {actual1_pred0}            {actual1_pred1}")


Confusion Matrix
----------------
            Predicted 0   Predicted 1
Actual 0        8382            532
Actual 1        966            2131


In [21]:
# Share of test-set rows whose label was predicted correctly
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
for line in ("Model Accuracy", "--------------", f"Accuracy: {acc:.2%}"):
    print(line)

Model Accuracy
--------------
Accuracy: 87.53%


In [22]:
# Per-class precision / recall / F1; label 0 is shown as 'No Churn', label 1 as 'Churn'
class_labels = ['No Churn', 'Churn']
report = classification_report(y_test, y_pred, target_names=class_labels)

print("Classification Report")
print("---------------------")
print(report)

Classification Report
---------------------
              precision    recall  f1-score   support

    No Churn       0.90      0.94      0.92      8914
       Churn       0.80      0.69      0.74      3097

    accuracy                           0.88     12011
   macro avg       0.85      0.81      0.83     12011
weighted avg       0.87      0.88      0.87     12011



In [26]:
# Saving the model and scaler
import joblib

# Destination files for the three artifacts the web app needs at prediction time
model_path = r"C:\Users\mibra\OneDrive\Desktop\ML Project\churn.pkl"
scaler_path = r"C:\Users\mibra\OneDrive\Desktop\ML Project\scaler.pkl"
columns_path = r"C:\Users\mibra\OneDrive\Desktop\ML Project\model_columns.pkl"

# The encoded column list is stored so new inputs can be aligned to the training layout.
feature_columns = X_encoded.columns.tolist()

joblib.dump(log_reg_model, model_path)
joblib.dump(scaler, scaler_path)
joblib.dump(feature_columns, columns_path)


['C:\\Users\\mibra\\OneDrive\\Desktop\\ML Project\\model_columns.pkl']

In [28]:
# Building the Gradio web application
import gradio as gr
import pandas as pd
import numpy as np
import joblib

# Loading the saved model, scaler, and model columns
# The artifacts are read from the same folder the training cells write them to.
# Previously they were loaded from a different directory (Downloads\archive), so the
# app could serve files that do not match the model trained in this notebook.
# NOTE: joblib files are pickles - only load artifacts you produced yourself.
from pathlib import Path

ARTIFACT_DIR = Path(r"C:\Users\mibra\OneDrive\Desktop\ML Project")

model = joblib.load(ARTIFACT_DIR / "churn.pkl")
scaler = joblib.load(ARTIFACT_DIR / "scaler.pkl")
model_columns = joblib.load(ARTIFACT_DIR / "model_columns.pkl")

In [27]:
# Prediction function
def predict_churn(
    age,
    gender,
    location,
    game_genre,
    playtime,
    purchases,
    game_difficulty,
    sessions,
    avg_session,
    player_level,
    achievements
):
    """Estimate a player's churn probability and suggest a retention action.

    The inputs are assembled into a one-row frame, one-hot encoded, aligned to
    the saved training column layout (``model_columns``), scaled with the saved
    ``scaler`` and passed to the saved ``model``.

    Parameters
    ----------
    age, playtime, purchases, sessions, avg_session, player_level, achievements
        Numeric features; each must be convertible to ``float``.
    gender, location, game_genre, game_difficulty
        Categorical features given as strings. A label that has no matching
        dummy column in ``model_columns`` ends up as all zeros for that
        feature, i.e. it is indistinguishable from the category that was
        dropped during training - callers should only pass labels that occur
        in the training data.

    Returns
    -------
    tuple
        ``(probability, action)``: the positive-class probability rounded to
        three decimals, and a recommendation string chosen by the 0.7 / 0.4
        thresholds.

    Raises
    ------
    ValueError
        If any field is ``None`` or an empty string (e.g. a form field left
        blank), or a numeric field cannot be converted to ``float``.
    """
    numeric_inputs = {
        "Age": age,
        "PlayTimeHours": playtime,
        "InGamePurchases": purchases,
        "SessionsPerWeek": sessions,
        "AvgSessionDurationMinutes": avg_session,
        "PlayerLevel": player_level,
        "AchievementsUnlocked": achievements,
    }
    categorical_inputs = {
        "Gender": gender,
        "Location": location,
        "GameGenre": game_genre,
        "GameDifficulty": game_difficulty,
    }

    # Fail early with a readable message instead of an obscure feature-count
    # error from the scaler when a field was left blank.
    missing = [
        name
        for name, value in {**numeric_inputs, **categorical_inputs}.items()
        if value is None or value == ""
    ]
    if missing:
        raise ValueError("Missing value for: " + ", ".join(missing))

    input_df = pd.DataFrame([{
        **{name: float(value) for name, value in numeric_inputs.items()},
        **categorical_inputs,
    }])

    # One-hot encode, align to the training layout (absent dummies become 0),
    # and use a single float dtype for the whole feature row.
    input_encoded = (
        pd.get_dummies(input_df)
        .reindex(columns=model_columns, fill_value=0)
        .astype(float)
    )

    # Scale exactly the columns, in exactly the order, the scaler was fitted
    # on when it recorded them; otherwise fall back to the numeric inputs in
    # the order declared above.
    numerical_cols = list(getattr(scaler, "feature_names_in_", numeric_inputs))
    input_encoded[numerical_cols] = scaler.transform(
        input_encoded[numerical_cols]
    )

    churn_prob = float(model.predict_proba(input_encoded)[0][1])

    if churn_prob > 0.7:
        action = "High Risk: Offer discounts or free rewards"
    elif churn_prob > 0.4:
        action = "Medium Risk: Personalized missions & reminders"
    else:
        action = "Low Risk: Loyalty rewards"

    return round(churn_prob, 3), action


In [29]:
# Creating the Gradio interface
# The inputs are listed in the same order as predict_churn's parameters.
# Dropdown choices must use the exact labels found in the training data: a label
# the model never saw produces no matching dummy column and is silently scored
# as the dropped baseline category. The dataset spells the location "USA"
# (not "North America"), so that choice is corrected here.
# NOTE(review): confirm the remaining choice lists against df[col].unique()
# (e.g. that no Game Genre present in the data is missing from the dropdown).
app = gr.Interface(
    fn=predict_churn,
    inputs=[
        gr.Number(label="Age"),
        gr.Dropdown(["Male", "Female"], label="Gender"),
        gr.Dropdown(["USA", "Europe", "Asia", "Other"], label="Location"),
        gr.Dropdown(["Action", "Strategy", "RPG", "Sports"], label="Game Genre"),
        gr.Number(label="Play Time (Hours)"),
        gr.Number(label="In-Game Purchases"),
        gr.Dropdown(["Easy", "Medium", "Hard"], label="Game Difficulty"),
        gr.Number(label="Sessions Per Week"),
        gr.Number(label="Avg Session Duration (Minutes)"),
        gr.Number(label="Player Level"),
        gr.Number(label="Achievements Unlocked")
    ],
    outputs=[
        gr.Number(label="Churn Risk Probability"),
        gr.Textbox(label="Retention Recommendation")
    ],
    # Game-controller emoji written as an escape so it survives re-encoding
    # (the previous title contained the mis-decoded bytes of this character).
    title="\U0001F3AE Player Churn Risk Prediction",
    description="Logistic Regression model based on engagement behavior."
)

app.launch()


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


