In [302]:
import pandas as pd
import pickle
import numpy as np
import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras import layers
from keras.optimizers import Adam
from keras.utils import to_categorical

kd = pd.read_csv('online_gaming_missing.csv')
kd.columns
kd.isnull().any()

PlayerID                     True
Age                          True
Gender                       True
Location                     True
GameGenre                    True
PlayTimeHours                True
InGamePurchases              True
GameDifficulty               True
SessionsPerWeek              True
AvgSessionDurationMinutes    True
PlayerLevel                  True
AchievementsUnlocked         True
EngagementLevel              True
dtype: bool

In [303]:
kd = kd.dropna()
for col in kd.columns:
    print(kd[col].unique())

[ 9026.  9033.  9036. ... 49007. 49009. 49024.]
[21. 23. 34. 22. 38. 37. 31. 16. 20. 42. 15. 26. 46. 36. 41. 18. 30. 43.
 40. 44. 19. 47. 17. 27. 39. 28. 45. 49. 25. 48. 29. 35. 24. 33. 32.]
['Male' 'Female']
['USA' 'Europe' 'Other' 'Asia']
['Strategy' 'Simulation' 'Action' 'RPG' 'Sports']
[ 4.89834249 18.96432413 13.77645119 ... 21.23248526 19.16611234
 19.73464349]
[0. 1.]
['Medium' 'Hard' 'Easy']
[12. 19. 17.  7. 11. 13.  3.  9.  0.  5. 16.  2.  8.  1.  4. 10. 14. 18.
  6. 15.]
[118.  48.  28. 167.  79. 131. 102. 147. 124.  27.  19. 135. 146.  90.
  57. 175.  84.  88.  93.  91.  71.  61.  95.  14. 127.  44.  86. 166.
 138.  64. 177.  92.  72.  66. 123. 111. 153. 145.  18. 108.  24. 170.
 103. 119.  41.  50.  62. 107. 156.  96. 122.  81.  23.  34.  35. 161.
 160.  36. 109.  97.  69. 164. 155.  32. 162.  25.  54.  67. 114. 129.
 143.  55.  43. 140. 126.  56.  30. 152.  46.  60.  39. 134. 178.  94.
  87. 136. 116.  63. 139. 101.  51.  45. 144.  75.  53. 173. 121. 142.
  16.  83. 115.  

In [304]:
def map_data(column, value):
    mapping_dict = {
        "Gender": {"Male": 0, "Female": 1},
        "Location": {"USA": 0, "Europe": 1, "Asia": 2, "Other": 3},
        "GameGenre": {'Strategy': 0, 'Simulation': 1, 'Action': 2, 'RPG': 3, 'Sports': 4},
        "GameDifficulty": {'Easy': 0, 'Medium': 1, 'Hard': 2},
        "EngagementLevel": {'Low': 0, 'Medium': 1, 'High': 2}
    }
    return mapping_dict[column].get(value)

for column in kd.columns:
    if kd[column].dtype == 'object':
        print(column)
        kd[column] = kd[column].apply(lambda x: map_data(column, x))
kd = kd.astype(int)
for col in kd.columns:
    print(kd[col].unique())

Gender
Location
GameGenre
GameDifficulty
EngagementLevel
[ 9026  9033  9036 ... 49007 49009 49024]
[21 23 34 22 38 37 31 16 20 42 15 26 46 36 41 18 30 43 40 44 19 47 17 27
 39 28 45 49 25 48 29 35 24 33 32]
[0 1]
[0 1 3 2]
[0 1 2 3 4]
[ 4 18 13 12  2 10 19 16  1 20  3 14  9 22  8 11  5 21  7 17  6 15 23  0]
[0 1]
[1 2 0]
[12 19 17  7 11 13  3  9  0  5 16  2  8  1  4 10 14 18  6 15]
[118  48  28 167  79 131 102 147 124  27  19 135 146  90  57 175  84  88
  93  91  71  61  95  14 127  44  86 166 138  64 177  92  72  66 123 111
 153 145  18 108  24 170 103 119  41  50  62 107 156  96 122  81  23  34
  35 161 160  36 109  97  69 164 155  32 162  25  54  67 114 129 143  55
  43 140 126  56  30 152  46  60  39 134 178  94  87 136 116  63 139 101
  51  45 144  75  53 173 121 142  16  83 115  74 179  15 154 157 163 112
 151  29  59  13 106  33 176  42 159  68 158 137 130  76 171 172  73 132
 149 148  47  20 169  99 113 100 141  38  52 165  21 104  10  40 120  89
  82  80  17  49  70 133  37 11

In [305]:
X = kd.drop(columns=['EngagementLevel', 'PlayTimeHours', 'PlayerID'])
y = kd['EngagementLevel']

scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
X_scaled

Unnamed: 0,Age,Gender,Location,GameGenre,InGamePurchases,GameDifficulty,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked
0,0.176471,0.0,0.000000,0.00,0.0,0.5,0.631579,0.639053,0.714286,0.530612
1,0.235294,0.0,0.000000,0.25,0.0,1.0,1.000000,0.224852,0.622449,0.857143
2,0.558824,0.0,0.333333,0.50,0.0,1.0,1.000000,0.106509,0.795918,0.836735
3,0.235294,0.0,1.000000,0.25,1.0,0.5,0.894737,0.928994,0.724490,0.489796
4,0.205882,0.0,0.333333,0.75,1.0,0.0,0.368421,0.408284,0.459184,0.653061
...,...,...,...,...,...,...,...,...,...,...
4809,0.588235,1.0,0.000000,0.00,1.0,0.0,0.052632,0.763314,0.030612,0.122449
4810,0.676471,0.0,0.333333,1.00,0.0,0.0,0.368421,0.171598,0.561224,0.734694
4811,0.441176,0.0,0.666667,0.50,0.0,0.5,0.263158,0.272189,0.765306,0.632653
4812,0.705882,0.0,0.000000,0.25,0.0,1.0,0.631579,0.325444,0.744898,0.244898


In [306]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
num_classes = len(np.unique(y))
y_train = to_categorical(y_train, num_classes=num_classes)

y_test = to_categorical(y_test, num_classes=num_classes)

In [307]:
model = keras.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.2),
    layers.Dense(32, activation="relu"),
    layers.Dense(3, activation="softmax")
])
# Use a lower learning rate for better convergence
optimizer = Adam(learning_rate=0.001)

model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    X_train, y_train,
    epochs=20, 
    batch_size = 16,
    validation_split = 0.2

)

test_loss, test_accuracy = model.evaluate(X_test, y_test)
from math import ceil
print(f"Batch : {ceil(((len(X_train) - int((len(X_train) * 0.2)))) / 32)}")
print(f"Test accuracy: {test_accuracy:.2f}")

Epoch 1/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4811 - loss: 1.0095 - val_accuracy: 0.7743 - val_loss: 0.6566
Epoch 2/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7042 - loss: 0.7138 - val_accuracy: 0.8210 - val_loss: 0.5729
Epoch 3/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7847 - loss: 0.5965 - val_accuracy: 0.8495 - val_loss: 0.5205
Epoch 4/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8063 - loss: 0.5394 - val_accuracy: 0.8560 - val_loss: 0.4905
Epoch 5/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7887 - loss: 0.5410 - val_accuracy: 0.8807 - val_loss: 0.4591
Epoch 6/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8268 - loss: 0.4789 - val_accuracy: 0.8482 - val_loss: 0.4634
Epoch 7/20
[1m193/193[0m 

In [308]:
#save
model.save('fnn_model.keras')
pickle.dump(scaler, open('fnn_scaler.pkl', 'wb'))