In [15]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, Binarizer
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report


In [None]:
# Reading the game-details CSV into the working DataFrame
data = pd.read_csv(filepath_or_buffer='game_details_with_counts.csv')


In [None]:
# Building the 'cult_classic' label.
# A game is flagged (1) when it has strictly more than
# `moderate_ratings_threshold` user ratings AND an average rating strictly
# above `high_ratings_threshold`; otherwise it is 0.
moderate_ratings_threshold = 1000
high_ratings_threshold = 8.0

has_enough_ratings = data['users_rated'].gt(moderate_ratings_threshold)
is_highly_rated = data['average_rating'].gt(high_ratings_threshold)
data['cult_classic'] = (has_enough_ratings & is_highly_rated).astype(int)

# Previewing the first rows next to the columns the label is derived from
preview_columns = ['name', 'users_rated', 'average_rating', 'cult_classic']
print(data[preview_columns].head())


                name  users_rated  average_rating  cult_classic
0              CATAN       129246         7.09753             0
1        Carcassonne       128379         7.41021             0
2           Pandemic       126656         7.53305             0
3          7 Wonders       105584         7.68019             0
4  Terraforming Mars       100639         8.35596             1


In [18]:
# Listing every column name currently present in the DataFrame
print(data.columns)

Index(['name', 'year_published', 'min_players', 'max_players', 'min_playtime',
       'max_playtime', 'min_age', 'categories', 'mechanics', 'users_rated',
       'average_rating', 'weight', 'category_count', 'mechanic_count',
       'cult_classic'],
      dtype='object')


In [19]:
# Feature matrix for MultinomialNB: the count, player-range and playtime columns.
# NOTE(review): MultinomialNB expects non-negative values; presumably all of
# these columns are non-negative integers — verify against the CSV.
multinomial_columns = [
    'category_count', 'mechanic_count',
    'min_players', 'max_players',
    'min_playtime', 'max_playtime',
]
multinomial_features = data[multinomial_columns].to_numpy()

# 70/30 train/test split with a fixed seed so the split is reproducible
X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(
    multinomial_features,
    data['cult_classic'],
    test_size=0.3,
    random_state=42,
)

In [20]:
# Selecting only continuous fields for Gaussian Naïve Bayes
# NOTE(review): 'users_rated' is also one of the two columns used to define
# 'cult_classic', so this feature partially encodes the target — confirm
# that this is intended.
gaussian_features = data[['year_published', 'weight', 'users_rated']].values

# Splitting BEFORE scaling so that the scaler never sees test rows.
# Same test_size / random_state as before, so the same rows land in each split.
X_train_g_raw, X_test_g_raw, y_train_g, y_test_g = train_test_split(
    gaussian_features, data['cult_classic'], test_size=0.3, random_state=42
)

# Fitting the scaler on the training split only, then applying the learned
# mean/std to the test split (avoids leaking test-set statistics).
scaler = StandardScaler()
X_train_g = scaler.fit_transform(X_train_g_raw)
X_test_g = scaler.transform(X_test_g_raw)

# Kept so any later cell that reads the full scaled matrix keeps working;
# it now uses the train-fitted scaler.
gaussian_features_scaled = scaler.transform(gaussian_features)

In [21]:
# Deriving two 0/1 indicator columns for Bernoulli Naïve Bayes.
# 'popular_game': 1 when the average rating is strictly above 7
data['popular_game'] = data['average_rating'].gt(7).astype(int)
# 'large_group': 1 when the game supports 4 or more players
data['large_group'] = data['max_players'].ge(4).astype(int)
# NOTE(review): 'popular_game' is computed from 'average_rating', which is
# also used to define 'cult_classic' — confirm this overlap with the target
# is intended.

# Assembling the binary feature matrix
bernoulli_columns = ['popular_game', 'large_group']
bernoulli_features = data[bernoulli_columns].to_numpy()

# 70/30 train/test split with a fixed seed so the split is reproducible
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(
    bernoulli_features,
    data['cult_classic'],
    test_size=0.3,
    random_state=42,
)

In [22]:
# Multinomial Naïve Bayes: fit on the training split, predict the test split
mnb = MultinomialNB().fit(X_train_m, y_train_m)
y_pred_m = mnb.predict(X_test_m)

# Computing the evaluation metrics up front, then printing them together
cm_m = confusion_matrix(y_test_m, y_pred_m)
acc_m = accuracy_score(y_test_m, y_pred_m)
report_m = classification_report(y_test_m, y_pred_m)

print("Multinomial Naïve Bayes Results:")
print("Confusion Matrix:\n", cm_m)
print("Accuracy:", acc_m)
print("Classification Report:\n", report_m)


Multinomial Naïve Bayes Results:
Confusion Matrix:
 [[706  93]
 [ 59  42]]
Accuracy: 0.8311111111111111
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.88      0.90       799
           1       0.31      0.42      0.36       101

    accuracy                           0.83       900
   macro avg       0.62      0.65      0.63       900
weighted avg       0.85      0.83      0.84       900



In [23]:
# Gaussian Naïve Bayes: fit on the training split, predict the test split
gnb = GaussianNB().fit(X_train_g, y_train_g)
y_pred_g = gnb.predict(X_test_g)

# Computing the evaluation metrics up front, then printing them together
cm_g = confusion_matrix(y_test_g, y_pred_g)
acc_g = accuracy_score(y_test_g, y_pred_g)
report_g = classification_report(y_test_g, y_pred_g)

print("\nGaussian Naïve Bayes Results:")
print("Confusion Matrix:\n", cm_g)
print("Accuracy:", acc_g)
print("Classification Report:\n", report_g)



Gaussian Naïve Bayes Results:
Confusion Matrix:
 [[622 177]
 [ 23  78]]
Accuracy: 0.7777777777777778
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.78      0.86       799
           1       0.31      0.77      0.44       101

    accuracy                           0.78       900
   macro avg       0.64      0.78      0.65       900
weighted avg       0.89      0.78      0.81       900



In [24]:
# Bernoulli Naïve Bayes: fit on the binary training features, predict the test split
bnb = BernoulliNB()
bnb.fit(X_train_b, y_train_b)
y_pred_b = bnb.predict(X_test_b)
print("\nBernoulli Naïve Bayes Results:")
print("Confusion Matrix:\n", confusion_matrix(y_test_b, y_pred_b))
print("Accuracy:", accuracy_score(y_test_b, y_pred_b))
# When the model predicts no samples for a class, that class's precision is
# undefined (0/0). zero_division=0 reports it as 0.0 explicitly instead of
# emitting an UndefinedMetricWarning for each affected metric.
# NOTE(review): a class with zero predictions means accuracy here only
# reflects the majority-class rate — read the per-class recall instead.
print("Classification Report:\n",
      classification_report(y_test_b, y_pred_b, zero_division=0))



Bernoulli Naïve Bayes Results:
Confusion Matrix:
 [[799   0]
 [101   0]]
Accuracy: 0.8877777777777778
Classification Report:
               precision    recall  f1-score   support

           0       0.89      1.00      0.94       799
           1       0.00      0.00      0.00       101

    accuracy                           0.89       900
   macro avg       0.44      0.50      0.47       900
weighted avg       0.79      0.89      0.84       900



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
