In [32]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

In [33]:
# Read the per-game table from disk.
df = pd.read_csv("game_details_with_counts.csv")

# Derive the boolean 'cult_classic' label (True = cult classic, False = not).
# A game counts as a cult classic when its 'average_rating' is above 8 while
# fewer than 5000 users have rated it.
df['cult_classic'] = df['average_rating'].gt(8) & df['users_rated'].lt(5000)

# Feature matrix and target vector for the classifiers.
# Note: the label above is computed from these same two columns, so the models
# are effectively being asked to recover a known threshold rule.
X = df.loc[:, ['average_rating', 'users_rated']]
y = df['cult_classic']

In [34]:
# Splitting the dataset into training and testing sets (70% training, 30% testing).
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Applying log1p to both feature columns. log1p compresses large values and maps
# non-negative input to non-negative output, which MultinomialNB requires
# (assumes both columns are non-negative -- TODO confirm against the data).
# No standardisation is applied: both models are fit on these log features as-is.
# np.log1p is passed directly instead of via a lambda so the transformer stays
# picklable; the transformation itself is unchanged.
log_transformer = FunctionTransformer(func=np.log1p, validate=False)
X_train_log = log_transformer.fit_transform(X_train)
X_test_log = log_transformer.transform(X_test)


In [35]:
# Logistic Regression: fit on the log-transformed training features with
# default hyperparameters, then predict labels for the held-out test set.
logreg = LogisticRegression().fit(X_train_log, y_train)
y_pred_logreg = logreg.predict(X_test_log)

# Multinomial Naïve Bayes: same training data and defaults, same test set.
nb = MultinomialNB().fit(X_train_log, y_train)
y_pred_nb = nb.predict(X_test_log)




In [36]:
# Logistic Regression evaluation on the test set: overall accuracy followed by
# the confusion matrix (rows = true labels, columns = predicted labels).
logreg_accuracy = accuracy_score(y_test, y_pred_logreg)
logreg_confusion = confusion_matrix(y_test, y_pred_logreg)

print("Logistic Regression Accuracy:", logreg_accuracy)
print("\nLogistic Regression Confusion Matrix:")
print(logreg_confusion)


Logistic Regression Accuracy: 0.9344444444444444

Logistic Regression Confusion Matrix:
[[838   0]
 [ 59   3]]


In [37]:
# Naïve Bayes evaluation on the test set: overall accuracy followed by the
# confusion matrix (rows = true labels, columns = predicted labels).
nb_accuracy = accuracy_score(y_test, y_pred_nb)
nb_confusion = confusion_matrix(y_test, y_pred_nb)

print("Naïve Bayes Accuracy:", nb_accuracy)
print("\nNaïve Bayes Confusion Matrix:")
print(nb_confusion)

Naïve Bayes Accuracy: 0.9311111111111111

Naïve Bayes Confusion Matrix:
[[838   0]
 [ 62   0]]
