In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

In [17]:
# Loading dataset
df = pd.read_csv("game_details_with_counts.csv")

# Ensuring 'cult_classic' is binary (1 for cult classic, 0 for not)
# Cult classic if 'average_rating' > 8 and 'users_rated' < 5000
df['cult_classic'] = (df['average_rating'] > 8) & (df['users_rated'] < 5000)

# Selecting features and target
X = df[['average_rating', 'users_rated']]  # Features for classification
y = df['cult_classic']  # Target label

In [None]:
# Splitting the dataset into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Log transform to handle the continuous features with MultinomialNB
# This will ensure that the features are strictly positive
log_transformer = FunctionTransformer(func=lambda x: np.log1p(x), validate=False)
X_train_log = log_transformer.fit_transform(X_train)
X_test_log = log_transformer.transform(X_test)

# Now standardizing the features 
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_log)
X_test_scaled = scaler.transform(X_test_log)



In [None]:
# Logistic Regression
logreg = LogisticRegression()
logreg.fit(X_train_scaled, y_train)
y_pred_logreg = logreg.predict(X_test_scaled)

# Multinomial Naïve Bayes
nb = MultinomialNB()
nb.fit(X_train_scaled, y_train)
y_pred_nb = nb.predict(X_test_scaled)

# Evaluating both models
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_logreg))
print("Naïve Bayes Accuracy:", accuracy_score(y_test, y_pred_nb))

print("\nLogistic Regression Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_logreg))

print("\nNaïve Bayes Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_nb))


ValueError: Negative values in data passed to MultinomialNB (input X)