In [None]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [None]:
# Load the dataset
# Absolute location of the sales CSV that the next cell reads.
# NOTE(review): this looks like a Google Drive mount inside Colab — confirm the
# drive is mounted before running. The folder name "Dataset " contains a
# trailing space before the slash; verify that matches the real folder name.
file_path = "/content/drive/MyDrive/Dataset /Sales_data.csv"

In [None]:
# Read the CSV at `file_path` into a DataFrame. No dtype or NA options are
# passed, so pandas infers every column's type from the file contents.
sales_data = pd.read_csv(file_path)

In [None]:

# Handling missing values (simplified for this example)
# 'User_Score' can contain the text placeholder 'tbd'. errors='coerce' turns
# 'tbd' (and any other non-numeric token) into NaN in one step.
# A separate .replace('tbd', None) is deliberately NOT used: it is redundant
# here, and on pandas < 1.4 an explicit value=None is ignored, so the call
# forward-fills 'tbd' with the previous row's score instead of blanking it.
sales_data['User_Score'] = pd.to_numeric(sales_data['User_Score'], errors='coerce')

# Fill the remaining gaps in each numeric column with that column's median.
# NOTE(review): the medians are computed over the full dataset, i.e. before the
# train/test split performed later in the notebook.
for column in ['Year_of_Release', 'Critic_Score', 'Critic_Count', 'User_Score', 'User_Count']:
    sales_data[column] = sales_data[column].fillna(sales_data[column].median())


In [None]:
# Feature Engineering
# Target: 1 when a game's Global_Sales is at or above the 90th percentile of
# the column, 0 otherwise.
threshold = sales_data['Global_Sales'].quantile(0.90)
is_best_seller = sales_data['Global_Sales'].ge(threshold)
sales_data['Best_Seller'] = is_best_seller.astype(int)

# Map each distinct Genre value to an integer code.
label_encoder = LabelEncoder()
genre_codes = label_encoder.fit_transform(sales_data['Genre'])
sales_data['Genre_Encoded'] = genre_codes

# Age of the game relative to the fixed reference year 2024
# (computed as 2024 - Year_of_Release).
sales_data['Game_Age'] = sales_data['Year_of_Release'].rsub(2024)


In [None]:
# Select relevant features
# NOTE(review): the target 'Best_Seller' is derived from 'Global_Sales' in the
# feature-engineering cell. If Global_Sales is the sum of the four regional
# sales columns below (TODO confirm against the dataset), these inputs leak the
# target and the reported accuracies are not meaningful.
sales_columns = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
review_columns = ['Critic_Score', 'Critic_Count', 'User_Score', 'User_Count']
engineered_columns = ['Genre_Encoded', 'Game_Age']
features = sales_columns + review_columns + engineered_columns

# Model inputs (one column per feature) and the binary target.
X = sales_data.loc[:, features]
y = sales_data.loc[:, 'Best_Seller']

In [None]:
# Split the data into training and testing sets
# The split happens BEFORE scaling so the held-out 20% never influences the
# preprocessing statistics. random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
# Fit the scaler on the training rows only, then apply those same training
# means/variances to the test rows. Fitting on the full X (as was done
# previously) leaks test-set statistics into training.
# X itself is left unscaled; only X_train / X_test are standardized arrays.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Neural Network Model
# Binary classifier: two ReLU hidden layers (64 and 32 units) followed by a
# single sigmoid output unit.
# The input width is declared with an explicit Input object as the first layer
# instead of Dense(..., input_dim=...); Keras 3 emits a UserWarning for the
# input_dim form and recommends Input(shape) for Sequential models.
nn_model = Sequential([
    Input(shape=(X_train.shape[1],)),  # one input per feature column
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Compile the model
# Adam optimizer with binary cross-entropy loss; accuracy is tracked as the
# reported metric during training.
nn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model
# 20 passes over the training set in mini-batches of 10 rows, with the
# per-epoch progress bar enabled. The call is the cell's last expression, so
# the returned History object's repr is displayed after training.
nn_model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=10,
    verbose=1,
)

Epoch 1/20
[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9256 - loss: 0.1810
Epoch 2/20
[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9940 - loss: 0.0175
Epoch 3/20
[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9954 - loss: 0.0123
Epoch 4/20
[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9940 - loss: 0.0128
Epoch 5/20
[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9965 - loss: 0.0088
Epoch 6/20
[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9964 - loss: 0.0078
Epoch 7/20
[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9974 - loss: 0.0066
Epoch 8/20
[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9974 - loss: 0.0076
Epoch 9/20
[1m1338/1338

<keras.src.callbacks.history.History at 0x7a81628af1c0>

In [None]:
# Evaluate the model
# The sigmoid output is a score per test row; scores above 0.5 are labelled 1
# and everything else 0.
nn_scores = nn_model.predict(X_test)
nn_predictions = (nn_scores > 0.5).astype(int)

# Share of test rows whose predicted label matches the true label.
nn_accuracy = accuracy_score(y_test, nn_predictions)
print(f"Neural Network Accuracy: {nn_accuracy * 100:.2f}%")


[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Neural Network Accuracy: 99.58%


In [None]:
# Machine Learning Model (Random Forest)
# 100 trees; random_state fixes the forest's randomness so reruns match.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
# Fit on the same training split used for the neural network.
rf_model.fit(X_train, y_train)

In [None]:
# Evaluate the model
# Predict class labels for the held-out rows and report the share that match
# the true labels.
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_predictions)
print(f"Random Forest Accuracy: {rf_accuracy * 100:.2f}%")


Random Forest Accuracy: 99.64%
