In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder, MinMaxScaler


In [None]:
# Load dataset
data = pd.read_csv("gameDatabase/Video_Games_Sales_as_at_22_Dec_2016.csv")

# Check for missing values
print(data.isnull().sum())

# Display the first few rows
data.head()


Name                  2
Platform              0
Year_of_Release     269
Genre                 2
Publisher            54
NA_Sales              0
EU_Sales              0
JP_Sales              0
Other_Sales           0
Global_Sales          0
Critic_Score       8582
Critic_Count       8582
User_Score         6704
User_Count         9129
Developer          6623
Rating             6769
dtype: int64


Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating
0,Wii Sports,Wii,2006.0,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8.0,322.0,Nintendo,E
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,,,,,
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.61,10.93,3.28,2.95,32.77,80.0,73.0,8.0,192.0,Nintendo,E
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,,,,,,


In [None]:
# Drop unnecessary columns
data = data.drop(columns=['Name', 'Developer'])


In [None]:
# Drop rows with missing 'Genre' or 'Global_Sales'
data = data.dropna(subset=['Genre', 'Global_Sales'])

In [None]:
# Handle 'User_Score': Replace 'tbd' with NaN and convert to numeric
data['User_Score'] = data['User_Score'].replace('tbd', np.nan).astype(float)

In [None]:
# Fill missing values for 'Critic_Score' and 'User_Score' with their mean
data['Critic_Score'] = data['Critic_Score'].fillna(data['Critic_Score'].mean())
data['User_Score'] = data['User_Score'].fillna(data['User_Score'].mean())

In [None]:
# Normalize 'Global_Sales' for easier regression
scaler = MinMaxScaler()
data['Global_Sales'] = scaler.fit_transform(data[['Global_Sales']])

In [None]:
# Convert categorical columns (Genre, Platform) into one-hot encoding
data = pd.get_dummies(data, columns=['Genre', 'Platform'], drop_first=True)

# Check the transformed data
data.head()

Unnamed: 0,Year_of_Release,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,...,Platform_SAT,Platform_SCD,Platform_SNES,Platform_TG16,Platform_WS,Platform_Wii,Platform_WiiU,Platform_X360,Platform_XB,Platform_XOne
0,2006.0,Nintendo,41.36,28.96,3.77,8.45,1.0,76.0,51.0,8.0,...,False,False,False,False,False,True,False,False,False,False
1,1985.0,Nintendo,29.08,3.58,6.81,0.77,0.487518,68.967679,,7.125046,...,False,False,False,False,False,False,False,False,False,False
2,2008.0,Nintendo,15.68,12.76,3.79,3.29,0.43032,82.0,73.0,8.3,...,False,False,False,False,False,True,False,False,False,False
3,2009.0,Nintendo,15.61,10.93,3.28,2.95,0.396995,80.0,73.0,8.0,...,False,False,False,False,False,True,False,False,False,False
4,1996.0,Nintendo,11.27,8.89,10.22,1.0,0.380029,68.967679,,7.125046,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Separate features (X) and target variable (y)
X = data.drop(columns=['Global_Sales'])
y = data['Global_Sales']

In [None]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Check the data types of X
print(X.dtypes)

# If any columns are object (string), display them
non_numeric_columns = X.select_dtypes(include=['object']).columns
print("Non-numeric columns:", non_numeric_columns)


Year_of_Release       float64
Publisher              object
NA_Sales              float64
EU_Sales              float64
JP_Sales              float64
Other_Sales           float64
Critic_Score          float64
Critic_Count          float64
User_Score            float64
User_Count            float64
Rating                 object
Genre_Adventure          bool
Genre_Fighting           bool
Genre_Misc               bool
Genre_Platform           bool
Genre_Puzzle             bool
Genre_Racing             bool
Genre_Role-Playing       bool
Genre_Shooter            bool
Genre_Simulation         bool
Genre_Sports             bool
Genre_Strategy           bool
Platform_3DO             bool
Platform_3DS             bool
Platform_DC              bool
Platform_DS              bool
Platform_GB              bool
Platform_GBA             bool
Platform_GC              bool
Platform_GEN             bool
Platform_GG              bool
Platform_N64             bool
Platform_NES             bool
Platform_N

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode non-numeric columns
label_encoder = LabelEncoder()
for col in non_numeric_columns:
    X[col] = label_encoder.fit_transform(X[col])


In [None]:
# One-Hot Encode non-numeric columns
X = pd.get_dummies(X, columns=non_numeric_columns, drop_first=True)


In [None]:
print(X.dtypes)


Year_of_Release    float64
NA_Sales           float64
EU_Sales           float64
JP_Sales           float64
Other_Sales        float64
                    ...   
Rating_4              bool
Rating_5              bool
Rating_6              bool
Rating_7              bool
Rating_8              bool
Length: 639, dtype: object


In [None]:
# Split the data again (if X has changed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Train the Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Make predictions on the test set
y_pred = model.predict(X_test)

In [None]:
# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.00010790491227212128


In [None]:
import joblib


In [None]:
# Save the trained model
joblib.dump(model, "game_sales_model.pkl")

['game_sales_model.pkl']

In [None]:
# Load the model (for reuse later)
loaded_model = joblib.load("game_sales_model.pkl")

In [None]:
# Example usage
example_predictions = loaded_model.predict(X_test[:5])
print("Example Predictions:", example_predictions)

Example Predictions: [0.00519995 0.00036355 0.00131241 0.00620092 0.00339796]


In [None]:
# Define a mapping of moods to genres
mood_genre_mapping = {
    'happy': ['Action', 'Adventure', 'Sports'],
    'sad': ['Puzzle', 'Casual', 'Role-Playing'],
    'stressed': ['Strategy', 'Simulation', 'Combat']
}

In [None]:
# Filter recommendations based on mood
def recommend_games_by_mood(mood, X_test, data):
    genres = mood_genre_mapping.get(mood.lower(), [])
    genre_cols = [col for col in data.columns if any(genre in col for genre in genres)]

    # Get predictions and filter for selected genres
    recommendations = pd.DataFrame(X_test, columns=data.columns[:-1])
    recommendations['Predicted_Sales'] = loaded_model.predict(X_test)

    filtered_recommendations = recommendations[recommendations[genre_cols].sum(axis=1) > 0]
    return filtered_recommendations.sort_values(by='Predicted_Sales', ascending=False).head(5)

In [None]:
# Example: Get recommendations for 'happy' mood
recommendations = recommend_games_by_mood('sad', X_test, data)
print(recommendations)

     Year_of_Release  Publisher  NA_Sales  EU_Sales  JP_Sales  Other_Sales  \
27            2010.0        NaN      5.51      3.17      5.65         0.80   
47            2014.0        NaN      4.35      3.49      3.10         0.74   
84            2001.0        NaN      2.91      2.07      2.73         0.33   
131           2004.0        NaN      2.57      1.58      2.06         0.21   
88            1999.0        NaN      2.28      1.72      3.63         0.23   

     Global_Sales  Critic_Score  Critic_Count  User_Score  ...  Platform_SAT  \
27            NaN     68.967679           NaN    7.125046  ...         False   
47            NaN     68.967679           NaN    7.125046  ...         False   
84            NaN     92.000000          53.0    8.700000  ...         False   
131           NaN     76.000000          28.0    9.000000  ...         False   
88            NaN     90.000000          24.0    8.600000  ...         False   

     Platform_SCD  Platform_SNES  Platform_TG16  P

In [None]:
def recommend_games_by_mood(mood, X_test, data, loaded_model, num_recommendations=5):
    """
    Recommend games dynamically based on mood using a trained ML model.

    Parameters:
        mood (str): The user's mood for filtering genres.
        X_test (DataFrame): Feature data for prediction.
        data (DataFrame): Original data containing game details and genre columns.
        loaded_model: Pre-trained ML model for prediction.
        num_recommendations (int): Number of games to recommend.

    Returns:
        DataFrame: Top `num_recommendations` recommended games.
    """
    # Map mood to relevant genres
    genres = mood_genre_mapping.get(mood.lower(), [])
    if not genres:
        print(f"No genres found for mood: {mood}. Returning random recommendations.")
        genres = data.columns  # Fallback to all genres if none match

    # Identify relevant genre columns
    genre_cols = [col for col in data.columns if any(genre in col.lower() for genre in genres)]

    # Get predictions
    recommendations = pd.DataFrame(X_test, columns=data.columns[:-1])  # Feature names from data
    recommendations['Predicted_Score'] = loaded_model.predict(X_test)

    # Filter by genres related to the mood
    filtered_recommendations = recommendations[recommendations[genre_cols].sum(axis=1) > 0]

    if filtered_recommendations.empty:
        print("No specific games match the mood. Recommending from all games.")
        filtered_recommendations = recommendations

    # Sort by predicted scores
    sorted_recommendations = filtered_recommendations.sort_values(by='Predicted_Score', ascending=False)

    # Randomize within the top 50% and select `num_recommendations`
    top_half = sorted_recommendations.head(len(sorted_recommendations) // 2)
    final_recommendations = top_half.sample(n=min(num_recommendations, len(top_half)), random_state=None)

    return final_recommendations


In [None]:
# Define the updated function for recommending games with names included

def recommend_games_by_mood_with_names(mood, X_test, data, loaded_model, num_recommendations=5):
    """
    Recommend games dynamically based on mood using a trained ML model.

    Parameters:
        mood (str): The user's mood for filtering genres.
        X_test (DataFrame): Feature data for prediction.
        data (DataFrame): Original data containing game details and genre columns.
        loaded_model: Pre-trained ML model for prediction.
        num_recommendations (int): Number of games to recommend.

    Returns:
        DataFrame: Top `num_recommendations` recommended games including their names.
    """
    # Map mood to relevant genres
    mood_genre_mapping = {
        "happy": ["Adventure", "Action"],
        "relaxed": ["Puzzle", "Simulation"],
        "competitive": ["Sports", "Racing"],
        "strategic": ["Strategy", "Role-Playing"],
    }

    genres = mood_genre_mapping.get(mood.lower(), [])
    if not genres:
        print(f"No genres found for mood: {mood}. Returning random recommendations.")
        genres = data['Genre'].unique()  # Fallback to all genres if none match

    # Filter data for genres matching the mood
    genre_filtered_data = data[data['Genre'].isin(genres)]

    if genre_filtered_data.empty:
        print("No specific games match the mood. Recommending from all games.")
        genre_filtered_data = data

    # Add predictions to the genre-filtered data
    genre_filtered_data['Predicted_Score'] = loaded_model.predict(X_test)

    # Sort by predicted scores
    sorted_recommendations = genre_filtered_data.sort_values(by='Predicted_Score', ascending=False)

    # Randomize within the top 50% and select `num_recommendations`
    top_half = sorted_recommendations.head(len(sorted_recommendations) // 2)
    final_recommendations = top_half.sample(n=min(num_recommendations, len(top_half)), random_state=None)

    return final_recommendations[['Name', 'Genre', 'Predicted_Score']].reset_index(drop=True)

# Note: This function assumes `X_test` is already prepared with the same features as the training data
# and the model (`loaded_model`) has been properly trained. Let me know if you want me to simulate model usage.


In [None]:
import numpy as np
import pandas as pd

# Define the updated recommendation function
def recommend_games_by_mood_with_names(mood, X_test, data, loaded_model, num_recommendations=5):
    """
    Recommend games dynamically based on mood using a trained ML model.

    Parameters:
        mood (str): The user's mood for filtering genres.
        X_test (DataFrame): Feature data for prediction.
        data (DataFrame): Original data containing game details and genre columns.
        loaded_model: Pre-trained ML model for prediction.
        num_recommendations (int): Number of games to recommend.

    Returns:
        DataFrame: Top `num_recommendations` recommended games with names.
    """
    # Debug: Print column names to verify "Genre" column exists
    print("Available columns in data:", data.columns)

    # Map mood to relevant genres
    genres = mood_genre_mapping.get(mood.lower(), [])
    if not genres:
        print(f"No genres found for mood: {mood}. Returning random recommendations.")
        genres = data['Genre'].unique()  # Fallback to all genres if none match

    # Filter data for genres matching the mood
    genre_filtered_data = data[data['Genre'].isin(genres)]

    if genre_filtered_data.empty:
        print("No specific games match the mood. Recommending from all games.")
        genre_filtered_data = data

    # Prepare test data and get predictions
    genre_filtered_data['Predicted_Score'] = loaded_model.predict(X_test)

    # Sort by predicted scores
    sorted_recommendations = genre_filtered_data.sort_values(by='Predicted_Score', ascending=False)

    # Randomize within the top 50% and select `num_recommendations`
    top_half = sorted_recommendations.head(len(sorted_recommendations) // 2)
    final_recommendations = top_half.sample(n=min(num_recommendations, len(top_half)), random_state=None)

    # Return only the names and predicted scores
    return final_recommendations[['Name', 'Predicted_Score']]


In [None]:
def recommend_games_by_mood(mood, X_test, data, loaded_model, num_recommendations=10):
    """
    Recommend games dynamically based on mood using a trained ML model.

    Parameters:
        mood (str): The user's mood for filtering genres.
        X_test (DataFrame): Feature data for prediction.
        data (DataFrame): Original data containing game details and genre columns.
        loaded_model: Pre-trained ML model for prediction.
        num_recommendations (int): Number of games to recommend.

    Returns:
        DataFrame: Top `num_recommendations` recommended games including their names.
    """
    # Map mood to relevant genres
    genres = mood_genre_mapping.get(mood.lower(), [])
    genre_cols = [col for col in data.columns if any(genre in col.lower() for genre in genres)]

    # Get predictions
    recommendations = pd.DataFrame(X_test, columns=data.columns[:-1])  # Feature names from data
    recommendations['Predicted_Sales'] = loaded_model.predict(X_test)

    # Add game names to the recommendations
    recommendations['Name'] = data['Name'].values  # Add game names to the DataFrame

    # Filter by genres related to the mood
    filtered_recommendations = recommendations[recommendations[genre_cols].sum(axis=1) > 0]

    # If no matching genres, recommend from all games
    if filtered_recommendations.empty:
        print("No specific games match the mood. Recommending from all games.")
        filtered_recommendations = recommendations

    # Sort by predicted sales and select top recommendations
    sorted_recommendations = filtered_recommendations.sort_values(by='Predicted_Sales', ascending=False)
    return sorted_recommendations[['Name', 'Predicted_Sales']].head(num_recommendations)


In [None]:
def recommend_games_by_mood(mood, X_test, original_data, processed_data, loaded_model, num_recommendations=10):
    """
    Recommend games dynamically based on mood using a trained ML model.

    Parameters:
        mood (str): The user's mood for filtering genres.
        X_test (DataFrame): Feature data for prediction.
        original_data (DataFrame): Original dataset containing game details (e.g., 'Name').
        processed_data (DataFrame): Processed dataset used for training/prediction.
        loaded_model: Pre-trained ML model for prediction.
        num_recommendations (int): Number of games to recommend.

    Returns:
        DataFrame: Top `num_recommendations` recommended games including their names and predicted sales.
    """
    # Map mood to relevant genres
    genres = mood_genre_mapping.get(mood.lower(), [])
    genre_cols = [col for col in processed_data.columns if any(genre in col.lower() for genre in genres)]

    # Get predictions
    recommendations = pd.DataFrame(X_test, columns=processed_data.columns)  # Use processed column names
    recommendations['Predicted_Sales'] = loaded_model.predict(X_test)

    # Add game names to the recommendations from the original dataset
    recommendations['Name'] = original_data['Name'].values[:len(recommendations)]

    # Filter by genres related to the mood
    if genres:
        filtered_recommendations = recommendations[recommendations[genre_cols].sum(axis=1) > 0]
    else:
        print("No genres mapped to this mood. Recommending games from all genres.")
        filtered_recommendations = recommendations

    # Sort by predicted sales and select top recommendations
    sorted_recommendations = filtered_recommendations.sort_values(by='Predicted_Sales', ascending=False)
    return sorted_recommendations[['Name', 'Predicted_Sales']].head(num_recommendations)


In [None]:
import random

def recommend_games_by_mood(mood, X_test, original_data, processed_data, loaded_model, num_recommendations=10):
    """
    Recommend games dynamically based on mood using a trained ML model, with randomized recommendations.

    Parameters:
        mood (str): The user's mood for filtering genres.
        X_test (DataFrame): Feature data for prediction.
        original_data (DataFrame): Original dataset containing game details (e.g., 'Name').
        processed_data (DataFrame): Processed dataset used for training/prediction.
        loaded_model: Pre-trained ML model for prediction.
        num_recommendations (int): Number of games to recommend.

    Returns:
        DataFrame: Randomized `num_recommendations` recommended games including their names and predicted sales.
    """
    # Map mood to relevant genres
    genres = mood_genre_mapping.get(mood.lower(), [])
    genre_cols = [col for col in processed_data.columns if any(genre in col.lower() for genre in genres)]

    # Get predictions
    recommendations = pd.DataFrame(X_test, columns=processed_data.columns)  # Use processed column names
    recommendations['Predicted_Sales'] = loaded_model.predict(X_test)

    # Add game names to the recommendations from the original dataset
    recommendations['Name'] = original_data['Name'].values[:len(recommendations)]

    # Filter by genres related to the mood
    if genres:
        filtered_recommendations = recommendations[recommendations[genre_cols].sum(axis=1) > 0]
    else:
        print("No genres mapped to this mood. Recommending games from all genres.")
        filtered_recommendations = recommendations

    # If no filtered recommendations, fallback to all
    if filtered_recommendations.empty:
        print("No specific games match the mood. Recommending from all games.")
        filtered_recommendations = recommendations

    # Shuffle (randomize) recommendations and select the top `num_recommendations`
    randomized_recommendations = filtered_recommendations.sample(
        n=min(num_recommendations, len(filtered_recommendations)),
        random_state=random.randint(1, 10000)  # Use a new random state for each execution
    )

    return randomized_recommendations[['Name', 'Predicted_Sales']]
