## Section 1 Data Preparation

In [3]:
#library for data processing
import streamlit as st
import pandas as pd

# library to make the recommendation system model
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Load the data for content-based recommendations
def load_content_data():
    
    #load video games metadata
    df = pd.read_csv('all_video_games(cleaned).csv')
    
    #Drop missing-value
    df = df.dropna(subset=['Genres', 'Platforms', 'Publisher', 'User Score', 'Release Date', 'User Ratings Count'])
    
    #Change the user score data type to float as a numerical feature
    df['User Score'] = df['User Score'].astype(float)
    df['content'] = df['Genres'] + ' ' + df['Platforms'] + ' ' + df['Publisher']
    return df





In [7]:
df_content = load_content_data()

In [9]:
df_content.head()

Unnamed: 0,Title,Release Date,Developer,Publisher,Genres,Product Rating,User Score,User Ratings Count,Platforms,content
0,Ziggurat (2012),17/2/2012,Action Button Entertainment,Freshuu Inc.,Action,Not Rated,6.9,14.0,iOS (iPhone/iPad),Action iOS (iPhone/iPad) Freshuu Inc.
3,Gothic 3,14/11/2006,Piranha Bytes,Aspyr,Western RPG,Rated T For Teen,7.5,832.0,PC,Western RPG PC Aspyr
4,Siege Survival: Gloria Victis,18/5/2021,FishTankStudio,Black Eye Games,RPG,Not Rated,6.5,10.0,PC,RPG PC Black Eye Games
5,Guitar Hero III: Legends of Rock,28/10/2007,Neversoft Entertainment,RedOctane,Rhythm,Rated T For Teen,8.4,144.0,Wii,Rhythm Wii RedOctane
7,Xenoraid,8/11/2016,10tons,10tons,Vertical Shoot-'Em-Up,Rated E +10 For Everyone +10,6.2,6.0,PlayStation 4,Vertical Shoot-'Em-Up PlayStation 4 10tons


In [11]:
df_content.shape

(11252, 10)

In [13]:
# Check dataset information
df_content.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11252 entries, 0 to 14053
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Title               11252 non-null  object 
 1   Release Date        11252 non-null  object 
 2   Developer           11252 non-null  object 
 3   Publisher           11252 non-null  object 
 4   Genres              11252 non-null  object 
 5   Product Rating      11252 non-null  object 
 6   User Score          11252 non-null  float64
 7   User Ratings Count  11252 non-null  float64
 8   Platforms           11252 non-null  object 
 9   content             11252 non-null  object 
dtypes: float64(2), object(8)
memory usage: 967.0+ KB


In [15]:
# Check NaN value in columns
df_content.isna().sum()

Title                 0
Release Date          0
Developer             0
Publisher             0
Genres                0
Product Rating        0
User Score            0
User Ratings Count    0
Platforms             0
content               0
dtype: int64

In [17]:
# Load the data for correlation finder
def load_correlation_data():
     #load video games metadata
    df = pd.read_csv('all_video_games(cleaned).csv')

     #load User metadata
    userset = pd.read_csv('User_Dataset.csv')

    #merge both file based on Title
    data = pd.merge(df, userset, on='Title').dropna()
    return data





In [19]:
df_corr = load_correlation_data()

In [20]:
df_corr.head()

Unnamed: 0,Title,Release Date,Developer,Publisher,Genres,Product Rating,User Score,User Ratings Count,Platforms,user_id,user_score
0,Ziggurat (2012),17/2/2012,Action Button Entertainment,Freshuu Inc.,Action,Not Rated,6.9,14.0,iOS (iPhone/iPad),user_9,3.01
1,Ziggurat (2012),17/2/2012,Action Button Entertainment,Freshuu Inc.,Action,Not Rated,6.9,14.0,iOS (iPhone/iPad),user_81,7.79
2,Ziggurat (2012),17/2/2012,Action Button Entertainment,Freshuu Inc.,Action,Not Rated,6.9,14.0,iOS (iPhone/iPad),user_66,8.31
3,Ziggurat (2012),17/2/2012,Action Button Entertainment,Freshuu Inc.,Action,Not Rated,6.9,14.0,iOS (iPhone/iPad),user_47,8.4
4,Ziggurat (2012),17/2/2012,Action Button Entertainment,Freshuu Inc.,Action,Not Rated,6.9,14.0,iOS (iPhone/iPad),user_49,2.72


In [23]:
df_corr.shape

(169326, 11)

In [25]:
# Check dataset information
df_corr.info()

<class 'pandas.core.frame.DataFrame'>
Index: 169326 entries, 0 to 217585
Data columns (total 11 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   Title               169326 non-null  object 
 1   Release Date        169326 non-null  object 
 2   Developer           169326 non-null  object 
 3   Publisher           169326 non-null  object 
 4   Genres              169326 non-null  object 
 5   Product Rating      169326 non-null  object 
 6   User Score          169326 non-null  float64
 7   User Ratings Count  169326 non-null  float64
 8   Platforms           169326 non-null  object 
 9   user_id             169326 non-null  object 
 10  user_score          169326 non-null  float64
dtypes: float64(3), object(8)
memory usage: 15.5+ MB


In [27]:
# Check NaN value in columns
df_corr.isna().sum()

Title                 0
Release Date          0
Developer             0
Publisher             0
Genres                0
Product Rating        0
User Score            0
User Ratings Count    0
Platforms             0
user_id               0
user_score            0
dtype: int64

## Section 2 Features Generation

2.1 Content Based Recommendations

In [31]:
#Function to recommend games based on cosine similarity
def content_based_recommendations(game_name, num_recommendations=5):

    #Vectorize the content using TfidVectorizer
    vectorizer = TfidfVectorizer(stop_words='english')
    content_matrix = vectorizer.fit_transform(df_content['content'])

    try:
        #Calculate the cosine similarity
        cosine_sim = cosine_similarity(content_matrix, content_matrix)

        #Get the index of the input game
        idx = df_content[df_content['Title'].str.lower() == game_name.lower()].index[0]

        #Get the similarity scores of all games with that game
        sim_scores = list(enumerate(cosine_sim[idx]))

        # Sort the games based on the similarity scores
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

        # Get the scores of the 10 most similar games
        sim_scores = sim_scores[1:11]

        # Get the game indices
        sim_indices = [i[0] for i in sim_scores[1:num_recommendations+1]]

        #return the most similar games
        return df_content.iloc[sim_indices][['Title', 'Genres', 'User Score', 'Platforms', 'Release Date']]
    except IndexError:
        return pd.DataFrame(columns=['Title', 'Genres', 'User Score'])

In [33]:
#obtain the game that has the similarity to user input e.g. Star Wars Episode III: Revenge of the Sith
content_based_recommendations('Star Wars Episode III: Revenge of the Sith')

Unnamed: 0,Title,Genres,User Score,Platforms,Release Date
8899,Valiant Hearts: The Great War,2D Platformer,8.2,PC,24/6/2014
197,Dark Void Zero,2D Platformer,5.7,PC,18/1/2010
291,Street Fighter X Mega Man,2D Platformer,7.8,PC,17/12/2012
4410,Bionic Commando: Rearmed,2D Platformer,7.9,PC,13/8/2008
2726,Skelattack,2D Platformer,6.4,PC,2/6/2020


2.2 KnowledgeBasedGamesRecommendation

In [35]:
# Load the dataset from a file
file_path = r'C:\Users\User\Downloads\Ai_Assignment\all_video_games(cleaned).csv'  # Replace with your file path
df_uploaded = pd.read_csv(file_path)

# Filter options
preferred_genre = input("Enter your preferred genre (e.g., Action): ")
min_user_score = float(input("Enter the minimum user score (0.0 to 10.0): "))

# Function to recommend games based on file upload and filters
def recommend_games(df, preferences):
    genre_filter = df['Genres'].str.contains(preferences['Genres'], case=False, na=False)
    score_filter = df['User Score'] >= preferences['Minimum User Score']
    filtered_df = df[genre_filter & score_filter]
    return filtered_df

# Get recommendations
recommended_games = recommend_games(df_uploaded, {'Genres': preferred_genre, 'Minimum User Score': min_user_score})

if not recommended_games.empty:
    top_10_games = recommended_games.head(10)
    print("### Top 10 Recommended Games")
    print(top_10_games)
else:
    print("No games match your preferences. Try adjusting the genre or score.")


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Ai_Assignment\\all_video_games(cleaned).csv'

2.3 Collaborative_Filtering

In [None]:
# Load the correlation data
data = load_correlation_data()

# Create the score matrix
score_matrix = data.pivot_table(index='user_id', columns='Title', values='user_score', fill_value=0)

# Get game correlation
game_title = input("Enter a game title to find its correlation: ")

if game_title:
    game_user_score = score_matrix[game_title]
    similar_to_game = score_matrix.corrwith(game_user_score)
    corr_drive = pd.DataFrame(similar_to_game, columns=['Correlation']).dropna()
    
    print(f"### Games correlated with '{game_title}':")
    print(corr_drive.sort_values('Correlation', ascending=False).head(10))
else:
    print("Please select a game title.")


In [None]:
import matplotlib.pyplot as plt 
import seaborn as sns 
  
sns.set_style('white') 
%matplotlib inline 
 
# plot graph of 'num of ratings column' 
plt.figure(figsize =(20, 4)) 
  
data['Product Rating'].hist(bins = 60) 