__COMP9727 PROJECT: A STEAM GAME RECOMMENDER SYSTEM__


# Introduction
This project is a recommender system for Steam games. It is built on a dataset from Steam, a digital distribution platform for video games. The dataset contains 316 games and 21 million users. The recommender combines collaborative filtering with a content-based model, and the system integrates the recommender module with a user interaction module.

__PIPELINE__:
1. Data preprocessing, Analysis and Visualization
2. Extract features from the game dataset and build a feature vector for each game. The item vector has a hierarchical structure. First, the description text of each game is processed with a text vectorization technique (such as TF-IDF or Word2Vec) to generate a dense numerical vector for each game. Tags are also vectorized with TF-IDF, but because tags describe a game more distinctively, we manually increase the TF-IDF weight of the tags.

__Importing Libraries__

In [536]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

__Data Preprocessing__

In [537]:
# Input CSV files, given as paths relative to the current working directory.
file_path_reviews = "steam_reviews.csv"
file_path_desc = "desc.csv"
file_path_tags = "tags.csv"

# Load each file into its own DataFrame. Later cells read the columns
# 'author.steamid' and 'app_id' from df_reviews, 'id' and 'short_description'
# from df_desc, and 'app_id' and 'tags' from df_tags.
df_reviews = pd.read_csv(file_path_reviews)
df_desc = pd.read_csv(file_path_desc)
df_tags = pd.read_csv(file_path_tags)

__Clean the data__

In [538]:

import re
def tags_preprocess(tag):
    """Turn a stringified list of tags into one space-separated token string.

    Each tag is lower-cased and its spaces and hyphens are replaced with
    underscores, so a multi-word tag such as ``'Open World'`` stays a single
    token (``open_world``) when the result is split on whitespace.

    Args:
        tag: String holding a Python list literal, e.g. ``"['RPG', 'Co-op']"``.

    Returns:
        The normalised tags joined by single spaces (``''`` for an empty list).

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when
            ``tag`` is not a valid Python literal.
    """
    # Imported locally: the notebook's own `import ast` lives in a later cell
    # than the first call to this function, so a fresh top-to-bottom run would
    # otherwise fail with NameError.
    import ast

    tags = ast.literal_eval(tag)
    return ' '.join(
        t.lower().replace(' ', '_').replace('-', '_') for t in tags
    )
def desc_preprocess(desc):
    """Normalise a game description for text vectorisation.

    Missing values become the empty string; anything else is converted to
    ``str``, lower-cased, stripped of every character that is neither a word
    character nor whitespace, and has its whitespace runs collapsed to single
    spaces.

    Args:
        desc: Raw description value (may be missing/NaN).

    Returns:
        The cleaned description as a single string.
    """
    if pd.isna(desc):
        text = ''
    else:
        text = str(desc)
    # Drop punctuation after lower-casing, then re-join the whitespace-split
    # tokens so the result has no leading, trailing or repeated spaces.
    without_punctuation = re.sub(r'[^\w\s]', '', text.lower())
    return ' '.join(without_punctuation.split())

In [539]:
# Drop games that have no short description, then normalise the tag strings
# and description texts with the preprocessing helpers defined above.
df_desc = df_desc.dropna(subset=['short_description'])
# Pass the function directly; wrapping it in a lambda adds nothing.
df_tags['tags'] = df_tags['tags'].apply(tags_preprocess)
df_desc['short_description'] = df_desc['short_description'].apply(desc_preprocess)

__Extracting Features from the description dataset and the tags dataset__

In [540]:
import ast
# Fit a TF-IDF vocabulary on the cleaned descriptions, using scikit-learn's
# built-in English stop-word list.
tfidf_vectorizer_desc = TfidfVectorizer(stop_words='english')
# One row per row of df_desc, in df_desc order; User.run_model relies on this
# alignment when it labels the scores with df_desc.index.
tfidf_matrix_desc = tfidf_vectorizer_desc.fit_transform(df_desc['short_description'].tolist())
# Printing the matrix lists its stored "(row, column)  weight" entries.
print(tfidf_matrix_desc)

  (0, 944)	0.27707007158803415
  (0, 1488)	0.27707007158803415
  (0, 2527)	0.2585299893139412
  (0, 1561)	0.27707007158803415
  (0, 2179)	0.23517224803621373
  (0, 1692)	0.23517224803621373
  (0, 2924)	0.2386355452946131
  (0, 1743)	0.18493768577197395
  (0, 475)	0.27707007158803415
  (0, 1200)	0.24537559159788733
  (0, 406)	0.27707007158803415
  (0, 1126)	0.18493768577197395
  (0, 2758)	0.18493768577197395
  (0, 1124)	0.10053639446178186
  (0, 2037)	0.27707007158803415
  (0, 2542)	0.23517224803621373
  (0, 1814)	0.27707007158803415
  (1, 535)	0.1923484174817216
  (1, 1025)	0.2235605171169861
  (1, 1809)	0.29737536420845806
  (1, 2627)	0.263358129952708
  (1, 1075)	0.2774765577905971
  (1, 2874)	0.1734214546066264
  (1, 2077)	0.29737536420845806
  (1, 2397)	0.263358129952708
  :	:
  (313, 1038)	0.18556620844540345
  (313, 382)	0.18556620844540345
  (313, 25)	0.18556620844540345
  (313, 584)	0.18556620844540345
  (313, 263)	0.18556620844540345
  (313, 2108)	0.18556620844540345
  (313, 1

Create the user class. The user class takes a user model as input. A user can be either an existing user in our reviews dataset or a new user who needs a cold start, which will be implemented later.

In [585]:
from sklearn.metrics.pairwise import cosine_similarity
class User:
    """Content-based profile of one Steam user.

    The profile is a list of game ids. The TF-IDF vectors of those games'
    descriptions are compared with every game in ``df_desc`` to rank the
    games the user has not yet got in the profile.

    The class reads the notebook-level globals ``df_reviews``, ``df_desc``,
    ``df_tags``, ``tfidf_vectorizer_desc`` and ``tfidf_matrix_desc``.
    """

    def __init__(self, user_mod, user_id):
        """Create an empty profile.

        Args:
            user_mod: User model handle; stored but not used by this class.
            user_id: Value matched against ``df_reviews['author.steamid']``.
        """
        self.user_mod = user_mod
        self.user_id = user_id
        self.profile = []  # this array contains game ids.
        self.feature_arr = None  # feature array (TF-IDF) built from the profile array.
        self.tags = []
        self.recommendations = []
        self.limit = 30  # maximum number of games returned by run_model

    def hot_start(self):
        """Fill the profile from the user's existing reviews and vectorise it."""
        df_user_reviews = df_reviews[df_reviews['author.steamid'] == self.user_id]
        for app_id in df_user_reviews['app_id'].values:
            self.update_profile(app_id)
        self.generate_new_profile_vector()

    def cold_start(self):
        """Placeholder for users without reviews; not implemented yet."""
        pass

    def update_profile(self, app_id):
        """Append ``app_id`` to the profile without touching the feature array."""
        self.profile.append(app_id)

    def generate_new_profile_vector(self):
        """Rebuild ``self.tags`` and ``self.feature_arr`` from the current profile."""
        desc = df_desc.loc[df_desc['id'].isin(self.profile)]
        self.tags = df_tags[df_tags['app_id'].isin(self.profile)]['tags']
        self.feature_arr = tfidf_vectorizer_desc.transform(desc['short_description'])

    def like_game(self, app_id):
        """Add a liked game and refresh the profile vector.

        The vector is rebuilt immediately so that the next ``run_model`` call
        scores against the new profile instead of a stale (or, for a user who
        never went through ``hot_start``, missing) feature array.
        """
        self.update_profile(app_id)
        self.generate_new_profile_vector()

    def run_model(self):
        """Rank unseen games by mean cosine similarity to the profile.

        Prints the recommendation table (app id and score) as a side effect.

        Returns:
            A pandas Series named ``app_id`` with at most ``self.limit`` game
            ids, best match first. It is empty when the profile has no
            vectorised games yet.
        """
        # Nothing to compare against: either no vector was ever built or none
        # of the profile's games has a description. cosine_similarity cannot
        # handle either case, so return an empty result instead.
        if self.feature_arr is None or self.feature_arr.shape[0] == 0:
            return pd.Series([], dtype='int64', name='app_id')

        cosine_similarities = cosine_similarity(self.feature_arr, tfidf_matrix_desc)
        # Average over the profile's games: one score per game in df_desc.
        mean_cosine_similarities = cosine_similarities.mean(axis=0)
        similarity_scores_df = pd.DataFrame(
            mean_cosine_similarities,
            index=df_desc.index,
            columns=['similarity_score'],
        )

        sorted_similarity_scores_df = similarity_scores_df.sort_values(
            by='similarity_score', ascending=False
        )
        # Games already in the profile must not be recommended again.
        user_liked_indices = df_desc.index[df_desc['id'].isin(self.profile)]
        recommendations_indices = (
            sorted_similarity_scores_df.drop(user_liked_indices).head(self.limit).index
        )
        final_recommendations = pd.DataFrame({
            'app_id': df_desc.loc[recommendations_indices, 'id'].values,
            'similarity_score': sorted_similarity_scores_df.loc[recommendations_indices].values.flatten()
        })
        print(final_recommendations)
        return final_recommendations['app_id']




In [586]:
# Demo: build the profile of one reviewer from df_reviews, then print and
# return that user's recommendations. The first argument (user_mod) is only
# stored by the User class.
user = User(0,76561199028326951)
user.hot_start()
user.run_model()


0      open_world rpg story_rich atmospheric mature f...
175    open_world action multiplayer crime automobile...
180    city_builder simulation building management st...
Name: tags, dtype: object
     app_id  similarity_score
0    352550          0.097705
1    543900          0.055397
2       420          0.051354
3    306130          0.046683
4    239140          0.046102
5    638970          0.045617
6    323190          0.041700
7    555220          0.039071
8    427520          0.038571
9    282560          0.037958
10       70          0.036021
11   385560          0.035557
12   613100          0.033401
13   339800          0.032696
14   792990          0.032626
15   526870          0.032405
16   281990          0.031981
17   671510          0.031916
18   595520          0.031845
19   834910          0.031652
20   570940          0.030449
21   787480          0.029983
22   583950          0.029889
23   582010          0.029592
24     8870          0.029120
25   552520          0.

0      352550
1      543900
2         420
3      306130
4      239140
5      638970
6      323190
7      555220
8      427520
9      282560
10         70
11     385560
12     613100
13     339800
14     792990
15     526870
16     281990
17     671510
18     595520
19     834910
20     570940
21     787480
22     583950
23     582010
24       8870
25     552520
26     730310
27    1017900
28     527230
29     701160
Name: app_id, dtype: int64

__UI MODULE__

In [543]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import random

class SteamGameRecommenderUI:
    """ipywidgets front end that pages through a list of games.

    The UI shows a login box; after login it lists games ten at a time with
    Like/Dislike buttons, a "Next Page" button and a search box. Search is a
    placeholder that prints a random sample of games and ignores the query.
    """

    PAGE_SIZE = 10            # games shown per page
    SEARCH_RESULT_COUNT = 5   # games printed by the placeholder search

    def __init__(self, games_db=None):
        """Build the widgets and wire up their click handlers.

        Args:
            games_db: Optional sequence of game records, each indexable by
                ``"id"`` and ``"name"``. When omitted, the rows of ``df_desc``
                are used.
        """
        self.user_id = None
        self.df_reviews = df_reviews
        self.df_tags = df_tags
        self.df_desc = df_desc
        # games_db was read by the paging and search code but never assigned,
        # which raised AttributeError on the first login or search.
        # NOTE(review): the default assumes df_desc has a 'name' column next
        # to 'id' -- only 'id' is visible in this notebook; TODO confirm.
        if games_db is None:
            games_db = self.df_desc.to_dict('records')
        self.games_db = list(games_db)
        self.current_page = 0
        self.login_input = widgets.Text(description='User ID:')
        self.login_button = widgets.Button(description='Login')
        self.next_page_button = widgets.Button(description='Next Page')
        self.search_input = widgets.Text(description='Search Game:')
        self.search_button = widgets.Button(description='Search')
        self.output_area = widgets.Output()

        # Bind the button click events to their handlers.
        self.login_button.on_click(self.on_login_clicked)
        self.next_page_button.on_click(self.on_next_page_clicked)
        self.search_button.on_click(self.on_search_clicked)

    @staticmethod
    def _format_game(game):
        """Return the ``"<id>: <name>"`` label for one games_db record."""
        return f'{game["id"]}: {game["name"]}'

    def display(self):
        """Show the login widgets and the (initially empty) output area."""
        display(self.login_input, self.login_button, self.output_area)

    def on_login_clicked(self, b):
        """Store the entered user id (as text) and show the current page."""
        self.user_id = self.login_input.value
        self.display_recommended_games()

    def display_recommended_games(self):
        """Redraw the output area with the current page of games."""
        with self.output_area:
            clear_output()
            display(self.next_page_button)
            display(self.search_input, self.search_button)
            rows = []
            for game in self.get_recommended_games():
                game_label = widgets.Label(self._format_game(game))
                like_button = widgets.Button(description='Like')
                dislike_button = widgets.Button(description='Dislike')
                like_button.on_click(self.on_like_clicked)
                dislike_button.on_click(self.on_dislike_clicked)
                rows.append(widgets.HBox([game_label, like_button, dislike_button]))
            # Hand all rows over at once instead of re-assigning the VBox's
            # children tuple on every iteration.
            display(widgets.VBox(rows))

    def get_recommended_games(self):
        """Return the slice of ``games_db`` for the current page.

        The slice is empty once the page index runs past the last game.
        """
        start = self.current_page * self.PAGE_SIZE
        end = start + self.PAGE_SIZE
        return self.games_db[start:end]

    def on_next_page_clicked(self, b):
        """Advance one page and redraw."""
        self.current_page += 1
        self.display_recommended_games()

    def on_search_clicked(self, b):
        """Print placeholder search results: a random sample of games."""
        search_query = self.search_input.value
        with self.output_area:
            clear_output()
            print(f"Search results for '{search_query}':")
            # random.sample raises ValueError when asked for more items than
            # exist, so cap the sample size at the catalogue size.
            sample_size = min(self.SEARCH_RESULT_COUNT, len(self.games_db))
            for game in random.sample(self.games_db, sample_size):
                print(self._format_game(game))

    def on_like_clicked(self, b):
        """Acknowledge a Like click (the game is not recorded)."""
        print("Liked!")

    def on_dislike_clicked(self, b):
        """Acknowledge a Dislike click (the game is not recorded)."""
        print("Disliked!")

# ui = SteamGameRecommenderUI()
# ui.display()
