Let's load the cleaned games dataframe and the cosine-similarity matrix produced by the Qwen-3 model (V2).

In [1]:
import pandas as pd

In [2]:
import gc

In [5]:
# Similarity matrix used by recommend_games below, where it is indexed as sim_matrix[index]
# with a game's dataframe index. Presumably pairwise cosine similarities between games
# (per the file name) - TODO confirm its type (DataFrame vs. ndarray).
# NOTE(review): unpickling can execute arbitrary code; only load this file if it comes
# from a trusted source.
Full_cosine_matrix = pd.read_pickle("Full_cosine_matrix_Qwen-3-6B-V2.pkl")

In [6]:
# Game table; later cells read its "name", "rating" and "user_reviews" columns.
sg_df_clean = pd.read_csv("sg_df_clean.csv")

In [7]:
# Run a garbage-collection pass right after the two loads; the displayed value is
# the number of unreachable objects the collector found.
gc.collect()

0

In [8]:
# Rescale "rating" to a fraction by dividing by 100
# (rating appears to be on a 0-100 scale - TODO confirm against the data source).
sg_df_clean["rating_ratio"] = sg_df_clean["rating"].div(100)
print(sg_df_clean["rating_ratio"])

0        0.352941
1        0.913793
2        1.000000
3        0.862069
4        0.639706
           ...   
72366    0.808989
72367    0.681199
72368    1.000000
72369    1.000000
72370    0.666667
Name: rating_ratio, Length: 72371, dtype: float64


In [9]:
# Constants for the weighted score defined in the next cell:
#   c - mean rating_ratio over all games (the value low-review games are pulled toward)
#   m - median number of user reviews (the weight given to c)
# Both are captured as default arguments of weighted_game_score when it is defined,
# so that function must be re-defined if these are recomputed.
c = sg_df_clean["rating_ratio"].mean()
m= sg_df_clean["user_reviews"].median()

In [10]:
def weighted_game_score(x, c=c, m=m):
    """Blend a game's own rating with the global average, weighted by review count.

    Computes ``n/(n+m) * r + m/(n+m) * c`` where ``r`` is the game's
    ``rating_ratio`` and ``n`` its ``user_reviews``. For positive ``m``, a game
    with few reviews scores close to ``c``; a game with many reviews scores
    close to its own ``r``.

    Parameters
    ----------
    x : row-like
        Anything supporting ``x["rating_ratio"]`` and ``x["user_reviews"]``
        (e.g. a DataFrame row passed by ``DataFrame.apply(axis=1)``).
    c : float
        Prior rating; defaults to the module-level ``c`` at definition time.
    m : float
        Weight of the prior, in reviews; defaults to the module-level ``m``
        at definition time.

    Returns
    -------
    The weighted score. Not guarded against ``n + m == 0``.
    """
    rating = x["rating_ratio"]   # the game's own rating fraction
    votes = x["user_reviews"]    # how many reviews back that rating
    denom = votes + m
    return (votes * rating) / denom + (m * c) / denom

In [11]:
# weighted_game_score only does column lookups and arithmetic on its argument, so it
# can be called once on the whole frame (vectorized) instead of once per row through
# apply(axis=1); the resulting column is the same but is computed far faster.
sg_df_clean["game_score"] = weighted_game_score(sg_df_clean)

In [12]:
# Eyeball a random handful of scores; random_state pins the draw so a
# Restart & Run All shows the same rows every time.
sg_df_clean[["name","game_score"]].sample(20, random_state=42)

Unnamed: 0,name,game_score
60089,Art of Golf,0.794876
18993,Space Rocks,0.803795
19054,Tunnel of Doom,0.69384
48405,人类VS怪物(Human vs Monster),0.724364
6168,Mystery of Neuschwanstein,0.790186
49891,Gunship Battle2 VR: Steam Edition,0.66088
44321,Rejoinder,0.716838
13833,Mad Age & This Guy,0.848256
24161,Ultimate Low Poly Pet,0.754314
71840,Liminal Lane,0.823981


In [13]:
def recommend_games(game, df = sg_df_clean, sim_matrix = Full_cosine_matrix,
                    n_candidates = 20, top_n = 5):
    """Recommend games similar to `game`, ranked by quality.

    Two-stage selection: first keep the `n_candidates` games most similar to
    `game`, then return the `top_n` of those with the highest 'game_score'.

    Parameters
    ----------
    game : str
        Title to look up; must match an entry of df['name'] exactly. If several
        rows match, the first one is used.
    df : pandas.DataFrame
        Game table with 'name' and 'game_score' columns.
    sim_matrix
        Similarity lookup; sim_matrix[i] must give the similarity between the
        game at dataframe index i and every row of df.
        NOTE(review): if this is a DataFrame, [i] selects column i rather than
        row i - equivalent only for a symmetric matrix; confirm.
    n_candidates : int, default 20
        Size of the similarity shortlist.
    top_n : int, default 5
        Number of recommendations returned from the shortlist.

    Returns
    -------
    pandas.DataFrame
        Columns 'name', 'similarity', 'game_score', at most `top_n` rows,
        sorted by 'game_score' descending.
    str
        An error message when `game` is not found in df['name'].
    """
    # 1. Find the game's index in the dataframe to use it in the similarity matrix
    matches = df.index[(df['name'] == game).to_numpy()]
    if len(matches) == 0:
        return "The game you typed does not exist in the database. Please make sure the spelling exactly matches the game on steam" #to give an error if the spelling is incorrect
    index = matches[0]

    # 2. Work on a copy of only the columns we need, so the original frame is
    #    untouched and we do not duplicate the whole table on every call
    candidates = df[['name', 'game_score']].copy()

    # 3. Attach the similarities of every game to the requested one
    candidates['similarity'] = sim_matrix[index]

    # 4. Remove the requested game explicitly by its index label. Relying on it
    #    sorting first breaks when another game ties with (or edges past) the
    #    self-similarity: the game would then recommend itself and a genuine
    #    candidate would be skipped instead.
    candidates = candidates.drop(index=index)

    # 5. FILTER: keep the n_candidates most similar games
    top_similar = candidates.sort_values('similarity', ascending=False).head(n_candidates)

    # 6. RANK: of those, keep the top_n with the best quality score
    top_picks = top_similar.sort_values('game_score', ascending=False).head(top_n)

    # 7. Return only the relevant columns
    cols = ['name', 'similarity', 'game_score']
    return top_picks[cols]



In [14]:
# Spot-check the recommender on a single title.
recommend_games(game = "Hogwarts Legacy")

Unnamed: 0,name,similarity,game_score
984,Gedonia,0.597085,0.900852
6764,Risen,0.595148,0.88661
52738,Map Of Materials,0.615564,0.883701
57089,Arto,0.629387,0.880158
41303,Horizon Zero Dawn™ Complete Edition,0.646263,0.861853


In [15]:
def rec_rating(games = ("The Elder Scrolls V: Skyrim Special Edition",
                        "DOOM Eternal",
                        "Hollow Knight",
                        "Hades",
                        "ELDEN RING")):
    """Run recommend_games on a fixed benchmark of titles for manual scoring.

    Parameters
    ----------
    games : iterable of str
        Titles to query, in order. Defaults to the five benchmark games, so
        calling rec_rating() with no arguments behaves as before.

    Returns
    -------
    tuple
        One recommend_games result per title, in the same order as `games`.
    """
    # One call per title instead of five copy-pasted lines; the same benchmark
    # can now be rerun on other titles without editing the function.
    return tuple(recommend_games(game = title) for title in games)


In [16]:
# Show the recommendation tables for the benchmark titles; they are scored by hand below.
rec_rating()

(                                                    name  similarity  \
 5785   The Elder Scrolls III: Morrowind® Game of the ...    0.600299   
 9109                         The Elder Scrolls V: Skyrim    0.693637   
 3765                                Sid Meier's Pirates!    0.619610   
 63572           Horizon Forbidden West™ Complete Edition    0.596526   
 32158                   Middle-earth™: Shadow of Mordor™    0.593767   
 
        game_score  
 5785     0.953199  
 9109     0.948258  
 3765     0.941963  
 63572    0.938925  
 32158    0.922377  ,
                 name  similarity  game_score
 3401   Ultimate Doom    0.609788    0.964302
 57770       Trepang2    0.599833    0.954439
 7623            DOOM    0.603032    0.952628
 15523        DOOM 64    0.594878    0.938437
 29452      Shrine II    0.589017    0.930044,
                                   name  similarity  game_score
 31391      Silver Axe - The Honest Elf    0.606743    0.909746
 35409         Shantae: Half

In order, these are my scores for how many of the five recommendations for each game are good:
1) 3/5
2) 5/5
3) 5/5
4) 5/5
5) 4/5

Total = 22/25

This model doesn't seem better or worse than the previous version; it's just a bit different.