# Natural Language Processing
Here we will explore only using features created by NLP

This feature will originate from game descriptions, and will be used to make a 'genre' on its own

In [194]:
import sys
import pandas as pd
import matplotlib.pyplot as plt

sys.path.append("/Users/antonis/code/Ant-mel/legendary_game_recs/")

import string
from nltk.corpus import stopwords
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation

from sklearn.preprocessing import MultiLabelBinarizer, PowerTransformer
from sklearn.compose import ColumnTransformer

from sklearn.neighbors import KNeighborsRegressor

from preprocessing.preprocess_2_features import keeping_ohe_columns_and_dropping_the_originals, keep_x_OHE_columns


In [133]:
def nlp_topic(x, vectorizer, lda_model):
    """
    Determines which topic the description belongs to
    Should be used to add topic to a column in the training data
    """
    vectorized_x = vectorizer.transform(pd.Series(x))
    vec_x_array = lda_model.transform(vectorized_x)
    vec_list = vec_x_array[0].tolist()

    return vec_list.index(max(vec_list))


In [121]:
def yeo_johnson_scaling(X_train, column):
    """
    This scales skewed data, and must be fed X_train and not the entire dataset
    Otherwise you looks visibility on your y, or just make it harder to find
    """
    num_transformer_yeo = PowerTransformer(method='yeo-johnson', standardize=False)

    col_transformer = ColumnTransformer([('num_transformer', num_transformer_yeo,
                                    [column])],
                                    remainder='passthrough')

    transfomed_X_train = pd.DataFrame(col_transformer.fit_transform(X_train))

    return transfomed_X_train


In [145]:
def preprocess_text(sentence):
    """
    Function to clean description text, tokenize and lemmatize words
    """

    # List of extra stopwords we decided do not help create new genres
    extended_stop_words = ['able','access','across','also','always','another','away',
                        'back','become','best','better','big','box','bring','certain',
                        'clear','close','come','console','content','could','course','digital',
                        'dont','one','two','three','four','five','six','seven','eight','nine',
                        'ten','hundred','thousand','either','enjoy','enough','even','exclusive',
                        'extra','feature','franchise','full','fully','fun','game','gameplay',
                        'genre','get','give','go','good','great','great','greatest','happen',
                        'however','huge','ii','improve','include','increase','inside',
                        'interactive','introduce','instead','involve','know','large','last',
                        'later','launch','lead','let','level','like','little','look','long',
                        'main','may','meet','might','mix','modern','mode','much','must','nan',
                        'name','new','next','need','number','nintendo','official','offer',
                        'object','option','order','original','originally','others','part','pc',
                        'perfect','platform','play','playable','player','playstation','plus',
                        'possible','port','prepare','previous','progress','project','publish',
                        'put','reach','ready','remain','return','screen','scroll','second',
                        'first','third','see','sega','sequel','series','set','several','show',
                        'side','similar','since','small','something','sound','special','start',
                        'stat','state','stay','still','studio','super','take','switch','tell',
                        'test','th','though','throughout','title','together','top','try',
                        'ultimate','unique','update','upon','us','use','version','via','wait',
                        'want','wii','within','without','would','xbox','youll','youre','youve']

    ## define stopwords
    stop_words = stopwords.words('english')
    stop_words.extend(extended_stop_words)

    # Basic cleaning
    sentence = sentence.strip() ## remove whitespaces
    sentence = sentence.lower() ## lowercase
    sentence = ''.join(char for char in sentence if not char.isdigit()) ## remove numbers

    # Advanced cleaning

    # remove punctuation
    for punctuation in string.punctuation:
        sentence = sentence.replace(punctuation, '')

    # tokenize
    tokenized_sentence = word_tokenize(sentence)

    # remove stopwords
    tokenized_sentence_cleaned = [
        w for w in tokenized_sentence if not w in stop_words]


    noun_lemmatized = [
        WordNetLemmatizer().lemmatize(word, pos = "n")
        for word in tokenized_sentence_cleaned]

    lemmatized = [
        WordNetLemmatizer().lemmatize(word, pos = "v")
        for word in noun_lemmatized]

    cleaned_sentence = ' '.join(word for word in lemmatized)

    return cleaned_sentence


In [146]:
def vectorize_text(data, column, min_df=0.02):
    vectorizer = TfidfVectorizer(min_df=min_df)

    # Fit transform on clean text
    vectorized_descriptions = vectorizer.fit_transform(data[column])

    # Create dataframe of vectorized descriptions
    vectorized_descriptions = pd.DataFrame(
        vectorized_descriptions.toarray(),
        columns = vectorizer.get_feature_names_out())

    return vectorizer, vectorized_descriptions


In [218]:
def create_nlp_topics_and_append(data, column_to_clean, origin_column, lda_components=30, total_features_to_make=None):
    # Create dataframe of vectorized descriptions
    vectorizer, vectorized_descriptions = vectorize_text(data, column_to_clean)

    # Instantiate LDA model
    lda_model = LatentDirichletAllocation(n_components=lda_components, max_iter = 20, learning_method='online')

    # Fit the LDA on the vectorized documents
    lda_model.fit(vectorized_descriptions)

    # Creating topics column, and encoding into dara
    data['topic'] = data[column_to_clean].apply(nlp_topic, vectorizer = vectorizer, lda_model = lda_model)
    topics = pd.get_dummies(data['topic'])

    if total_features_to_make == None:
        pass
    else:
        topics = keep_x_OHE_columns(topics, total_features_to_make)

    # Creating final df to be split into training data
    concatenated_df = data.drop(columns=[origin_column, column_to_clean, 'topic'], axis=1)
    final_df = pd.concat((concatenated_df, topics), axis=1)

    return final_df


In [219]:
def topics_from_nlp(data, column_with_text, number_of_topics_to_keep=None, lda_components=30):

    data['clean_text'] = data[column_with_text].apply(preprocess_text)
    topics_nlp_df = create_nlp_topics_and_append(data, column_to_clean='clean_text',
                                                 origin_column=column_with_text, total_features_to_make=number_of_topics_to_keep,
                                                 lda_components=lda_components)

    return topics_nlp_df


In [79]:
def print_topics(model, vectorizer):
    """
    Allows you to print topics to review performance
    """
    for idx, topic in enumerate(model.components_):
        print("Topic %d:" % (idx))
        print([(vectorizer.get_feature_names_out()[i], topic[i])
                        for i in topic.argsort()[:-10 - 1:-1]])


In [149]:
def model_predict(indicies, model, reference_data, X_train):
    game = X_train[indicies:indicies+1]

    ind_list = list(model.kneighbors(game,n_neighbors=10)[1][0])
    prediction = reference_data.iloc[ind_list]

    return prediction


Removing all columns, except descriptions. 

Later, we will add genre to the NLP features and see if anything improves

In [125]:
final_data = pd.read_json('../raw_data/final_data')

# Columns we don't need immediately
cols_to_drop = ['title', 'release_date', 'plays', 'playing', 'backlogs', 'wishlist',
       'developers', 'platforms', 'genres',
       'total_reviews', 'main', 'ratings_zero_five',
       'ratings_one_zero', 'ratings_one_five', 'ratings_two_zero',
       'ratings_two_five', 'ratings_three_zero', 'ratings_three_five',
       'ratings_four_zero', 'ratings_four_five', 'ratings_five_zero', 'url',
       'game_id']

final_useful_columns = final_data.drop(columns=cols_to_drop)

# descriptions were an onject, we need them as a string!
final_useful_columns['description'] = final_useful_columns['description'].astype(str)


In [129]:
# Preprocessing description text
final_useful_columns['clean_text'] = final_useful_columns['description'].apply(preprocess_text)


In [None]:
# Creating X and y train
final_training_data = topics_from_nlp(final_useful_columns, 'description')
X_train_only_topic = final_training_data.drop(columns='avg_review')
y_train_only_topic = final_training_data['avg_review']

# Scaling X_train
X_train_scaled = yeo_johnson_scaling(X_train_only_topic, 'total_lists')

# Instantiating the model
only_topic_knn = KNeighborsRegressor(n_neighbors=10).fit(X_train_scaled,
                                                       y_train_only_topic)


In [85]:
# Used to get game index to test model
final_data[(final_data['title'] == 'Call of Duty: Black Ops 4')
               | (final_data['title'] == 'The Legend of Zelda: Breath of the Wild')
              | (final_data['title'] == 'Omori')
              | (final_data['title'] == 'FIFA 21')
              | (final_data['title'] == 'Tetris')
              | (final_data['title'] == 'Sid Meier\'s Civilization V')
              | (final_data['title'] == 'Red Dead Redemption')
              | (final_data['title'] == 'Grand Theft Auto V')
                 | (final_data['title'] == 'Pikmin 4')
              | (final_data['title'] == 'The Sims 4')
              | (final_data['title'] == 'Animal Crossing: New Horizons')]


Unnamed: 0,title,release_date,plays,playing,backlogs,wishlist,developers,avg_review,genres,platforms,...,ratings_one_five,ratings_two_zero,ratings_two_five,ratings_three_zero,ratings_three_five,ratings_four_zero,ratings_four_five,ratings_five_zero,url,game_id
841,Red Dead Redemption,1692230400000,40,6,14,35,"[Double Eleven, Rockstar Games]",4.1,"[Adventure, RPG, Shooter]","[PlayStation 4, Nintendo Switch]",...,0,0,0,0,3,2,2,1,/games/red-dead-redemption--2/,260737
906,Pikmin 4,1689897600000,2100,573,1400,2300,[Nintendo],4.3,"[Adventure, Real Time Strategy, Strategy]",[Nintendo Switch],...,5,11,15,52,114,373,520,577,/games/pikmin-4/,59843
2583,Grand Theft Auto V,1655251200000,53,3,8,2,[],4.3,[],"[PlayStation 4, Xbox One, PlayStation 5, Xbox ...",...,0,0,0,1,1,9,9,8,/games/grand-theft-auto-v--2/,239064
5919,FIFA 21,1602115200000,1600,39,57,20,"[Electronic Arts, EA Vancouver]",2.5,"[Simulator, Sport]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,90,176,211,255,114,70,18,12,/games/fifa-21/,134101
6988,Animal Crossing: New Horizons,1584576000000,27000,2100,1900,1500,"[Nintendo EPD, Nintendo]",3.6,[Simulator],[Nintendo Switch],...,189,629,1141,2833,3691,4224,1761,1417,/games/animal-crossing-new-horizons/,109462
12552,The Legend of Zelda: Breath of the Wild,1488499200000,44000,3500,6900,3600,"[Nintendo EPD Production Group No. 3, Nintendo]",4.4,"[Adventure, Puzzle, RPG]","[Wii U, Nintendo Switch]",...,131,367,563,1257,2163,5236,6908,13704,/games/the-legend-of-zelda-breath-of-the-wild/,7346
16815,The Sims 4,1409616000000,13000,666,1100,139,"[Electronic Arts, The Sims Studio]",3.1,"[RPG, Simulator]","[Windows PC, Mac, PlayStation 4, Xbox One, Pla...",...,202,617,952,1844,1355,938,206,224,/games/the-sims-4/,3212
21323,Sid Meier's Civilization V,1285027200000,7300,70,506,179,"[2K Games, Firaxis Games]",4.0,"[Simulator, Strategy, Turn Based Strategy]","[Windows PC, Mac, Linux]",...,20,50,102,376,683,1485,754,570,/games/sid-meier-s-civilization-v/,866
36255,Omori,1608854400000,11000,1400,6700,5200,"[OMOCAT, PLAYISM]",4.1,"[Adventure, Indie, RPG, Turn Based Strategy]","[Windows PC, Mac, PlayStation 4, Nintendo 3DS,...",...,104,191,254,491,766,1552,1587,3009,/games/omori/,26673
36689,Call of Duty: Black Ops 4,1539302400000,5100,22,414,145,"[Treyarch, Activision]",2.3,[Shooter],"[Windows PC, PlayStation 4, Xbox One]",...,360,598,554,607,296,176,45,23,/games/call-of-duty-black-ops-4/,83727


In [89]:
model_predict(12552, only_topic_knn, final_data, X_train_scaled)


Unnamed: 0,title,release_date,plays,playing,backlogs,wishlist,developers,avg_review,genres,platforms,...,ratings_one_five,ratings_two_zero,ratings_two_five,ratings_three_zero,ratings_three_five,ratings_four_zero,ratings_four_five,ratings_five_zero,url,game_id
12552,The Legend of Zelda: Breath of the Wild,1488499200000,44000,3500,6900,3600,"[Nintendo EPD Production Group No. 3, Nintendo]",4.4,"[Adventure, Puzzle, RPG]","[Wii U, Nintendo Switch]",...,131,367,563,1257,2163,5236,6908,13704,/games/the-legend-of-zelda-breath-of-the-wild/,7346
3112,Elden Ring,1645747200000,28000,4800,7600,6600,"[FromSoftware, Bandai Namco Entertainment]",4.5,"[Adventure, RPG]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,66,193,270,703,1207,3133,5152,10720,/games/elden-ring/,119133
15954,Bloodborne,1427155200000,24000,1500,8300,4800,"[FromSoftware, Sony Computer Entertainment]",4.5,"[Adventure, RPG]",[PlayStation 4],...,67,186,207,492,851,2304,3446,9048,/games/bloodborne/,7334
32679,Super Metroid,764035200000,14000,394,4000,1100,"[Playtronic, Nintendo R&D1]",4.3,"[Adventure, Platform, Shooter]","[Wii U, Super Famicom, SNES, Wii, New Nintendo...",...,26,76,137,443,884,2092,2092,2968,/games/super-metroid/,1103
11529,Cuphead,1506643200000,27000,1600,6800,3500,[Studio MDHR],4.1,"[Arcade, Indie, Platform, Shooter]","[Windows PC, Mac, PlayStation 4, Xbox One, Nin...",...,69,178,308,1186,2364,5561,3855,3128,/games/cuphead/,9061
8281,Fire Emblem: Three Houses,1564012800000,16000,1300,4700,2000,"[Intelligent Systems, Nintendo]",4.0,"[Adventure, RPG, Strategy, Tactical, Turn Base...",[Nintendo Switch],...,67,218,345,887,1582,3002,2262,1697,/games/fire-emblem-three-houses/,26845
7601,Star Wars Jedi: Fallen Order,1573776000000,21000,1000,5700,1700,"[Respawn Entertainment, Electronic Arts]",3.7,[Adventure],"[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,132,347,600,1871,3583,4650,1642,701,/games/star-wars-jedi-fallen-order/,74701
2384,Xenoblade Chronicles 3,1659052800000,6100,921,3600,3100,"[Nintendo, Monolith Soft]",4.4,"[Adventure, RPG]",[Nintendo Switch],...,29,78,93,225,301,633,986,2160,/games/xenoblade-chronicles-3/,191411
28632,Halo: Combat Evolved,1005782400000,15000,235,1700,572,"[MacSoft Games, Bungie]",3.8,[Shooter],"[Windows PC, Mac, Xbox, Xbox 360]",...,89,244,390,1196,1820,2570,1297,1305,/games/halo-combat-evolved/,740
3785,Inscryption,1634601600000,9000,793,3900,3100,"[Daniel Mullins Games, Devolver Digital]",4.1,"[Adventure, Card & Board Game, Indie, Puzzle, ...","[Windows PC, Mac, Linux, PlayStation 4, Xbox O...",...,16,79,131,408,743,1687,1460,1385,/games/inscryption/,139090


In [90]:
model_predict(5919, only_topic_knn, final_data, X_train_scaled)


Unnamed: 0,title,release_date,plays,playing,backlogs,wishlist,developers,avg_review,genres,platforms,...,ratings_one_five,ratings_two_zero,ratings_two_five,ratings_three_zero,ratings_three_five,ratings_four_zero,ratings_four_five,ratings_five_zero,url,game_id
20602,Brink,1304985600000,1600,2,161,38,"[Splash Damage, Bethesda Softworks]",1.8,[Shooter],"[Windows PC, Xbox 360, PlayStation 3]",...,147,195,114,83,51,11,4,9,/games/brink/,502
5919,FIFA 21,1602115200000,1600,39,57,20,"[Electronic Arts, EA Vancouver]",2.5,"[Simulator, Sport]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,90,176,211,255,114,70,18,12,/games/fifa-21/,134101
29880,WinBack: Covert Operations,943056000000,204,10,161,64,"[Omega Force, Koei]",3.1,"[Shooter, Strategy]","[Nintendo 64, PlayStation 2]",...,2,12,14,23,25,16,5,4,/games/winback-covert-operations/,47680
21189,Grand Theft Auto IV: Complete Edition,1288051200000,948,59,349,70,"[Rockstar North, Rockstar Games]",4.2,"[Adventure, Shooter]","[Windows PC, Xbox 360, PlayStation 3]",...,2,5,11,33,57,154,109,134,/games/grand-theft-auto-iv-complete-edition/,27912
36882,Absolver,1503964800000,932,7,260,58,"[Sloclap, Devolver Digital]",2.9,"[Adventure, Fighting, Indie, RPG]","[Windows PC, Mac, Linux, PlayStation 4, Xbox One]",...,32,75,101,134,118,47,11,5,/games/absolver/,19333
27175,F-Zero: GP Legend,1069977600000,359,7,158,66,[Suzak],3.3,[Racing],"[Wii U, Game Boy Advance]",...,1,10,22,53,56,25,8,11,/games/f-zero-gp-legend/,3493
21557,Transformers: War for Cybertron,1277164800000,1300,14,245,151,"[High Moon Studios, Activision]",3.6,"[Adventure, Shooter]","[Windows PC, Xbox 360, PlayStation 3]",...,8,23,50,133,246,188,72,51,/games/transformers-war-for-cybertron/,555
32020,Mega Man: The Power Battle,812505600000,608,1,66,30,[Capcom],3.1,[Fighting],[Arcade],...,4,23,55,139,89,38,6,10,/games/mega-man-the-power-battle/,1724
35240,Deus Ex Machina,465436800000,84,3,133,171,"[Automata U.K. Limited, Nu Wave UK]",3.7,"[Indie, Music, Puzzle, Simulator]","[Commodore C64/128, ZX Spectrum, MSX]",...,0,1,1,10,13,24,10,3,/games/deus-ex-machina/,26432
9600,Project Warlock,1539820800000,429,12,304,77,"[Buckshot Software, gaming company]",3.2,"[Adventure, Indie, Shooter]","[Windows PC, PlayStation 4, Xbox One, Nintendo...",...,3,27,36,81,79,60,21,3,/games/project-warlock/,105738


In [91]:
model_predict(36255, only_topic_knn, final_data, X_train_scaled)


Unnamed: 0,title,release_date,plays,playing,backlogs,wishlist,developers,avg_review,genres,platforms,...,ratings_one_five,ratings_two_zero,ratings_two_five,ratings_three_zero,ratings_three_five,ratings_four_zero,ratings_four_five,ratings_five_zero,url,game_id
7643,Death Stranding,1573171200000,14000,1300,7800,3000,"[Sony Interactive Entertainment, Kojima Produc...",3.9,"[Adventure, RPG, Shooter]","[Windows PC, PlayStation 4]",...,114,292,334,809,1193,2132,1707,1867,/games/death-stranding/,19564
36255,Omori,1608854400000,11000,1400,6700,5200,"[OMOCAT, PLAYISM]",4.1,"[Adventure, Indie, RPG, Turn Based Strategy]","[Windows PC, Mac, PlayStation 4, Nintendo 3DS,...",...,104,191,254,491,766,1552,1587,3009,/games/omori/,26673
19706,Journey,1331596800000,17000,121,3900,2400,"[Sony Computer Entertainment, ThatGameCompany]",3.9,"[Adventure, Platform]","[Windows PC, PlayStation 4, iOS, PlayStation 3]",...,77,291,420,1188,1660,2769,1497,2037,/games/journey/,1352
14046,Uncharted 4: A Thief's End,1462838400000,20000,240,3900,1700,"[Naughty Dog, Sony Interactive Entertainment]",4.1,"[Adventure, Shooter]",[PlayStation 4],...,56,182,263,868,1724,3856,3272,2899,/games/uncharted-4-a-thief-s-end/,7331
4965,It Takes Two,1616630400000,13000,1400,3700,2900,"[Hazelight Studios, Electronic Arts]",4.2,"[Adventure, Platform, Puzzle]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,44,90,134,507,999,2293,2199,2234,/games/it-takes-two/,135243
19228,Borderlands 2,1347926400000,23000,523,4200,727,"[Gearbox Software, 2K Games]",3.6,"[RPG, Shooter]","[Windows PC, Android, Mac, Xbox 360, Linux, Pl...",...,269,657,947,2084,2570,3262,1702,1579,/games/borderlands-2/,1011
18962,Far Cry 3,1354060800000,21000,267,3000,987,"[Ubisoft Entertainment, Ubisoft Montreal]",3.8,"[Adventure, Shooter]","[Windows PC, Xbox 360, PlayStation 3]",...,93,276,584,1661,2968,3675,1771,1229,/games/far-cry-3/,529
3021,Tunic,1647388800000,5600,619,4100,3900,"[Andrew Shouldice, Finji]",3.9,"[Adventure, Indie, Puzzle, RPG]","[Windows PC, Mac, PlayStation 4, Xbox One, Pla...",...,22,100,138,394,553,978,692,630,/games/tunic/,23733
24082,Uncharted: Drake's Fortune,1195171200000,17000,115,1800,654,"[Sony Computer Entertainment, Naughty Dog]",3.1,"[Adventure, Platform, Shooter]",[PlayStation 3],...,273,882,1358,3050,2505,1477,315,266,/games/uncharted-drake-s-fortune/,431
4031,Deathloop,1631577600000,6600,477,3800,2400,"[Arkane Studios, Bethesda Softworks]",3.4,"[Adventure, Shooter]","[Windows PC, PlayStation 5, Xbox Series]",...,93,292,350,769,1040,1089,454,211,/games/deathloop/,113598


## Evaluation
Predictions for Breath of the Wild are identical with only genres without scaling, and is much improved after scaling
Predicitons for Fifa 21 are worse, with Fifa 21 not even being the closest neighbour to itself!!
Predictions for Omori are better, but could still be improved as Omori also isn't first to itself!!

## Mixing NLP Topics with Genres

In [208]:
def drop_column_and_concat(data, new_columns, column_to_drop):
    new_df = pd.concat((data, new_columns), axis=1)
    new_df.drop(columns=column_to_drop, inplace=True)

    return new_df

ohe_added_to_data = drop_column_and_concat(final_useful_columns, ohe_columns, 'genres')

ohe_and_topics = topics_from_nlp(ohe_added_to_data, 'description', 10)


In [221]:
def ohe_and_nlp(data, ohe_column_name, n_ohe_features, nlp_column_name, n_nlp_features):
    ohe_columns = keeping_ohe_columns_and_dropping_the_originals(data[[ohe_column_name]], n_ohe_features)

    ohe_added_to_data = drop_column_and_concat(data, ohe_columns, ohe_column_name)

    ohe_and_topics = topics_from_nlp(ohe_added_to_data, nlp_column_name, n_nlp_features)

    return ohe_and_topics


In [213]:
def train_model(X_train, y_train, neighbours=10):
    model = KNeighborsRegressor(n_neighbors=neighbours).fit(X_train, y_train)

    return model


In [220]:
final_data = pd.read_json('../raw_data/final_data')

# Columns we don't need immediately
cols_to_drop = ['title', 'release_date', 'plays', 'playing', 'backlogs', 'wishlist',
       'developers', 'platforms',
       'total_reviews', 'main', 'ratings_zero_five',
       'ratings_one_zero', 'ratings_one_five', 'ratings_two_zero',
       'ratings_two_five', 'ratings_three_zero', 'ratings_three_five',
       'ratings_four_zero', 'ratings_four_five', 'ratings_five_zero', 'url',
       'game_id']

final_useful_columns = final_data.drop(columns=cols_to_drop)

# descriptions were an onject, we need them as a string!
final_useful_columns['description'] = final_useful_columns['description'].astype(str)


In [224]:
ohe_and_topics = ohe_and_nlp(final_useful_columns, 'genres', 20, 'description', 10)




In [225]:
X_train = yeo_johnson_scaling(ohe_and_topics.drop('avg_review', axis=1), 'total_lists')
y_train = ohe_and_topics['avg_review']

topic_and_genre_model = train_model(X_train, y_train)




In [215]:
model_predict(12552, topic_and_genre_model, final_data, X_train)


Unnamed: 0,title,release_date,plays,playing,backlogs,wishlist,developers,avg_review,genres,platforms,...,ratings_one_five,ratings_two_zero,ratings_two_five,ratings_three_zero,ratings_three_five,ratings_four_zero,ratings_four_five,ratings_five_zero,url,game_id
12552,The Legend of Zelda: Breath of the Wild,1488499200000,44000,3500,6900,3600,"[Nintendo EPD Production Group No. 3, Nintendo]",4.4,"[Adventure, Puzzle, RPG]","[Wii U, Nintendo Switch]",...,131,367,563,1257,2163,5236,6908,13704,/games/the-legend-of-zelda-breath-of-the-wild/,7346
19153,Dishonored,1349740800000,20000,382,4900,1200,"[Arkane Studios, Bethesda Softworks]",4.0,"[Adventure, Puzzle, RPG]","[Windows PC, Xbox 360, PlayStation 3]",...,50,208,379,1135,2131,3753,2218,1877,/games/dishonored/,533
30494,The Legend of Zelda: Ocarina of Time,911606400000,24000,682,4600,1900,"[iQue, Nintendo EAD]",4.3,"[Adventure, RPG]","[Wii U, Wii, Nintendo 64]",...,52,131,282,835,1468,3065,2862,5250,/games/the-legend-of-zelda-ocarina-of-time/,1029
33395,Kaeru no Tame ni Kane wa Naru,719020800000,245,11,243,163,[Nintendo],3.9,"[Adventure, Puzzle, RPG]","[Nintendo 3DS, Game Boy]",...,2,1,6,13,36,50,26,25,/games/kaeru-no-tame-ni-kane-wa-naru/,49078
27827,The Legend of Zelda: The Wind Waker,1039737600000,15000,337,4000,1900,"[Nintendo EAD Software Development Group No.3,...",4.2,"[Adventure, Puzzle, RPG]",[Nintendo GameCube],...,24,92,118,439,942,2282,2144,2513,/games/the-legend-of-zelda-the-wind-waker/,1033
28345,Kingdom Hearts,1017273600000,16000,377,3100,1400,"[Square, Disney Interactive Studios]",3.7,"[Adventure, RPG]",[PlayStation 2],...,115,331,476,1359,2031,2517,1102,1124,/games/kingdom-hearts/,1219
25654,Kingdom Hearts II,1135209600000,13000,172,2300,918,"[Buena Vista Games, Square Enix]",4.3,"[Adventure, RPG]",[PlayStation 2],...,37,103,133,353,650,1717,1623,2640,/games/kingdom-hearts-ii/,1221
7794,Disco Elysium,1571097600000,6200,732,4300,2400,[ZA/UM],4.5,"[Adventure, RPG]","[Windows PC, Mac]",...,10,33,39,76,143,431,755,2256,/games/disco-elysium/,26472
26948,Katamari Damacy,1079568000000,5300,86,1300,983,"[Keita Takahashi, Namco]",4.2,"[Adventure, Puzzle]","[PlayStation 3, PlayStation 2]",...,11,24,31,162,300,835,621,957,/games/katamari-damacy/,6453
24496,The Legend of Zelda: Phantom Hourglass,1182556800000,8900,134,2400,824,"[Nintendo, Nintendo EAD Software Development G...",3.3,"[Adventure, Puzzle, RPG]","[Wii U, Nintendo DS]",...,98,316,507,1184,1138,845,250,185,/games/the-legend-of-zelda-phantom-hourglass/,1037


In [216]:
model_predict(5919, topic_and_genre_model, final_data, X_train)


Unnamed: 0,title,release_date,plays,playing,backlogs,wishlist,developers,avg_review,genres,platforms,...,ratings_one_five,ratings_two_zero,ratings_two_five,ratings_three_zero,ratings_three_five,ratings_four_zero,ratings_four_five,ratings_five_zero,url,game_id
5919,FIFA 21,1602115200000,1600,39,57,20,"[Electronic Arts, EA Vancouver]",2.5,"[Simulator, Sport]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,90,176,211,255,114,70,18,12,/games/fifa-21/,134101
11731,Everybody's Golf,1503964800000,337,9,95,42,"[Clap Hanz, Sony Interactive Entertainment]",3.7,"[Simulator, Sport]",[PlayStation 4],...,1,5,7,39,45,66,19,16,/games/everybodys-golf--1/,28187
2187,NBA 2K23,1662595200000,550,81,120,16,"[Visual Concepts, 2K Games]",2.7,"[Simulator, Sport]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,25,52,54,108,62,42,5,8,/games/nba-2k23/,207393
14952,WWE 2K16,1445904000000,923,4,55,19,"[2K, Visual Concepts]",3.0,"[Simulator, Sport]","[Windows PC, Xbox 360, PlayStation 4, PlayStat...",...,31,51,94,194,119,67,20,13,/games/wwe-2k16/,11057
6189,PGA Tour 2K21,1597881600000,212,10,181,8,"[HB Studios, 2K]",2.9,"[Simulator, Sport]","[Windows PC, PlayStation 4, Xbox One, Nintendo...",...,5,13,34,32,34,16,3,1,/games/pga-tour-2k21/,133939
7580,Football Manager 2020,1574121600000,425,14,205,5,[Sports Interactive],3.7,"[Simulator, Sport]","[Windows PC, Google Stadia]",...,6,5,20,40,68,106,27,24,/games/football-manager-2020/,122080
34143,Pilotwings,661737600000,920,6,224,65,"[Playtronic, Nintendo]",3.0,"[Simulator, Sport]","[Wii U, Super Famicom, SNES, Wii, New Nintendo...",...,16,38,72,152,92,49,6,13,/games/pilotwings/,9619
1318,WWE 2K23,1679011200000,306,72,39,53,"[2K Games, Visual Concepts]",3.5,"[Simulator, Sport]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,2,5,19,39,81,68,19,8,/games/wwe-2k23/,233028
9814,NBA 2K19,1536624000000,744,5,35,7,"[Visual Concepts, 2K Sports]",2.7,"[Simulator, Sport]","[Windows PC, PlayStation 4, iOS, Xbox One, Nin...",...,38,73,80,103,77,54,12,6,/games/nba-2k19/,103218
2803,Nintendo Switch Sports,1651190400000,2000,105,277,393,[Nintendo],3.0,"[Simulator, Sport]",[Nintendo Switch],...,40,112,158,376,245,115,29,17,/games/nintendo-switch-sports/,191408


In [217]:
model_predict(36255, topic_and_genre_model, final_data, X_train)


Unnamed: 0,title,release_date,plays,playing,backlogs,wishlist,developers,avg_review,genres,platforms,...,ratings_one_five,ratings_two_zero,ratings_two_five,ratings_three_zero,ratings_three_five,ratings_four_zero,ratings_four_five,ratings_five_zero,url,game_id
36255,Omori,1608854400000,11000,1400,6700,5200,"[OMOCAT, PLAYISM]",4.1,"[Adventure, Indie, RPG, Turn Based Strategy]","[Windows PC, Mac, PlayStation 4, Nintendo 3DS,...",...,104,191,254,491,766,1552,1587,3009,/games/omori/,26673
15176,Undertale,1442275200000,39000,907,7400,2600,"[tobyfox, 8-4]",4.3,"[Adventure, Indie, RPG, Turn Based Strategy]","[Windows PC, Mac, Linux, PlayStation 4, Xbox O...",...,175,400,540,1492,2521,5560,5488,9709,/games/undertale/,12517
813,Sea of Stars,1693180800000,1200,960,1900,2800,[Sabotage],3.9,"[Adventure, Indie, RPG, Turn Based Strategy]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,5,29,47,75,128,239,177,172,/games/sea-of-stars/,131890
7563,Bug Fables: The Everlasting Sapling,1574294400000,1500,179,1600,704,"[DANGEN Entertainment, Moonsprout Games]",4.1,"[Adventure, Indie, RPG, Turn Based Strategy]","[Windows PC, PlayStation 4, Xbox One, Nintendo...",...,5,18,20,74,95,222,220,208,/games/bug-fables-the-everlasting-sapling/,119190
5923,Ikenfell,1602115200000,299,21,315,226,"[Chevy Ray Johnston, Humble Games]",3.7,"[Adventure, Indie, RPG, Turn Based Strategy]","[Windows PC, Mac, PlayStation 4, Xbox One, Nin...",...,2,7,12,25,39,56,27,24,/games/ikenfell/,23873
36514,Grimm's Hollow,1572480000000,647,19,384,109,[ghosthunter],3.6,"[Adventure, Indie, RPG, Turn Based Strategy]",[Windows PC],...,2,8,31,103,125,119,40,34,/games/grimms-hollow/,125110
7284,Temtem,1579564800000,753,98,396,316,"[CremaGames, Humble Bundle]",3.0,"[Adventure, Indie, RPG, Turn Based Strategy]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,16,58,60,110,78,71,21,10,/games/temtem/,100357
3255,Pokémon Legends: Arceus,1643328000000,14000,1800,3900,2700,"[Nintendo, Game Freak]",3.7,"[Adventure, RPG, Turn Based Strategy]",[Nintendo Switch],...,121,277,492,1195,1909,2627,1390,779,/games/pokemon-legends-arceus/,144054
13424,Persona 5,1473897600000,21000,772,3600,2100,"[Deep Silver, P Studio]",4.1,"[Adventure, RPG, Turn Based Strategy]","[PlayStation 4, PlayStation 3]",...,125,300,454,1067,1717,3420,3092,3687,/games/persona-5/,9927
7306,Yakuza: Like a Dragon,1579132800000,8600,1100,6900,3600,"[Sega, Ryū Ga Gotoku Studios]",4.3,"[Adventure, RPG, Turn Based Strategy]","[Windows PC, PlayStation 4, Xbox One, PlayStat...",...,45,103,129,315,515,1308,1635,2077,/games/yakuza-like-a-dragon/,36550


# Conclusion

Model is performing better, as sport games are being reccomended for FIFA, and Horror/Dark games are being recommended for Omori
However, Breath of The Wild Still Brings back a lot of Zelda games, that vary a lot in style. Despite being the same genre and basic storyline. 

## Next steps

Try get more data to suppliment the model. There is a free API called IGDB that we will use. 