# Supervised user based collaborative filtering

## Import Libraries

In [1]:
# Standard library imports
import os # allows access to OS-dependent functionalities
import re #  regular expression matching operations similar to those found in Perl
import sys # to manipulate different parts of the Python runtime environment
import warnings # is used to display the message Warning
import pickle # serializing and deserializing a Python object structure.

# Third party libraries
from fastparquet import write # parquet format, aiming integrate into python-based big data work-flows
from fuzzywuzzy import fuzz # used for string matching

import numpy as np # functions for working in domain of linear algebra, fourier transform, matrices and arrays
import pandas as pd # data analysis and manipulation tool
import joblib # set of tools to provide lightweight pipelining in Python

# visualization
import matplotlib.pyplot as plt # collection of functions that make matplotlib work like MATLAB.

# Surprise libraries
from surprise import Dataset, Reader, accuracy
from surprise.model_selection import GridSearchCV, train_test_split, cross_validate
from surprise import SVD, SVDpp, SlopeOne, NMF, NormalPredictor, KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore, BaselineOnly, CoClustering

# pip install git+https://github.com/NicolasHug/surprise.git

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Utils libraries
from utils import cleaning
from utils import recommend
from utils import testing
from utils import training


#Preparing folder variables
os.chdir(os.path.dirname(sys.path[0])) # This command makes the notebook the main path and can work in cascade.
main_folder = sys.path[0]
data_folder = (main_folder + "/" + "data")
saved_models_folder = (data_folder + "/" + "saved_models")
raw_data = (data_folder + "/" + "_raw")
processed_data = (data_folder + "/" + "processed")
user_based_unsupervised_data = (data_folder + "/" + "processed" + "/" + "_user_based_unsupervised")
content_based_unsupervised_data = (data_folder  + "/" + "processed" + "/" + "content_based_unsupervised")
content_based_supervised_data = (data_folder + "/" + "processed" + "/" + "content_based_supervised")



ModuleNotFoundError: No module named 'utils'

## Loading and cleaning data

In [17]:
# loading the data
anime = cleaning.anime()
rating = cleaning.rating()

In [18]:
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [19]:
rating.shape

(7813737, 3)

In [20]:
rating.describe()

Unnamed: 0,user_id,anime_id,rating
count,7813737.0,7813737.0,7813737.0
mean,36727.96,8909.072,6.14403
std,20997.95,8883.95,3.7278
min,1.0,1.0,-1.0
25%,18974.0,1240.0,6.0
50%,36791.0,6213.0,7.0
75%,54757.0,14093.0,9.0
max,73516.0,34519.0,10.0


In [21]:
# Cleaning the data
ratingdf = cleaning.supervised_rating_cleaning(rating)

## Preparing the data to try different models

In [None]:
#def supervised_prepare_training(ratingdf):
#    # using groupby and some fancy logic
#    reader = Reader(rating_scale=(1,10))
#    data = Dataset.load_from_df(ratingdf[['user_id', 'anime_id', 'rating']], reader)
#
#    # Saving the file to pickle
#    joblib.dump(data,processed_data + "/" + "data_reader_all.pkl")
#
#    size = 100000
#    rating_sample = ratingdf.groupby("rating", group_keys=False).apply(lambda x: x.sample(int(np.rint(size*len(x)/len(ratingdf))))).sample(frac=1).reset_index(drop=True)
#    
#    reader = Reader(rating_scale=(1,10))
#    data_sample = Dataset.load_from_df(rating_sample[['user_id', 'anime_id', 'rating']], reader)
#
#    # Saving the table to pickle
#    joblib.dump(data_sample,processed_data + "/" + "data_reader_sample.pkl")
#
#    return data_sample
#supervised_prepare_training(ratingdf)

In [22]:
data_sample = cleaning.supervised_prepare_training(ratingdf)

## Metrics all together

In [None]:
def baseline_all(data):
    
    benchmark = []
    # Iterate over all algorithms
    svd = SVD()
    svdp = SVDpp()
    slpo = SlopeOne()
    nm  = NMF()
    nmlp = NormalPredictor()
    #knnbase = KNNBaseline()
    #knnb = KNNBasic()
    #knnmean = KNNWithMeans()
    #knnzs = KNNWithZScore()
    baseonly = BaselineOnly()
    coclus = CoClustering()

    for algorithm in [svd,svdp,slpo,nm,nmlp,baseonly,coclus]:
        benchmark_inndividual = []
        print(algorithm,"started")
        # Perform cross validation
        results = cross_validate(algorithm, data, measures=['RMSE','MSE','MAE','FCP'], cv=3, verbose=False)
        print(algorithm,"finished")
        # Get results & append algorithm name
        tmp = pd.DataFrame.from_dict(results).mean(axis=0)
        name = str(algorithm).split(' ')[0].split('.')[-1]
        tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
        benchmark_inndividual.append(tmp)
        benchmark.append(tmp)
        
        dfscores_individual = pd.DataFrame(benchmark_inndividual).set_index('Algorithm').sort_values('test_rmse')
        write(saved_models_folder + "/" + name + "_results.parq", dfscores_individual)
    dfscores = pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')
    write(saved_models_folder + "/" + "Others_Models_results.parq", dfscores)

    return dfscores

In [None]:
baseline_all(data_sample)

## Merge df resutls

In [None]:
df_others_results = pd.read_parquet(saved_models_folder + "/" + "Others_Models_results.parq", engine='fastparquet')
df_others_results.head(10)

Unnamed: 0_level_0,test_rmse,test_mse,test_mae,test_fcp,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SVD,1.410689,1.990173,1.091799,0.54983,1.127742,0.291177
SVDpp,1.413802,1.998886,1.098086,0.548991,1.091193,0.738526
BaselineOnly,1.423057,2.025098,1.101095,0.555097,0.32587,0.144077
CoClustering,1.582529,2.504417,1.207506,0.566946,4.946316,0.224878
SlopeOne,1.701391,2.894736,1.294262,0.458307,0.578457,0.31063
NormalPredictor,2.139694,4.578297,1.696845,0.4974,0.116917,0.156717
NMF,2.499246,6.246258,2.119292,0.553087,3.48473,0.298785


In [None]:
df_KNNBasic_results = pd.read_parquet(saved_models_folder + "/" + "KNNBasic_results.parq", engine='fastparquet')
df_KNNBasic_results.head(10)

Unnamed: 0_level_0,test_rmse,test_mse,test_mae,test_fcp,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
KNNBasic,1.643655,2.701632,1.275451,0.462612,35.4829,1.732444


In [None]:
df_KNNBaseline_results = pd.read_parquet(saved_models_folder + "/" + "KNNBaseline_results.parq", engine='fastparquet')
df_KNNBaseline_results.head(10)

Unnamed: 0_level_0,test_rmse,test_mse,test_mae,test_fcp,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
KNNBaseline,1.498532,2.245621,1.156121,0.53585,32.409142,1.71859


In [None]:
df_knn_results = pd.read_parquet(saved_models_folder + "/" + "KNN_Models_results.parq", engine='fastparquet')
df_knn_results.head(10)

Unnamed: 0_level_0,test_rmse,test_mse,test_mae,test_fcp,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
KNNWithMeans,1.65163,2.727919,1.25824,0.466116,31.944902,1.796209
KNNWithZScore,1.666752,2.778087,1.267275,0.468789,36.883558,2.03843


In [None]:
vertical_concat = pd.concat([df_others_results, df_KNNBasic_results,df_KNNBaseline_results,df_knn_results], axis=0)

In [None]:
vertical_concat.head(20)

Unnamed: 0_level_0,test_rmse,test_mse,test_mae,test_fcp,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SVD,1.410689,1.990173,1.091799,0.54983,1.127742,0.291177
SVDpp,1.413802,1.998886,1.098086,0.548991,1.091193,0.738526
BaselineOnly,1.423057,2.025098,1.101095,0.555097,0.32587,0.144077
CoClustering,1.582529,2.504417,1.207506,0.566946,4.946316,0.224878
SlopeOne,1.701391,2.894736,1.294262,0.458307,0.578457,0.31063
NormalPredictor,2.139694,4.578297,1.696845,0.4974,0.116917,0.156717
NMF,2.499246,6.246258,2.119292,0.553087,3.48473,0.298785
KNNBasic,1.643655,2.701632,1.275451,0.462612,35.4829,1.732444
KNNBaseline,1.498532,2.245621,1.156121,0.53585,32.409142,1.71859
KNNWithMeans,1.65163,2.727919,1.25824,0.466116,31.944902,1.796209


In [None]:
listatests =  ["test_rmse","test_mse","test_mae","test_fcp"]
for i in listatests:
    print ("the best result in",i,"is",vertical_concat.iloc[vertical_concat[i].argmin(), 0:1])

the best result in test_rmse is test_rmse    1.410689
Name: SVD, dtype: float64
the best result in test_mse is test_rmse    1.410689
Name: SVD, dtype: float64
the best result in test_mae is test_rmse    1.410689
Name: SVD, dtype: float64
the best result in test_fcp is test_rmse    1.701391
Name: SlopeOne, dtype: float64


## Evaluation selected model SVD

In [16]:
def evaluate_svd_model(data_sample):
    param_grid = {'n_factors':[50,100,150],'n_epochs':[20,30],  'lr_all':[0.005,0.01],'reg_all':[0.02,0.1]}
    gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)

    gs.fit(data_sample)
    params = gs.best_params['rmse']

    # best RMSE score
    print(gs.best_score["rmse"])

    # combination of parameters that gave the best RMSE score
    print(gs.best_params["rmse"])

    # We can now use the algorithm that yields the best rmse:
    algo = gs.best_estimator["rmse"]
    algo.fit(data_sample.build_full_trainset())

    # # Serialización del modelo
    joblib.dump(algo,saved_models_folder + "/" + "SVD_samople_fit.pkl")

    results_df = pd.DataFrame.from_dict(gs.cv_results)
    
    return results_df


In [None]:
testing.evaluate_svd_model(data_sample)

In [3]:
## Recomendacion by user Id and how many results
def reco_by_user(id,n):
    que_user = id
    chosen_user = cleaning.final_df()

    chosen_user['Estimate_Score'] = chosen_user['anime_id'].apply(lambda x: algo.predict(que_user, x).est)

    chosen_user = chosen_user.drop('anime_id', axis = 1)

    chosen_user = chosen_user.sort_values('Estimate_Score', ascending=False)


    to_return = chosen_user.head(n)

    return to_return

reco_by_user(208,20)

NameError: name 'algo' is not defined

In [21]:
def anime():
    anime = pd.read_csv(raw_data + "/" + "anime.csv")
    return anime

In [2]:
from utils import recommend
recommend.reco_by_user(675,50,"Supernatural","All") #reco_by_user(1000,50,"Supernatural","All")

AttributeError: 'NoneType' object has no attribute 'copy'

In [7]:
import pandas as pd
def map_it():
    anime = pd.read_csv(raw_data + "/" + "anime.csv")
    anime_mapping = anime.copy()
    anime_mapping = anime_mapping.drop(['episodes','members','rating'],axis=1, inplace=True)
    ## Recomendacion by user Id, how many results and gender
    return anime_mapping
map_it()

In [4]:
## Recomendacion by user Id, how many results and gender
'''
Create a df of the anime matches with the filters selected
'''
def df_recommendation(id,n,gen,typ):
    final_df = reco_by_user(id,n,gen,typ)
    to_return = final_df
    blankIndex=[''] * len(final_df)
    final_df.index=blankIndex
    if final_df.empty:
        sentence = print('WOW!!!! Sorry, there is no matches for the anime and options selected! \n Try again, you might have mroe luck')
        return sentence
    else:
        return to_return

def dict_recommendation(id,n,gen,typ):
    final_df = reco_by_user(id,n,gen,typ)
    to_return = final_df
    blankIndex=[''] * len(final_df)
    final_df.index=blankIndex
    if final_df.empty:
        sentence = print('WOW!!!! Sorry, there is no matches for the anime and options selected! \n Try again, you might have mroe luck')
        return sentence
    else:
        final_dict = final_df.to_dict('records')
        return final_dict

def sort_it(que_user,df,n):
    algo = joblib.load(saved_models_folder + "/" + "SVD_samople_fit.pkl")

    df['Estimate_Score'] = df['anime_id'].apply(lambda x: algo.predict(que_user, x).est)
    df = df.sort_values('Estimate_Score', ascending=False).drop(['anime_id'], axis = 1)
    blankIndex=[''] * len(df)
    df.index=blankIndex 
    return df.head(n)

def reco_by_user(id,n,gen,typ):
    chosen_user = cleaning.final_df()


    if (gen != "All") and (typ != "All"):
        filtered = chosen_user[chosen_user['genre'].str.contains(gen, regex=False, case=False, na=False)]
        filtered = filtered[filtered['type'].str.contains(typ, regex=False, case=False, na=False)]
        return sort_it(id,filtered,n)

    elif  (gen == "All") and (typ != "All"):
        filtered = chosen_user[chosen_user['type'].str.contains(typ, regex=False, case=False, na=False)]
        return sort_it(id,filtered,n)

    elif  (typ == "All") and (gen != "All"):
        filtered = chosen_user[chosen_user['genre'].str.contains(gen, regex=False, case=False, na=False)]
        return sort_it(id,filtered,n)

    elif  (typ == "All") and (gen == "All"):
        return sort_it(id,chosen_user,n)

df_recommendation(675,50,"Supernatural","All")

Unnamed: 0,name,english_title,japanses_title,genre,type,source,duration,episodes,rating,score,rank,members,synopsis,cover,Estimate_Score
,Kimi no Na wa.,Kimi no Na wa.,君の名は。,"Drama, Romance, School, Supernatural",Movie,Original,1 hr 46 min,1,PG-13 - Teens 13 or older,8.85,25.0,200630,"Mitsuha Miyamizu, a high school girl, yearns to live the life of a boy in the bustling city of Tokyo—a dream that stands in stark contrast to her present life in the countryside. Meanwhile in the city, Taki Tachibana lives a busy life as a high school student while juggling his part-time job and hopes for a future in architecture.\n\nOne day, Mitsuha awakens in a room that is not her own and suddenly finds herself living the dream life in Tokyo—but in Taki's body! Elsewhere, Taki finds himself living Mitsuha's life in the humble countryside. In pursuit of an answer to this strange phenomenon, they begin to search for one another.\n\nKimi no Na wa. revolves around Mitsuha and Taki's actions, which begin to have a dramatic impact on each other's lives, weaving them into a fabric held together by fate and circumstance.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/5/87048l.jpg,9.30818
,Clannad: After Story,Clannad: After Story,CLANNAD〜AFTER STORY〜 クラナド アフターストーリー,"Drama, Fantasy, Romance, Slice of Life, Supernatural",TV,Visual novel,24 min per ep,24,PG-13 - Teens 13 or older,8.94,16.0,456749,"Clannad: After Story, the sequel to the critically acclaimed slice-of-life series Clannad, begins after Tomoya Okazaki and Nagisa Furukawa graduate from high school. Together, they experience the emotional rollercoaster of growing up. Unable to decide on a course for his future, Tomoya learns the value of a strong work ethic and discovers the strength of Nagisa's support. Through the couple's dedication and unity of purpose, they push forward to confront their personal problems, deepen their old relationships, and create new bonds.\n\nTime also moves on in the Illusionary World. As the plains grow cold with the approach of winter, the Illusionary Girl and the Garbage Doll are presented with a difficult situation that reveals the World's true purpose.\n\nBased on the visual novel by Key and produced by Kyoto Animation, Clannad: After Story is an impactful drama highlighting the importance of family and the struggles of adulthood.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/1299/110774l.jpg,9.015971
,Hellsing Ultimate,Hellsing Ultimate,HELLSING OVA,"Action, Horror, Military, Seinen, Supernatural, Vampire",OVA,Manga,49 min per ep,10,R - 17+ (violence & profanity),8.36,199.0,297454,"There exist creatures of darkness and evil that plague the night, devouring any human unfortunate enough to be caught in their grasp. On the other side is Hellsing, an organization dedicated to destroying these supernatural forces that threaten the very existence of humanity. At its head is Integra Fairbrook Wingates Hellsing, who commands a powerful military and spends her life fighting the undead.\n\nIntegra's vast army, however, pales in comparison with her ultimate weapon: the vampire Alucard, who works against his own kind as an exterminator for Hellsing. With his new vampire servant, Seras Victoria, at his side, Alucard must battle not only monsters, but all those who stand to oppose Hellsing, be they in the guise of good or evil.\n\nIn a battle for mankind's survival, Hellsing Ultimate proves that appearances are not all they may seem, and sometimes the greatest weapon can come in the form of one's worst nightmare.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/6/7333l.jpg,8.962239
,Monogatari Series: Second Season,Monogatari Series: Second Season,〈物語〉シリーズ セカンドシーズン,"Comedy, Mystery, Romance, Supernatural, Vampire",TV,Light novel,25 min per ep,26,R - 17+ (violence & profanity),8.77,36.0,205959,"Apparitions, oddities, and gods continue to manifest around Koyomi Araragi and his close-knit group of friends: Tsubasa Hanekawa, the group's modest genius; Shinobu Oshino, the resident doughnut-loving vampire; athletic deviant Suruga Kanbaru; bite-happy spirit Mayoi Hachikuji; Koyomi's cutesy stalker Nadeko Sengoku; and Hitagi Senjougahara, Koyomi's aloof classmate.\n\nMonogatari Series: Second Season revolves around these individuals and their struggle to overcome the darkness that is rapidly approaching. A new semester has begun and with graduation looming over Koyomi, he must quickly decide the paths he will walk, as well as the relationships and friends that he'll save. But as strange events begin to unfold, Koyomi is nowhere to be found, and a vicious tiger apparition has appeared in his absence. Hanekawa has become its target, and she must fend for herself—or bow to the creature's perspective on the feebleness of humanity.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/1807/121534l.jpg,8.796475
,Natsume Yuujinchou San,Natsume Yuujinchou San,夏目友人帳 参,"Drama, Fantasy, Shoujo, Slice of Life, Supernatural",TV,Manga,24 min per ep,13,PG-13 - Teens 13 or older,8.58,88.0,102322,"Natsume Yuujinchou San follows Takashi Natsume, a boy who is able to see youkai. Natsume and his bodyguard Madara, nicknamed Nyanko-sensei, continue on their quest to release youkai from their contracts in the ""Book of Friends.""\n\nNatsume comes to terms with his ability to see youkai and stops thinking of it as a curse. As he spends more time with his human and youkai friends, he realizes how much he values them both and decides he doesn't have to choose between the spirit and human worlds to be happy.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/8/82394l.jpg,8.774697
,Suzumiya Haruhi no Shoushitsu,Suzumiya Haruhi no Shoushitsu,涼宮ハルヒの消失,"Comedy, Mystery, Romance, School, Sci-Fi, Supernatural",Movie,Light novel,2 hr 41 min,1,PG-13 - Teens 13 or older,8.61,83.0,240297,"On a cold December day, Kyon arrives at school prepared for another outing with his fellow SOS Brigade members. However, much to his surprise, he discovers that almost everything has changed completely: Haruhi Suzumiya and Itsuki Koizumi are nowhere to be found; Mikuru Asahina does not recognize him at all; Yuki Nagato is a regular human; and Ryouko Asakura has mysteriously returned. Although he is no stranger to the supernatural, Kyon is disturbed by this odd turn of events and decides to investigate on his own.\n\nFinding himself to be the only person that is aware of the previous reality, Kyon is now faced with a difficult choice: to finally live the normal life he has always wanted, or uncover a way to turn back the hands of time and restore his chaotic yet familiar world.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/1248/112352l.jpg,8.757496
,Fate/Zero,Fate/Zero,フェイト/ゼロ,"Action, Fantasy, Supernatural",TV,Light novel,28 min per ep,13,R - 17+ (violence & profanity),8.28,258.0,453630,"With the promise of granting any wish, the omnipotent Holy Grail triggered three wars in the past, each too cruel and fierce to leave a victor. In spite of that, the wealthy Einzbern family is confident that the Fourth Holy Grail War will be different; namely, with a vessel of the Holy Grail now in their grasp. Solely for this reason, the much hated ""Magus Killer"" Kiritsugu Emiya is hired by the Einzberns, with marriage to their only daughter Irisviel as binding contract.\n\nKiritsugu now stands at the center of a cutthroat game of survival, facing off against six other participants, each armed with an ancient familiar, and fueled by unique desires and ideals. Accompanied by his own familiar, Saber, the notorious mercenary soon finds his greatest opponent in Kirei Kotomine, a priest who seeks salvation from the emptiness within himself in pursuit of Kiritsugu.\n\nBased on the light novel written by Gen Urobuchi, Fate/Zero depicts the events of the Fourth Holy Grail War—10 years prior to Fate/stay night. Witness a battle royale in which no one is guaranteed to survive.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/1887/117644l.jpg,8.749244
,Shinsekai yori,Shinsekai yori,新世界より,"Drama, Horror, Mystery, Sci-Fi, Supernatural",TV,Novel,22 min per ep,25,R - 17+ (violence & profanity),8.29,245.0,288376,"In the town of Kamisu 66, 12-year-old Saki Watanabe has just awakened to her psychic powers and is relieved to rejoin her friends—the mischievous Satoru Asahina, the shy Mamoru Itou, the cheerful Maria Akizuki, and Shun Aonuma, a mysterious boy whom Saki admires—at Sage Academy, a special school for psychics. However, unease looms as Saki begins to question the fate of those unable to awaken to their powers, and the children begin to get involved with secretive matters such as the rumored Tainted Cats said to abduct children.\n\nShinsekai yori tells the unique coming-of-age story of Saki and her friends as they journey to grow into their roles in the supposed utopia. Accepting these roles, however, might not come easy when faced with the dark and shocking truths of society, and the impending havoc born from the new world.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/1367/131974l.jpg,8.744212
,Natsume Yuujinchou Shi,Natsume Yuujinchou Shi,夏目友人帳 肆,"Drama, Fantasy, Shoujo, Slice of Life, Supernatural",TV,Manga,24 min per ep,13,PG-13 - Teens 13 or older,8.65,67.0,98431,"Takashi Natsume, the timid youkai expert and master of the Book of Friends, continues his journey towards self-understanding and acceptance with the help of friends both new and old. His most important ally is still his gluttonous and sake-loving bodyguard, the arrogant but fiercely protective wolf spirit Madara—or Nyanko-sensei, as Madara is called when in his usual disguise of an unassuming, pudgy cat.\n\nNatsume, while briefly separated from Nyanko-sensei, is ambushed and kidnapped by a strange group of masked, monkey-like youkai, who have spirited him away to their forest as they desperately search for the Book of Friends. Realizing that his ""servant"" has been taken out from right under his nose, Nyanko-sensei enlists the help of Natsume's youkai friends and mounts a rescue operation. However, the forest of the monkey spirits holds many dangerous enemies, including the Matoba Clan, Natsume's old nemesis.\n\nStretching from the formidable hideout of the Matoba to Natsume's own childhood home, Natsume Yuujinchou Shi is a sweeping but familiar return to a world of danger and friendship, where Natsume will finally confront the demons of his own past.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/3/37449l.jpg,8.706689
,Mushishi,Mushishi,蟲師,"Adventure, Fantasy, Historical, Mystery, Seinen, Slice of Life, Supernatural",TV,Manga,23 min per ep,26,PG-13 - Teens 13 or older,8.66,62.0,300030,"""Mushi"": the most basic forms of life in the world. They exist without any goals or purposes aside from simply ""being."" They are beyond the shackles of the words ""good"" and ""evil."" Mushi can exist in countless forms and are capable of mimicking things from the natural world such as plants, diseases, and even phenomena like rainbows.\n\nThis is, however, just a vague definition of these entities that inhabit the vibrant world of Mushishi, as to even call them a form of life would be an oversimplification. Detailed information on Mushi is scarce because the majority of humans are unaware of their existence.\n\nSo what are Mushi and why do they exist? This is the question that a ""Mushishi,"" Ginko, ponders constantly. Mushishi are those who research Mushi in hopes of understanding their place in the world's hierarchy of life.\n\nGinko chases rumors of occurrences that could be tied to Mushi, all for the sake of finding an answer.\n\nIt could, after all, lead to the meaning of life itself.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/2/73862l.jpg,8.705345


## Evaluation selected SVD model (all dataset)

In [18]:
anime = pd.read_csv(data_folder + "/" + "anime.csv")
rating = pd.read_csv(data_folder + "/" + "rating.csv.zip")

anime_mapping = anime.copy()
anime_mapping.drop(['episodes','members','rating'],axis=1, inplace=True)

ratingdf = rating[rating.rating>0]
ratingdf = ratingdf.reset_index()
ratingdf.drop('index', axis=1,inplace=True)
ratingdf.shape

#print(rating_sample["rating"].value_counts(normalize=True),f'\n\nlength of data {rating_sample.shape}')

reader = Reader(rating_scale=(1,10))
data = Dataset.load_from_df(ratingdf[['user_id', 'anime_id', 'rating']], reader)


In [19]:
from fastparquet import write 
from surprise import SVD,NormalPredictor
from surprise.model_selection import GridSearchCV

param_grid = {'n_factors':[50,100,150],'n_epochs':[20,30],  'lr_all':[0.005,0.01],'reg_all':[0.02,0.1]}
gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)


gs.fit(data)
params = gs.best_params['rmse']

# best RMSE score
print(gs.best_score["rmse"])

# combination of parameters that gave the best RMSE score
print(gs.best_params["rmse"])

# We can now use the algorithm that yields the best rmse:
algo = gs.best_estimator["rmse"]
algo.fit(data.build_full_trainset())

import pandas as pd  # noqa

results_df = pd.DataFrame.from_dict(gs.cv_results)

write(saved_models_folder + "/" + "SVD_full_results.parq", results_df)

#1.1341632727982356
#{'n_factors': 150, 'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}

# # Serialización del modelo
import pickle
joblib.dump(algo,saved_models_folder + "/" + "SVD_full_fit.pkl")



## Recommendation building phase SVD model using best_params

In [None]:
algo = joblib.load(saved_models_folder + "/" + "SVD_full_fit.pkl")

In [None]:
import joblib
anime_mapping = anime.copy()
anime_mapping.drop(['episodes','members','rating'],axis=1, inplace=True)
#anime_mapping.set_index('anime_id',inplace=True) 


from surprise import SVD
algo = 
trainset = data.build_full_trainset()
algo.fit(trainset)

NameError: name 'algo' is not defined

## Getting recommendations

In [None]:
que_user = 208
chosen_user = anime_mapping.copy()
chosen_user['Estimate_Score'] = chosen_user['anime_id'].apply(lambda x: algo.predict(que_user, x).est)

chosen_user = chosen_user.drop('anime_id', axis = 1)

chosen_user = chosen_user.sort_values('Estimate_Score', ascending=False)


chosen_user.head(10)

Unnamed: 0,name,genre,type,Estimate_Score
11,Koe no Katachi,"Drama, School, Shounen",Movie,9.364396
480,Mahou Shoujo Lyrical Nanoha: The Movie 1st,"Action, Comedy, Drama, Magic, Super Power",Movie,9.350239
10,Clannad: After Story,"Drama, Fantasy, Romance, Slice of Life, Supernatural",TV,9.344733
42,Ansatsu Kyoushitsu (TV) 2nd Season,"Action, Comedy, School, Shounen",TV,9.246364
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,9.150686
373,K-On!!,"Comedy, Music, School, Slice of Life",TV,9.150479
16,Shigatsu wa Kimi no Uso,"Drama, Music, Romance, School, Shounen",TV,9.121163
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,9.118466
5,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou,"Comedy, Drama, School, Shounen, Sports",TV,9.112615
194,Mahou Shoujo Lyrical Nanoha: The Movie 2nd A&#039;s,"Action, Comedy, Drama, Magic, Super Power",Movie,9.089225


In [3]:
que_user = 58145
chosen_user = anime_mapping.copy()
chosen_user['Estimate_Score'] = chosen_user['anime_id'].apply(lambda x: algo.predict(que_user, x).est)

chosen_user = chosen_user.drop('anime_id', axis = 1)

chosen_user = chosen_user.sort_values('Estimate_Score', ascending=False)


chosen_user.head(10)

NameError: name 'anime_mapping' is not defined