# Unsupervised content-based recommendation system

## Import Libraries

In [1]:
# Standard library imports
import os # allows access to OS-dependent functionalities
import re #  regular expression matching operations similar to those found in Perl
import sys # to manipulate different parts of the Python runtime environment
import warnings # is used to display the message Warning
import pickle # serializing and deserializing a Python object structure.

# Third party libraries
from fastparquet import write # parquet format, aiming integrate into python-based big data work-flows
from fuzzywuzzy import fuzz # used for string matching

import numpy as np # functions for working in domain of linear algebra, fourier transform, matrices and arrays
import pandas as pd # data analysis and manipulation tool
import joblib # set of tools to provide lightweight pipelining in Python

# deal with sparse data libraries
from scipy.sparse import csr_matrix # Compressed Sparse Row matrix: compact storage for data that is mostly zeros.

# visualization
#import seaborn as sns # data visualization library based on matplotlib.
import matplotlib.pyplot as plt # collection of functions that make matplotlib work like MATLAB.

## scikit Preprocessing data libraries
from sklearn.preprocessing import MinMaxScaler # Transform features by scaling each feature to a given range.

## scikit Feature Extraction libraries
from sklearn.feature_extraction.text import TfidfVectorizer # Convert a collection of raw documents to a matrix of TF-IDF features
from sklearn.feature_extraction.text import CountVectorizer # Convert a collection of text documents to a matrix of token counts.

## scikit Pairwise metrics libraries
#implements utilities to evaluate pairwise distances or affinity of sets of samples.
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import cosine_similarity 
from sklearn.metrics.pairwise import linear_kernel 

## scikit Cross validation iterators libraries
from sklearn.model_selection import GridSearchCV

# Unsupervised learner for implementing neighbor searches.
from sklearn.neighbors import NearestNeighbors

# Lift pandas' display limits: no cap on shown columns, rows, or cell text width.
for _display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

# Utils libraries
from utils import cleaning
from utils import recommend
from utils import testing
from utils import training

# Preparing folder variables
# sys.path[0] is taken as the project root; every data folder is derived from it.
# NOTE(review): what sys.path[0] points at depends on how the kernel was started — confirm
# it is the intended project directory.
main_folder = sys.path[0]

# Move the working directory to the parent of the project root.
# dirname() yields "" when the root has no parent component, and os.chdir("") raises,
# so the move is skipped in that case.
if os.path.dirname(main_folder):
    os.chdir(os.path.dirname(main_folder))

# os.path.join instead of "\name" literals: "\d", "\s", "\_" and "\p" are invalid escape
# sequences (flagged by recent Python versions) and a backslash is only a separator on Windows.
data_folder = os.path.join(main_folder, "data")
saved_models_folder = os.path.join(data_folder, "saved_models")
raw_data = os.path.join(data_folder, "_raw")
processed_data = os.path.join(data_folder, "processed")
content_based_supervised_data = os.path.join(processed_data, "content_based_supervised")



## Cleaning and preparing the data

In [None]:
# Load anime.csv from the raw-data folder.
# os.path.join picks the platform's separator instead of hard-coding "/".
anime = pd.read_csv(os.path.join(raw_data, "anime.csv"))

In [None]:
# Split the comma-separated genre string into a list, then give every genre its own row.
# NOTE: this rebinds `anime`; afterwards each title appears once per genre and the
# original index values are repeated.
anime = anime.assign(genre=anime['genre'].str.split(', ')).explode('genre')

# Preview only: display.max_rows is set to None in the setup cell, so a bare `anime`
# would render every row of the exploded frame.
anime.head()

In [None]:
# Distinct values of the genre column as a plain Python list (a missing value, if present, is kept).
all_genres = list(anime['genre'].unique())
all_genres

['Drama',
 'Romance',
 'School',
 'Supernatural',
 'Action',
 'Adventure',
 'Fantasy',
 'Magic',
 'Military',
 'Shounen',
 'Comedy',
 'Historical',
 'Parody',
 'Samurai',
 'Sci-Fi',
 'Thriller',
 'Sports',
 'Super Power',
 'Space',
 'Slice of Life',
 'Mecha',
 'Music',
 'Mystery',
 'Seinen',
 'Martial Arts',
 'Vampire',
 'Shoujo',
 'Horror',
 'Police',
 'Psychological',
 'Demons',
 'Ecchi',
 'Josei',
 'Shounen Ai',
 'Game',
 'Dementia',
 'Harem',
 'Cars',
 'Kids',
 'Shoujo Ai',
 nan,
 'Hentai',
 'Yaoi',
 'Yuri']

In [None]:
# Preview the first five rows (head() defaults to 5).
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Military, Shounen",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,51,9.16,151266


In [None]:
# (rows, columns) of the frame currently bound to `anime`.
anime.shape

(12294, 7)

In [None]:
# Spread each comma-separated genre into its own column, flatten the result
# (stack() drops the empty slots) and keep the distinct values.
# Despite its name, `genres_string` holds an array of genre names.
genres_string = (
    anime['genre']
    .str.split(', ', expand=True)
    .stack()
    .unique()
)
genres_string

array(['Drama', 'Romance', 'School', 'Supernatural', 'Action',
       'Adventure', 'Fantasy', 'Magic', 'Military', 'Shounen', 'Comedy',
       'Historical', 'Parody', 'Samurai', 'Sci-Fi', 'Thriller', 'Sports',
       'Super Power', 'Space', 'Slice of Life', 'Mecha', 'Music',
       'Mystery', 'Seinen', 'Martial Arts', 'Vampire', 'Shoujo', 'Horror',
       'Police', 'Psychological', 'Demons', 'Ecchi', 'Josei',
       'Shounen Ai', 'Game', 'Dementia', 'Harem', 'Cars', 'Kids',
       'Shoujo Ai', 'Hentai', 'Yaoi', 'Yuri'], dtype=object)

In [None]:
# Same genre names, as a plain Python list.
genres_list = list(genres_string)
genres_list

['Drama',
 'Romance',
 'School',
 'Supernatural',
 'Action',
 'Adventure',
 'Fantasy',
 'Magic',
 'Military',
 'Shounen',
 'Comedy',
 'Historical',
 'Parody',
 'Samurai',
 'Sci-Fi',
 'Thriller',
 'Sports',
 'Super Power',
 'Space',
 'Slice of Life',
 'Mecha',
 'Music',
 'Mystery',
 'Seinen',
 'Martial Arts',
 'Vampire',
 'Shoujo',
 'Horror',
 'Police',
 'Psychological',
 'Demons',
 'Ecchi',
 'Josei',
 'Shounen Ai',
 'Game',
 'Dementia',
 'Harem',
 'Cars',
 'Kids',
 'Shoujo Ai',
 'Hentai',
 'Yaoi',
 'Yuri']

In [None]:
# Sanity check: list the distinct genre names found in the genre column.
(
    anime['genre']
    .str.split(', ', expand=True)
    .stack()
    .unique()
)

array(['Drama', 'Romance', 'School', 'Supernatural', 'Action',
       'Adventure', 'Fantasy', 'Magic', 'Military', 'Shounen', 'Comedy',
       'Historical', 'Parody', 'Samurai', 'Sci-Fi', 'Thriller', 'Sports',
       'Super Power', 'Space', 'Slice of Life', 'Mecha', 'Music',
       'Mystery', 'Seinen', 'Martial Arts', 'Vampire', 'Shoujo', 'Horror',
       'Police', 'Psychological', 'Demons', 'Ecchi', 'Josei',
       'Shounen Ai', 'Game', 'Dementia', 'Harem', 'Cars', 'Kids',
       'Shoujo Ai', 'Hentai', 'Yaoi', 'Yuri'], dtype=object)

In [None]:
# Distinct values of the type column.
anime.loc[:, 'type'].unique()

array(['Movie', 'TV', 'OVA', 'Special', 'Music', 'ONA', nan], dtype=object)

In [2]:
# Load anime_final.csv from the processed-data folder.
# NOTE: this rebinds `anime`, replacing the frame read from anime.csv earlier in the notebook.
# os.path.join picks the platform's separator; "," is read_csv's default delimiter,
# so it is no longer passed explicitly.
anime = pd.read_csv(os.path.join(processed_data, "anime_final.csv"))

In [3]:
# Pass the loaded frame through the project's clean_anime_df helper
# (defined in utils.cleaning, not shown in this notebook) and preview the first row.
anime_cleaned = cleaning.clean_anime_df(anime)
anime_cleaned.iloc[:1]



Unnamed: 0,anime_id,name,english_title,japanses_title,genre,type,source,duration,episodes,rating,score,rank,members,synopsis,cover
0,1,cowboy bebop,Cowboy Bebop,カウボーイビバップ,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,Original,24 min per ep,26,R - 17+ (violence & profanity),8.75,40.0,486824.0,"Crime is timeless. By the year 2071, humanity has expanded across the galaxy, filling the surface of other planets with settlements like those on Earth. These new societies are plagued by murder, drug use, and theft, and intergalactic outlaws are hunted by a growing number of tough bounty hunters.\n\nSpike Spiegel and Jet Black pursue criminals throughout space to make a humble living. Beneath his goofy and aloof demeanor, Spike is haunted by the weight of his violent past. Meanwhile, Jet manages his own troubled memories while taking care of Spike and the Bebop, their ship. The duo is joined by the beautiful con artist Faye Valentine, odd child Edward Wong Hau Pepelu Tivrusky IV, and Ein, a bioengineered Welsh Corgi.\n\nWhile developing bonds and working to catch a colorful cast of criminals, the Bebop crew's lives are disrupted by a menace from Spike's past. As a rival's maniacal plot continues to unravel, Spike must choose between life with his newfound family or revenge for his old wounds.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/4/19644l.jpg


In [4]:
# (rows, columns) of the cleaned frame.
anime_cleaned.shape

(12121, 15)

In [5]:
# Preview the first five rows of the cleaned frame.
anime_cleaned.head(5)

Unnamed: 0,anime_id,name,english_title,japanses_title,genre,type,source,duration,episodes,rating,score,rank,members,synopsis,cover
0,1,cowboy bebop,Cowboy Bebop,カウボーイビバップ,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,Original,24 min per ep,26,R - 17+ (violence & profanity),8.75,40.0,486824.0,"Crime is timeless. By the year 2071, humanity has expanded across the galaxy, filling the surface of other planets with settlements like those on Earth. These new societies are plagued by murder, drug use, and theft, and intergalactic outlaws are hunted by a growing number of tough bounty hunters.\n\nSpike Spiegel and Jet Black pursue criminals throughout space to make a humble living. Beneath his goofy and aloof demeanor, Spike is haunted by the weight of his violent past. Meanwhile, Jet manages his own troubled memories while taking care of Spike and the Bebop, their ship. The duo is joined by the beautiful con artist Faye Valentine, odd child Edward Wong Hau Pepelu Tivrusky IV, and Ein, a bioengineered Welsh Corgi.\n\nWhile developing bonds and working to catch a colorful cast of criminals, the Bebop crew's lives are disrupted by a menace from Spike's past. As a rival's maniacal plot continues to unravel, Spike must choose between life with his newfound family or revenge for his old wounds.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/4/19644l.jpg
1,5,cowboy bebop tengoku no tobira,Cowboy Bebop: Tengoku no Tobira,カウボーイビバップ 天国の扉,"Action, Drama, Mystery, Sci-Fi, Space",Movie,Original,1 hr 55 min,1,R - 17+ (violence & profanity),8.38,185.0,137636.0,"Another day, another bounty—such is the life of the often unlucky crew of the Bebop. However, this routine is interrupted when Faye, who is chasing a fairly worthless target on Mars, witnesses an oil tanker suddenly explode, causing mass hysteria. As casualties mount due to a strange disease spreading through the smoke from the blast, a whopping three hundred million woolong price is placed on the head of the supposed perpetrator.\n\nWith lives at stake and a solution to their money problems in sight, the Bebop crew springs into action. Spike, Jet, Faye, and Edward, followed closely by Ein, split up to pursue different leads across Alba City. Through their individual investigations, they discover a cover-up scheme involving a pharmaceutical company, revealing a plot that reaches much further than the ragtag team of bounty hunters could have realized.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/1439/93480l.jpg
2,6,trigun,Trigun,トライガン,"Action, Comedy, Sci-Fi",TV,Manga,24 min per ep,26,PG-13 - Teens 13 or older,8.22,315.0,283069.0,"Vash the Stampede is the man with a $$60,000,000,000 bounty on his head. The reason: he's a merciless villain who lays waste to all those that oppose him and flattens entire cities for fun, garnering him the title ""The Humanoid Typhoon."" He leaves a trail of death and destruction wherever he goes, and anyone can count themselves dead if they so much as make eye contact—or so the rumors say. In actuality, Vash is a huge softie who claims to have never taken a life and avoids violence at all costs.\n\nWith his crazy doughnut obsession and buffoonish attitude in tow, Vash traverses the wasteland of the planet Gunsmoke, all the while followed by two insurance agents, Meryl Stryfe and Milly Thompson, who attempt to minimize his impact on the public. But soon, their misadventures evolve into life-or-death situations as a group of legendary assassins are summoned to bring about suffering to the trio. Vash's agonizing past will be unraveled and his morality and principles pushed to the breaking point.\n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/7/20310l.jpg
3,7,witch hunter robin,Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),"Action, Drama, Magic, Mystery, Police, Supernatural",TV,Original,25 min per ep,26,PG-13 - Teens 13 or older,7.25,2791.0,64905.0,"Robin Sena is a powerful craft user drafted into the STNJ—a group of specialized hunters that fight deadly beings known as Witches. Though her fire power is great, she's got a lot to learn about her powers and working with her cool and aloof partner, Amon. But the truth about the Witches and herself will leave Robin on an entirely new path that she never expected!\n\n(Source: Funimation)",https://cdn.myanimelist.net/images/anime/10/19969l.jpg
4,8,beet the vandel buster,Bouken Ou Beet,冒険王ビィト,"Adventure, Fantasy, Shounen, Supernatural",TV,Manga,23 min per ep,52,PG - Children,6.94,4310.0,9848.0,"It is the dark century and the people are suffering under the rule of the devil, Vandel, who is able to manipulate monsters. The Vandel Busters are a group of people who hunt these devils, and among them, the Zenon Squad is known to be the strongest busters on the continent. A young boy, Beet, dreams of joining the Zenon Squad. However, one day, as a result of Beet's fault, the Zenon squad was defeated by the devil, Beltose. The five dying busters sacrificed their life power into their five weapons, Saiga. After giving their weapons to Beet, they passed away. Years have passed since then and the young Vandel Buster, Beet, begins his adventure to carry out the Zenon Squad's will to put an end to the dark century.",https://cdn.myanimelist.net/images/anime/7/21569l.jpg


In [6]:
# Build the feature table from the cleaned frame with the project helper.
# NOTE(review): the preview in the next cell shows 0/1 columns named after genres and
# types — presumably a one-hot encoding; confirm in utils.cleaning.
anime_features = cleaning.prepare_supervised_content_based(anime_cleaned)

In [7]:
# Preview the first row of the feature table.
anime_features.iloc[:1]

Unnamed: 0,Action,Adventure,Comedy,Drama,Dementia,Mecha,Historical,School,Hentai,Horror,Demons,Ecchi,Fantasy,Shounen,Game,Mystery,Cars,Magic,Romance,Sci-Fi,Harem,Kids,Shoujo,Military,Super Power,Martial Arts,Music,Slice of Life,Sports,Supernatural,Parody,Vampire,Psychological,Samurai,Yaoi,Seinen,Josei,Thriller,Space,Shounen Ai,Police,Yuri,Shoujo Ai,Movie,Music.1,ONA,OVA,Special,TV
0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1


In [8]:
# (rows, columns) of the feature table.
anime_features.shape

(12121, 49)

In [9]:
# Fit a min-max scaler on the feature table, then apply it to that same table.
min_max = MinMaxScaler()
min_max.fit(anime_features)
min_max_features = min_max.transform(anime_features)

In [10]:
# (rows, columns) of the scaled feature matrix.
min_max_features.shape

(12121, 49)

In [11]:
# Show the scaled features rounded to two decimals (display only; nothing is stored).
min_max_features.round(2)

array([[1., 1., 1., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.]])

## Finding the best parameters for NearestNeighbors model

In [12]:
# Run the project's parameter search for NearestNeighbors on the scaled features.
# The returned value is displayed, not stored.
# NOTE(review): how the search scores candidates is defined in utils.testing — not visible here.
testing.param_NearestNeighbors(min_max_features)



{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'n_neighbors': 1,
 'p': 1,
 'radius': 0.0}

## Building model with the best parameters

In [13]:
# Build the NearestNeighbors model on the scaled features via the project helper.
# The returned value is displayed, not stored.
# NOTE(review): presumably the helper saves what the recommend functions later use —
# confirm in utils.training.
training.model_NearestNeighbors(min_max_features)

array([[    0,  1118,   376, ...,  1029,   955,  1034],
       [    1,  3154,  7607, ...,  3268,  1405,  1381],
       [ 3409, 12113,  3940, ...,  3231,   376,  3353],
       ...,
       [ 7973,  3292,   626, ...,  8364,  8387,  9359],
       [12119,  7536, 11332, ...,  1083,  7440,  2463],
       [ 7527, 12120,  9683, ...,  8052,  7518,  2190]], dtype=int64)

## Get recommendations

In [15]:
# Get the recommendations as a DataFrame.
# Arguments of create_df, in order:
#   1. the result of print_similar_animes for the anime name we want similar titles for
#   2. the genre to keep (or "All")
#   3. the type to keep (or "All")
#   4. how many suggestions we want (fewer, or none, may come back if there are not enough matches)

recommend.create_df(recommend.print_similar_animes("naruto"),"All","All",1)

These are the recommendations for similar animes to [1mnaruto[0m 



Unnamed: 0,name,english_title,japanses_title,genre,type,source,duration,episodes,rating,score,rank,synopsis,cover
,yakitate japan,Yakitate!! Japan,焼きたて!! ジャぱん,"Comedy, Shounen",TV,Manga,24 min per ep,69.0,PG-13 - Teens 13 or older,7.92,687.0,"While countries such as France, England, and Germany all have their own internationally celebrated bread, Japan simply does not have one that can match in reputation.\n\nThus after discovering the wonders of breadmaking at a young age, Kazuma Azuma embarks on a quest to create Japan's own unique national bread. And being blessed with unusually warm hands that allow dough to ferment faster, Azuma is able to bring his baking innovations to another level.\n\nAs he begins working at the prestigious Japanese bakery chain, Pantasia, Azuma encounters other talented bakers and experiences firsthand the competitive world of baking. Along with his newfound friends and rivals, Azuma strives to create new and unparalleled bread that will start a baking revolution. \n\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/3/76432l.jpg


In [16]:
# Get the recommendations as dictionaries (one per suggested anime).
# Arguments of create_dict, in order:
#   1. the result of print_similar_animes for the anime name we want similar titles for
#   2. the genre to keep (or "All")
#   3. the type to keep (or "All")
#   4. how many suggestions we want (fewer, or none, may come back if there are not enough matches)
# NOTE(review): the recorded output reports "Monster" as misspelled and resolves it to
# "monster" — presumably names are matched in lowercase; confirm in utils.recommend.

recommend.create_dict(recommend.print_similar_animes("Monster"),"Supernatural","All",10)

I guess you misspelled the name
 Are you looking similitudes for the anime named [1mmonster[0m? 
Here are the recommendations:


[{'name': 'elfen lied',
  'english_title': 'Elfen Lied',
  'japanses_title': 'エルフェンリート',
  'genre': 'Action, Drama, Horror, Psychological, Romance, Seinen, Supernatural',
  'type': 'TV',
  'source': 'Manga',
  'duration': '25 min per ep',
  'episodes': 13.0,
  'rating': 'R+ - Mild Nudity',
  'score': 7.49,
  'rank': 1770.0,
  'synopsis': 'Lucy is a special breed of human referred to as "Diclonius," born with a short pair of horns and invisible telekinetic hands that lands her as a victim of inhumane scientific experimentation by the government. However, once circumstances present her an opportunity to escape, Lucy, corrupted by the confinement and torture, unleashes a torrent of bloodshed as she escapes her captors.\n\nDuring her breakout, she receives a crippling head injury that leaves her with a split personality: someone with the mentality of a harmless child possessing limited speech capacity. In this state of instability, she stumbles upon two college students, Kouta and his cous