# Unsupervised content-based recommendation system

## Import Libraries

In [1]:
# Standard library imports
import os # allows access to OS-dependent functionalities
import re #  regular expression matching operations similar to those found in Perl
import sys # to manipulate different parts of the Python runtime environment
import warnings # is used to display the message Warning
import pickle # serializing and deserializing a Python object structure.

# Third party libraries
from fastparquet import write # parquet format, aiming integrate into python-based big data work-flows
from fuzzywuzzy import fuzz # used for string matching

import numpy as np # functions for working in domain of linear algebra, fourier transform, matrices and arrays
import pandas as pd # data analysis and manipulation tool
import joblib # set of tools to provide lightweight pipelining in Python

# deal with sparse data libraries
from scipy.sparse import csr_matrix # Compressed Sparse Row matrix: a memory-efficient container for mostly-zero 2-D data.

# visualization
#import seaborn as sns # data visualization library based on matplotlib.
import matplotlib.pyplot as plt # collection of functions that make matplotlib work like MATLAB.

## scikit Preprocessing data libraries
from sklearn.preprocessing import MinMaxScaler # Transform features by scaling each feature to a given range.

## scikit Feature Extraction libraries
from sklearn.feature_extraction.text import TfidfVectorizer # Convert a collection of raw documents to a matrix of TF-IDF features
from sklearn.feature_extraction.text import CountVectorizer # Convert a collection of text documents to a matrix of token counts.

## scikit Pairwise metrics libraries
#implements utilities to evaluate pairwise distances or affinity of sets of samples.
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import cosine_similarity 
from sklearn.metrics.pairwise import linear_kernel 

## scikit Cross validation iterators libraries
from sklearn.model_selection import GridSearchCV

# Unsupervised learner for implementing neighbor searches.
from sklearn.neighbors import NearestNeighbors

# Display settings: lift pandas' truncation limits so that every column, every
# row and the full text of each cell are rendered in the notebook outputs.
for _display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

# Utils libraries
from utils import cleaning
from utils import recommend
from utils import testing
from utils import training

# Preparing folder variables
# The first entry of sys.path is used as the project's main folder. Python sets
# that entry to an empty string in some interactive sessions, in which case
# os.path.dirname("") is "" and os.chdir("") raises, so fall back to the
# current working directory.
main_folder = sys.path[0] or os.getcwd()

# Move the working directory to the parent of the main folder.
os.chdir(os.path.dirname(main_folder))

# Build every path with os.path.join instead of concatenating "\name" literals:
# sequences such as "\d", "\s", "\_" and "\p" are invalid string escapes (Python
# warns about them) and a hard-coded backslash only works on Windows.
data_folder = os.path.join(main_folder, "data")
saved_models_folder = os.path.join(data_folder, "saved_models")
raw_data = os.path.join(data_folder, "_raw")
processed_data = os.path.join(data_folder, "processed")
content_based_supervised_data = os.path.join(processed_data, "content_based_supervised")



## Cleaning and preparing the data

In [17]:
# Load the pickled anime table stored in the raw-data folder and preview it.
# The context manager guarantees the file handle is closed even if unpickling fails.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load pickle files that come from a trusted source.
with open(raw_data + "/" + "anime.pkl", "rb") as fichero:
    anime_covers_cleaned = pickle.load(fichero)
anime_covers_cleaned.head(5)

Unnamed: 0,English_Title,Type,Source,N_Episodes,Duration,Rating,Score,Scored_by,Rank,Genre,Theme,Released,Studios,Producers
0,Cowboy Bebop,TV,Original,26,24,R - 17+ (violence & profanity),8.75,873436,39,"Action,Sci-Fi","Adult Cast,Space",1998,Sunrise,Bandai Visual
1,Cowboy Bebop: Tengoku no Tobira,Movie,Original,1,115,R - 17+ (violence & profanity),8.38,199329,185,"Action,Sci-Fi","Adult Cast,Space",2001,Bones,"Sunrise,Bandai Visual"
2,Trigun,TV,Manga,26,24,PG-13 - Teens 13 or older,8.22,339491,323,"Action,Adventure,Comedy,Drama,Sci-Fi",Adult Cast,1998,Madhouse,Victor Entertainment
3,Witch Hunter Robin,TV,Original,26,25,PG-13 - Teens 13 or older,7.25,42135,2766,"Action,Drama,Mystery,Supernatural",Detective,2002,Sunrise,"TV Tokyo,Bandai Visual,Dentsu,Victor Entertainment"
4,Bouken Ou Beet,TV,Manga,52,23,PG - Children,6.95,6307,4188,"Adventure,Fantasy,Supernatural",Unknown,2004,Toei Animation,"TV Tokyo,Dentsu"


In [19]:
# Read the raw anime CSV from the raw-data folder into a DataFrame.
anime_csv_path = raw_data + "/" + "anime.csv"
anime = pd.read_csv(anime_csv_path)

In [20]:
# Preview the first rows of the raw table (head() shows five rows by default).
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Military, Shounen",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,51,9.16,151266


In [3]:
# Dimensions of the raw table as (rows, columns).
anime.shape

(12294, 7)

In [4]:
# Run the project's cleaning helper on the raw table and preview the result.
# Judging by the displayed output, names come back lower-cased with punctuation removed.
# NOTE(review): the raw name "Gintama&#039;" shows up as "gintama 039" after cleaning, so the
# HTML entity seems to survive as stray digits — consider decoding entities before cleaning; confirm in utils.cleaning.
anime_cleaned = cleaning.clean_anime_df(anime)
anime_cleaned.head(10)



Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,kimi no na wa,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630.0
1,5114,fullmetal alchemist brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Military, Shounen",TV,64,9.26,793665.0
2,28977,gintama,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,51,9.25,114262.0
3,9253,steins gate,"Sci-Fi, Thriller",TV,24,9.17,673572.0
4,9969,gintama 039,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,51,9.16,151266.0
5,32935,haikyuu karasuno koukou vs shiratorizawa gakuen koukou,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351.0
6,11061,hunter x hunter 2011,"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855.0
7,820,ginga eiyuu densetsu,"Drama, Military, Sci-Fi, Space",OVA,110,9.11,80679.0
8,15335,gintama movie kanketsu hen yorozuya yo eien nare,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",Movie,1,9.1,72534.0
9,15417,gintama 039 enchousen,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,13,9.11,81109.0


In [5]:
# Dimensions after cleaning; comparing with the raw shape shows how many rows the cleaning step dropped.
anime_cleaned.shape

(12210, 7)

In [6]:
# Turn the cleaned table into the feature matrix that is scaled and fed to the neighbours model below.
# NOTE(review): the helper is named "supervised" while this notebook is titled "Unsupervised" — confirm this is the intended helper.
anime_features = cleaning.prepare_supervised_content_based(anime_cleaned)

In [7]:
# Dimensions of the feature matrix as (rows, feature columns).
anime_features.shape

(12210, 52)

In [8]:
# Scale every feature column with a MinMaxScaler: learn the per-column
# minimum and maximum first, then apply the scaling to the same data.
min_max = MinMaxScaler()
min_max_features = min_max.fit(anime_features).transform(anime_features)

In [9]:
# Scaling must not change the dimensions: this should match anime_features.shape.
min_max_features.shape

(12210, 52)

In [10]:
# Show the scaled matrix rounded to two decimals for readability (display only, nothing is stored).
min_max_features.round(decimals=2)

array([[0.  , 0.92, 0.2 , ..., 0.  , 0.  , 0.  ],
       [0.03, 0.91, 0.78, ..., 0.  , 0.  , 1.  ],
       [0.03, 0.91, 0.11, ..., 0.  , 0.  , 1.  ],
       ...,
       [0.  , 0.39, 0.  , ..., 1.  , 0.  , 0.  ],
       [0.  , 0.4 , 0.  , ..., 1.  , 0.  , 0.  ],
       [0.  , 0.45, 0.  , ..., 0.  , 0.  , 0.  ]])

## Finding the best parameters for the NearestNeighbors model

In [11]:
# Run the project's parameter search on the scaled features; the cell output is the selected parameter dictionary.
# NOTE(review): the reported best values include n_neighbors=1 and radius=0.0 — confirm that the scoring used
# inside utils.testing is meaningful for an unsupervised neighbours model.
testing.param_NearestNeighbors(min_max_features)



{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'n_neighbors': 1,
 'p': 1,
 'radius': 0.0}

## Building model with the best parameters

In [12]:
# Build the neighbours model on the scaled features. The displayed integer array has one row per anime and each row
# starts with that row's own index — presumably followed by the indices of its nearest neighbours (verify in utils.training).
# NOTE(review): the parameters found in the previous section are not passed in here — presumably they are
# hard-coded inside utils.training; verify they match.
training.model_NearestNeighbors(min_max_features)

array([[    0,   208,  1111, ...,  6272,  6460,  8395],
       [    1,   200,   268, ...,  4443,  8275,  4964],
       [    2,     4,     9, ...,  4905, 10874,  4910],
       ...,
       [12207, 12156, 12155, ..., 12035, 12037, 12039],
       [12208, 12149, 12150, ..., 12034, 12030, 12032],
       [12209, 11723, 11656, ...,  7291,  7313, 12018]], dtype=int64)

## Get recommendations

In [13]:
# Get the recommendations as a DataFrame.
# First select the name of the anime we want to find similar titles for,
# then the genre we want (or write "All"),
# then the type we want (or write "All"),
# then the number of suggestions we want (we might get fewer if there are not that many, or none if there are no matches).

recommend.create_df(recommend.print_similar_animes("naruto"),"All","All",10)

These are the recommendations for similar animes to [1mnaruto[0m 



Unnamed: 0,name,genre,type,episodes,rating
,hunter x hunter 2011,"Action, Adventure, Shounen, Super Power",TV,148.0,9.13
,ansatsu kyoushitsu tv 2nd season,"Action, Comedy, School, Shounen",TV,25.0,8.68
,hunter x hunter,"Action, Adventure, Shounen, Super Power",TV,62.0,8.48
,katekyo hitman reborn,"Action, Comedy, Shounen, Super Power",TV,203.0,8.37
,boku no hero academia,"Action, Comedy, School, Shounen, Super Power",TV,13.0,8.36
,dragon ball z,"Action, Adventure, Comedy, Fantasy, Martial Arts, Shounen, Super Power",TV,291.0,8.32
,shijou saikyou no deshi kenichi,"Action, Comedy, Martial Arts, School, Shounen",TV,50.0,8.25
,kill la kill,"Action, Comedy, School, Super Power",TV,24.0,8.23
,ansatsu kyoushitsu tv,"Action, Comedy, School, Shounen",TV,22.0,8.2
,d gray man,"Action, Adventure, Comedy, Shounen",TV,103.0,8.2


In [14]:
# Get the recommendations as a list of dictionaries.
# First select the name of the anime we want to find similar titles for,
# then the genre we want (or write "All"),
# then the type we want (or write "All"),
# then the number of suggestions we want (we might get fewer if there are not that many, or none if there are no matches).
# NOTE(review): the output below prints the "misspelled" message although "Monster" differs from the matched
# name "monster" only by case — presumably the exact-match lookup is case-sensitive; verify in utils.recommend.

recommend.create_dict(recommend.print_similar_animes("Monster"),"Supernatural","All",10)

I guess you misspelled the name
 Are you looking similitudes for the anime named [1mmonster[0m? 
Here are the recommendations:


[{'name': 'death note',
  'genre': 'Mystery, Police, Psychological, Supernatural, Thriller',
  'type': 'TV',
  'episodes': 37.0,
  'rating': 8.71},
 {'name': 'boku dake ga inai machi',
  'genre': 'Mystery, Psychological, Seinen, Supernatural',
  'type': 'TV',
  'episodes': 12.0,
  'rating': 8.65},
 {'name': 'shinsekai yori',
  'genre': 'Drama, Horror, Mystery, Sci-Fi, Supernatural',
  'type': 'TV',
  'episodes': 25.0,
  'rating': 8.53},
 {'name': 'higurashi no naku koro ni kai',
  'genre': 'Mystery, Psychological, Supernatural, Thriller',
  'type': 'TV',
  'episodes': 24.0,
  'rating': 8.41},
 {'name': 'bungou stray dogs 2nd season',
  'genre': 'Mystery, Seinen, Supernatural',
  'type': 'TV',
  'episodes': 12.0,
  'rating': 8.39},
 {'name': 'xxxholic kei',
  'genre': 'Comedy, Drama, Mystery, Psychological, Supernatural',
  'type': 'TV',
  'episodes': 13.0,
  'rating': 8.34},
 {'name': 'gankutsuou',
  'genre': 'Drama, Mystery, Sci-Fi, Supernatural, Thriller',
  'type': 'TV',
  'episodes