# Unsupervised content-based recommendation system

## Import Libraries

In [1]:
# Standard library imports
import os  # operating-system interfaces (paths, working directory)
import re  # regular expression matching
import sys  # interpreter state such as sys.path
import warnings  # issuing and filtering warning messages
import pickle  # serializing and deserializing Python objects

# Third party libraries
from fastparquet import write  # writes DataFrames to Parquet files
from fuzzywuzzy import fuzz  # fuzzy string matching

import numpy as np  # n-dimensional arrays and linear algebra
import pandas as pd  # data analysis and manipulation tool
import joblib  # lightweight pipelining and object persistence

# Sparse-data support
from scipy.sparse import csr_matrix  # Compressed Sparse Row matrix class

# visualization
#import seaborn as sns # data visualization library based on matplotlib.
import matplotlib.pyplot as plt  # state-based plotting interface of matplotlib

## scikit-learn preprocessing
from sklearn.preprocessing import MinMaxScaler  # scales each feature to a given range

## scikit-learn feature extraction
from sklearn.feature_extraction.text import TfidfVectorizer  # raw documents -> matrix of TF-IDF features
from sklearn.feature_extraction.text import CountVectorizer  # text documents -> matrix of token counts

## scikit-learn pairwise metrics
# Utilities to evaluate pairwise distances or affinities between sets of samples.
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel

## scikit-learn model selection
from sklearn.model_selection import GridSearchCV  # exhaustive search over a parameter grid

# Unsupervised learner for implementing neighbor searches.
from sklearn.neighbors import NearestNeighbors

# Lift every pandas display limit (columns, rows, column width) so that
# previews are rendered without truncation.
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Utils libraries
from utils import cleaning
from utils import recommend
from utils import testing
from utils import training

# Folder variables used by the notebook.
# The first sys.path entry is taken as the main folder, and the working
# directory is moved to that entry's parent directory.
# NOTE(review): this assumes sys.path[0] is a real directory (an empty string
# would make os.chdir fail) - confirm for the kernel in use.
main_folder = sys.path[0]
os.chdir(os.path.dirname(main_folder))

# Every sub-folder is derived from main_folder with "/" as the separator.
data_folder = "/".join((main_folder, "data"))
saved_models_folder = "/".join((data_folder, "saved_models"))
raw_data = "/".join((data_folder, "_raw"))
processed_data = "/".join((data_folder, "processed"))
content_based_supervised_data = "/".join((data_folder, "processed", "content_based_supervised"))



## Cleaning and preparing the data

In [8]:
# Build the two starting objects with the helpers from utils.cleaning.
# NOTE(review): presumably these load the raw anime and rating tables - confirm in utils.cleaning.
anime = cleaning.anime()
rating = cleaning.rating()

In [None]:
# Preview the first two entries of `anime`.
anime.head(2)

In [None]:
# Pass `anime` through the project's cleaning helper, then preview the first entry of the result.
anime_cleaned = cleaning.clean_anime_df(anime)
anime_cleaned.head(1)



In [11]:
# Dimensions of the cleaned data as (rows, columns).
anime_cleaned.shape

(12121, 15)

In [12]:
# Show the first cleaned record to inspect the available columns.
anime_cleaned.head(1)

Unnamed: 0,anime_id,name,english_title,japanses_title,genre,type,source,duration,episodes,rating,score,rank,members,synopsis,cover
0,1,cowboy bebop,Cowboy Bebop,カウボーイビバップ,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,Original,24 min per ep,26,R - 17+ (violence & profanity),8.75,40.0,486824.0,"Crime is timeless. By the year 2071, humanity has expanded across the galaxy, filling the surface of other planets with settlements like those on Earth. These new societies are plagued by murder, drug use, and theft, and intergalactic outlaws are hunted by a growing number of tough bounty hunters.\r\n\r\nSpike Spiegel and Jet Black pursue criminals throughout space to make a humble living. Beneath his goofy and aloof demeanor, Spike is haunted by the weight of his violent past. Meanwhile, Jet manages his own troubled memories while taking care of Spike and the Bebop, their ship. The duo is joined by the beautiful con artist Faye Valentine, odd child Edward Wong Hau Pepelu Tivrusky IV, and Ein, a bioengineered Welsh Corgi.\r\n\r\nWhile developing bonds and working to catch a colorful cast of criminals, the Bebop crew's lives are disrupted by a menace from Spike's past. As a rival's maniacal plot continues to unravel, Spike must choose between life with his newfound family or revenge for his old wounds.\r\n\r\n[Written by MAL Rewrite]",https://cdn.myanimelist.net/images/anime/4/19644l.jpg


In [13]:
# Derive the feature table from the cleaned data with the project helper.
# NOTE(review): judging from the preview of the first row, the result holds 0/1 columns
# for genres and types - confirm in utils.cleaning.
anime_features = cleaning.prepare_supervised_content_based(anime_cleaned)

In [14]:
# Preview the first row of the feature table.
anime_features.head(1)

Unnamed: 0,Action,Adventure,Comedy,Drama,Dementia,Mecha,Historical,School,Hentai,Horror,Demons,Ecchi,Fantasy,Shounen,Game,Mystery,Cars,Magic,Romance,Sci-Fi,Harem,Kids,Shoujo,Military,Super Power,Martial Arts,Music,Slice of Life,Sports,Supernatural,Parody,Vampire,Psychological,Samurai,Yaoi,Seinen,Josei,Thriller,Space,Shounen Ai,Police,Yuri,Shoujo Ai,Movie,Music.1,ONA,OVA,Special,TV
0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1


In [15]:
# Dimensions of the feature table as (rows, columns).
anime_features.shape

(12121, 49)

In [16]:
# Fit a MinMaxScaler (default settings) on the feature table and transform it in one step.
# The fitted scaler is kept in `min_max`; the scaled values go to `min_max_features`.
min_max = MinMaxScaler()
min_max_features = min_max.fit_transform(anime_features)

In [17]:
# Dimensions of the scaled feature matrix; expected to match anime_features.shape.
min_max_features.shape

(12121, 49)

In [18]:
# Display the scaled matrix rounded to two decimals (NumPy abbreviates large arrays when printing).
np.round(min_max_features,2)

array([[1., 1., 1., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.]])

## Finding the best parameters for NearestNeighbors model

In [19]:
# Run the project's parameter search for NearestNeighbors on the scaled features.
# The returned value is not stored; it is only displayed as the cell output.
# NOTE(review): how the search is performed is defined in utils.testing - confirm there.
testing.param_NearestNeighbors(min_max_features)



{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'n_neighbors': 1,
 'p': 1,
 'radius': 0.0}

## Building model with the best parameters

In [20]:
# Run the project's NearestNeighbors helper on the scaled features.
# The returned value is not stored; it is only displayed as the cell output.
# NOTE(review): the integer array shown is presumably the neighbor indices for each row,
# and any model persistence would happen inside utils.training - confirm there.
training.model_NearestNeighbors(min_max_features)

array([[    0,  1118,   376, ...,  1029,   955,  1034],
       [    1,  3154,  7607, ...,  3268,  1405,  1381],
       [ 3409, 12113,  3940, ...,  3231,   376,  3353],
       ...,
       [ 7973,  3292,   626, ...,  8364,  8387,  9359],
       [12119,  7536, 11332, ...,  1083,  7440,  2463],
       [ 7527, 12120,  9683, ...,  8052,  7518,  2190]], dtype=int64)

## Get recommendations

In [21]:
# Build the recommendations as a list of dictionaries (one per suggested anime).
# Arguments passed to recommend.create_dict, in order:
#   1. the result of recommend.print_similar_animes(<anime name>); the name is
#      misspelled on purpose ("Naruti") and is matched to "naruto" in the output
#   2. the list of genres wanted (the original note says "All" is also accepted -
#      confirm in utils.recommend)
#   3. the list of types wanted (same "All" convention - confirm in utils.recommend)
#   4. "or" - NOTE(review): presumably how the genre and type filters are combined; confirm
#   5. the number of suggestions wanted; fewer (or none) may come back when there
#      are not enough matches
# `recommend` is already imported in the first cell, so it is not imported again here.
similar_animes = recommend.print_similar_animes("Naruti")
recommend.create_dict(similar_animes, ["Shounen"], ["TV"], "or", 20)

I guess you misspelled the name
 Are you looking similitudes for the anime named [1mnaruto[0m? 
Here are the recommendations:
or


[{'name': 'yakitate   japan',
  'english_title': 'Yakitate!! Japan',
  'japanses_title': '焼きたて!! ジャぱん',
  'genre': 'Shounen',
  'type': 'TV',
  'source': 'Manga',
  'duration': '24 min per ep',
  'episodes': 69.0,
  'rating': 'PG-13 - Teens 13 or older',
  'score': 7.92,
  'rank': 687.0,
  'synopsis': "While countries such as France, England, and Germany all have their own internationally celebrated bread, Japan simply does not have one that can match in reputation.\r\n\r\nThus after discovering the wonders of breadmaking at a young age, Kazuma Azuma embarks on a quest to create Japan's own unique national bread. And being blessed with unusually warm hands that allow dough to ferment faster, Azuma is able to bring his baking innovations to another level.\r\n\r\nAs he begins working at the prestigious Japanese bakery chain, Pantasia, Azuma encounters other talented bakers and experiences firsthand the competitive world of baking. Along with his newfound friends and rivals, Azuma strives