In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
from pylab import rcParams

import string
import math

In [2]:
# let's define a few functions we need
# first off, to get an avg vector for arbitrary count of vectors
def average_input(args):
    # get number of total inputs
    count = len(args)
    if(count == 0):
        return
    if(count == 1):
        return args[0][3:]
    avg = []
    # we know all input vectors will be of the same length
    # cut out vector values for index, animeID, and title_english
    for i in range(3, len(args[0])):
        isum = sum([vector[i] for vector in args])
        avg.append(isum/count)
    return avg

In [3]:
# now, let's define our distance calculation
def cosine_distance(vector1, vector2):
    # cosTHETA = (v1 dot v2)/(mag_v1 * mag_v2)
    # dot_prod = sum(tup[0] * tup[1] for tup in zip(vector1, vector2))
    dot_prod = np.dot(vector1, vector2)
    mag1 = np.sqrt(np.dot(vector1, vector1))
    mag2 = np.sqrt(np.dot(vector2, vector2))
    return (dot_prod / (mag1 * mag2))

In [4]:
# yeeeet time to implement K-Nearest Neighbors
# note: query is our average vector for the input anime data
def knn(query, data, k):
    neighbois = []
    # time to yeet thru dem dataframe rows
    for index, row in data.iterrows():
        # get list for the current row
        # row includes animeID, title_english cols
        entry = list(row)[2:]
        # get the distance between query and current data point
        distance = cosine_distance(query, entry)
        # append distance and index to list
        neighbois.append((distance, index))
    # time to sort list least to greatest distance or ascending (default)
    neighbois = sorted(neighbois)
    # nearest k-neighbors, by index label (animeID)
    k_nearest_neighbois = [boi[1] for boi in neighbois[:k]]
    # noice
    return k_nearest_neighbois

In [5]:
# time to clean out the input vectors from our data
# aka we can't recommend the animes user gave
def inputs_done_gone(big_boi_data, args):
    # get list of all indices of user input animes
    index_list = [anime[0] for anime in args]
    # remove from data
    clean_boi = big_boi_data.loc[~big_boi_data.index.isin(index_list)]
    return clean_boi

In [6]:
# method to get corresponding animes given anime title(s)??
def get_anime(data, titles):
    animes = []
    # iterate thru given titles
    for title in titles:
        # get all possible entries by title
        poss_anime = data[[title in item for item in data['title_english']]]
        if(len(poss_anime.index) == 0): # no possible animes
            print("Anime \'%s\' not found. Proceeding without \'%s\'."%(title,title))
        elif(len(poss_anime.index) == 1): # only one possible anime
            anime_data = [poss_anime.index] + list(poss_anime.loc[poss_anime.index[0]])
            full_title = anime_data[2] # index 2 is title
            print("Selected anime \'%s\'."%full_title)
            # add anime to list of anime
            animes.append(anime_data)
        else: # 1 or more possible title
            print(poss_anime[["animeID","title_english"]])
            index = input("Please enter indexes from above separated with commas ',' or -1 if absent: \n Ex. enter\'4,2,0\' for indexes 4, 2, and 0  ")
#           while(sum([index == i for i in poss_anime.index]) == 0 and index != -1):
#               index = input("Please enter indexes from above separated with commas ',' or -1 if absent: \n Ex. enter\'4,2,0\' for indexes 4, 2, and 0  ")
            if(index == "-1"):
                print("Anime \'%s\' not found. Proceeding without \'%s\'."%(title,title))
            else:
                indices = index.split(",") # array of index values, as str
                if(len(indices) == 1):
                    index = int(indices[0])
                    anime_data = [index] + list(poss_anime.loc[index])
                    full_title = anime_data[2] # index 2 is title_english
                    print("Selected anime \'%s\'."%full_title)
                    # add anime to list of anime
                    animes.append(anime_data)
                else: # more than one index found
                    for i in indices:
                        index = int(i)
                        anime_data = [index] + list(poss_anime.loc[index])
                        full_title = anime_data[2] # index 2 is title_english
                        print("Selected anime \'%s\'."%full_title)
                        # add anime to list of anime
                        animes.append(anime_data)
    return animes

In [7]:
############################################################################################################################

In [8]:
# let's try it with our data lel
anime_df = pd.read_csv("data/normalized_princ_model_training_data.csv")
# remove Unnamed: 0 column
anime_df = anime_df.drop(columns=['Unnamed: 0'])
anime_df.head()

Unnamed: 0,animeID,title_english,principal component 1,principal component 2,principal component 3,principal component 4,principal component 5,principal component 6,principal component 7,principal component 8,...,principal component 291,principal component 292,principal component 293,principal component 294,principal component 295,principal component 296,principal component 297,principal component 298,principal component 299,principal component 300
0,1,Cowboy Bebop,1.311131,-1.299874,-4.241516,2.813546,3.393908,0.74805,0.997277,-2.66376,...,0.370829,0.259884,-1.085205,-0.38012,-0.51325,0.240995,-0.466804,0.379151,-0.290872,-0.173427
1,5,Cowboy Bebop: The Movie,-0.877062,-0.658764,-6.071744,-2.503581,0.472272,-1.036241,-1.964439,0.46638,...,0.479847,0.01866,0.334813,0.040371,0.091011,0.700828,0.340917,0.102872,-0.480773,0.488478
2,6,Trigun,-2.9588,0.186702,-1.310995,-1.056926,3.076349,2.560385,-0.645996,-0.407952,...,0.097136,0.209499,-0.240368,-0.797081,-0.105798,-0.01373,0.300802,-0.055284,-0.16422,-0.205732
3,7,Witch Hunter Robin,0.579969,-0.164921,-3.543831,0.887435,-0.469471,1.760493,0.305682,-1.53436,...,-0.157432,-0.139492,-0.372866,0.935684,-0.077048,-0.116114,-0.245878,0.61854,-0.916615,-0.161398
4,8,Beet the Vandel Buster,-1.146832,1.695643,-5.688726,-0.873938,0.49984,-0.64993,0.549448,0.12864,...,0.271601,0.173991,-0.644193,0.23211,-0.148602,0.334401,-0.522669,0.118988,-0.303244,0.841451


In [9]:
# read in output data (general info about animes) as well
output_df = pd.read_csv("data/relevant_output_data.csv")
# drop Unnamed: 0 column
output_df = output_df.drop(columns=["Unnamed: 0"])
output_df.head()

Unnamed: 0,animeID,title_english,synopsis
0,1,Cowboy Bebop,"In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: The Movie,"Another day, another bounty—such is the life o..."
2,6,Trigun,"Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,Witches are individuals with special powers li...
4,8,Beet the Vandel Buster,It is the dark century and the people are suff...


In [10]:
# EXAMPLE 1
# From a single anime title: ['Attack on Titan']
input_titles = ['Attack on Titan']
# get relevant data about our anime
my_anime_list = get_anime(anime_df, input_titles)
# print all titles from my_anime_list
print([anime[2] for anime in my_anime_list])

      animeID                           title_english
1810    16498                         Attack on Titan
1916    19285         Attack on Titan: Since That Day
2072    23775  Attack on Titan: Crimson Bow and Arrow
2073    23777       Attack on Titan: Wings of Freedom
2119    25777                Attack on Titan Season 2
2304    31374            Attack on Titan: Junior High
2705    35760                Attack on Titan Season 3
Please enter indexes from above separated with commas ',' or -1 if absent: 
 Ex. enter'4,2,0' for indexes 4, 2, and 0  1810
Selected anime 'Attack on Titan'.
['Attack on Titan']


In [11]:
# need to clean our inputs from overall database
clean_anime_df = inputs_done_gone(anime_df, my_anime_list)
clean_anime_df.head()

Unnamed: 0,animeID,title_english,principal component 1,principal component 2,principal component 3,principal component 4,principal component 5,principal component 6,principal component 7,principal component 8,...,principal component 291,principal component 292,principal component 293,principal component 294,principal component 295,principal component 296,principal component 297,principal component 298,principal component 299,principal component 300
0,1,Cowboy Bebop,1.311131,-1.299874,-4.241516,2.813546,3.393908,0.74805,0.997277,-2.66376,...,0.370829,0.259884,-1.085205,-0.38012,-0.51325,0.240995,-0.466804,0.379151,-0.290872,-0.173427
1,5,Cowboy Bebop: The Movie,-0.877062,-0.658764,-6.071744,-2.503581,0.472272,-1.036241,-1.964439,0.46638,...,0.479847,0.01866,0.334813,0.040371,0.091011,0.700828,0.340917,0.102872,-0.480773,0.488478
2,6,Trigun,-2.9588,0.186702,-1.310995,-1.056926,3.076349,2.560385,-0.645996,-0.407952,...,0.097136,0.209499,-0.240368,-0.797081,-0.105798,-0.01373,0.300802,-0.055284,-0.16422,-0.205732
3,7,Witch Hunter Robin,0.579969,-0.164921,-3.543831,0.887435,-0.469471,1.760493,0.305682,-1.53436,...,-0.157432,-0.139492,-0.372866,0.935684,-0.077048,-0.116114,-0.245878,0.61854,-0.916615,-0.161398
4,8,Beet the Vandel Buster,-1.146832,1.695643,-5.688726,-0.873938,0.49984,-0.64993,0.549448,0.12864,...,0.271601,0.173991,-0.644193,0.23211,-0.148602,0.334401,-0.522669,0.118988,-0.303244,0.841451


In [12]:
# now, before we can run KNN, we need to get the average vector of our inputs
avg = average_input(my_anime_list)
print(avg)

[-3.9251733362043666, -0.8041347483370096, -5.5623882983996085, 0.8110395349756183, 4.520072598937092, 5.113624753545662, -4.3192040668193705, -3.485466478489911, -1.6756276158269556, -0.7077401210900308, -7.904183810420007, -1.0332857527066897, -0.5420518367389274, -2.153947836215888, -1.1392744581148908, 2.04429783896998, -0.26157081721937503, 2.2187082865629706, -5.004791862479135, -0.9495444152877452, 4.0606954783542415, 2.2371445465711037, -0.7204816428466857, -1.3879002402931788, -0.12130187936589175, -0.4092753231293427, 3.152928272457124, -5.3303966704816155, 3.750182669842185, -2.514902174574132, -3.90491575331256, -2.4563743902678064, -0.3039789560580741, 3.4543909083152746, -1.105112970526818, -2.9037680503242105, -1.4274128423633163, -1.7299034544432177, -0.09138812580439133, -0.4387118217574753, -1.273562197129022, -2.593453856475288, 1.6647164218097934, 2.7383963552877844, -1.8806975812417377, -2.9278022833035418, 0.7327798772939014, -0.1239428992778952, -0.26115195361222

In [13]:
# run KNN on our inputs: my_anime_list and k = 5 (for example)
rec_indices = knn(query=avg, data=clean_anime_df, k=5)
print(rec_indices)

[494, 2481, 2230, 680, 2839]


In [14]:
# Now, let's see what we were recommended!!
for i in rec_indices:
    details = list(output_df.loc[i])
    print("TITLE: %s"%details[1])
    print("SYNOPSIS: %s"%details[2])

TITLE: Handsome Girl
SYNOPSIS: Hagiwara Mio is a 14-year-old TV actress who has gotten popular. One day on a set, she meets Kumagai Ichiya, who tells her that her acting "stinks." Mio is hurt by his comments, but she finds that she can't get Ichiya out of her mind. It turns out that Ichiya is a promising director, who directed a music video for Mio's friend Sawaki Aya, an idol singer. After the success of this music video, Ichiya is asked to direct a movie, and he wants Mio to be the heroine, because he feels that she is a "handsome" girl like the actresses of old. How will Mio cope with her feelings toward Ichiya? (Source: T.H.E.M. Anime Review, edited)
TITLE: Rainy Cocoa in Hawaii
SYNOPSIS: Ame-iro Cocoa: Rainy Cocoa Goes to Hawaii!! Nozomu Tokura, who is Aoi's older brother, is appointed as the manager of the Hawaii store. With more new characters, will he be able to open up the store there successfully?
TITLE: THE IDOLM@STER CINDERELLA GIRLS Special Program
SYNOPSIS: The iDOLM@STER

In [15]:
############################################################################################################################

In [16]:
# EXAMPLE 2
# From a single series of anime: ['Attack on Titan', 'Attack on Titan: Since That Day', 'Attack on Titan: Crimson Bow and Arrow', 'Attack on Titan: Wings of Freedom', 'Attack on Titan Season 2', 'Attack on Titan: Junior High', 'Attack on Titan Season 3']
input_titles = ['Attack on Titan', 'Attack on Titan: Since That Day', 'Attack on Titan: Crimson Bow and Arrow', 
                   'Attack on Titan: Wings of Freedom', 'Attack on Titan Season 2', 'Attack on Titan: Junior High', 
                   'Attack on Titan Season 3']
# get relevant data about our anime
my_anime_list = get_anime(anime_df, input_titles)
# print all titles from my_anime_list
print([anime[2] for anime in my_anime_list])

      animeID                           title_english
1810    16498                         Attack on Titan
1916    19285         Attack on Titan: Since That Day
2072    23775  Attack on Titan: Crimson Bow and Arrow
2073    23777       Attack on Titan: Wings of Freedom
2119    25777                Attack on Titan Season 2
2304    31374            Attack on Titan: Junior High
2705    35760                Attack on Titan Season 3
Please enter indexes from above separated with commas ',' or -1 if absent: 
 Ex. enter'4,2,0' for indexes 4, 2, and 0  1810
Selected anime 'Attack on Titan'.
Selected anime 'Attack on Titan: Since That Day'.
Selected anime 'Attack on Titan: Crimson Bow and Arrow'.
Selected anime 'Attack on Titan: Wings of Freedom'.
Selected anime 'Attack on Titan Season 2'.
Selected anime 'Attack on Titan: Junior High'.
Selected anime 'Attack on Titan Season 3'.
['Attack on Titan', 'Attack on Titan: Since That Day', 'Attack on Titan: Crimson Bow and Arrow', 'Attack on Titan: Win

In [17]:
# need to clean our inputs from overall database
clean_anime_df = inputs_done_gone(anime_df, my_anime_list)
clean_anime_df.head()

Unnamed: 0,animeID,title_english,principal component 1,principal component 2,principal component 3,principal component 4,principal component 5,principal component 6,principal component 7,principal component 8,...,principal component 291,principal component 292,principal component 293,principal component 294,principal component 295,principal component 296,principal component 297,principal component 298,principal component 299,principal component 300
0,1,Cowboy Bebop,1.311131,-1.299874,-4.241516,2.813546,3.393908,0.74805,0.997277,-2.66376,...,0.370829,0.259884,-1.085205,-0.38012,-0.51325,0.240995,-0.466804,0.379151,-0.290872,-0.173427
1,5,Cowboy Bebop: The Movie,-0.877062,-0.658764,-6.071744,-2.503581,0.472272,-1.036241,-1.964439,0.46638,...,0.479847,0.01866,0.334813,0.040371,0.091011,0.700828,0.340917,0.102872,-0.480773,0.488478
2,6,Trigun,-2.9588,0.186702,-1.310995,-1.056926,3.076349,2.560385,-0.645996,-0.407952,...,0.097136,0.209499,-0.240368,-0.797081,-0.105798,-0.01373,0.300802,-0.055284,-0.16422,-0.205732
3,7,Witch Hunter Robin,0.579969,-0.164921,-3.543831,0.887435,-0.469471,1.760493,0.305682,-1.53436,...,-0.157432,-0.139492,-0.372866,0.935684,-0.077048,-0.116114,-0.245878,0.61854,-0.916615,-0.161398
4,8,Beet the Vandel Buster,-1.146832,1.695643,-5.688726,-0.873938,0.49984,-0.64993,0.549448,0.12864,...,0.271601,0.173991,-0.644193,0.23211,-0.148602,0.334401,-0.522669,0.118988,-0.303244,0.841451


In [18]:
# now, before we can run KNN, we need to get the average vector of our inputs
avg = average_input(my_anime_list)
print(avg)

[16.244867682773535, -3.3973259760639323, 2.543107296705242, -13.648966390385967, 7.043129726775206, 10.119563746152403, -7.0757840490611725, 0.3609445936853911, -3.986316777964671, 1.9216444437682376, -2.1174860850462496, 2.9211755903584384, -1.0606879142654202, 3.2202571040672887, -0.668091682043614, -0.351148712950864, 1.3752012318479436, -0.6872322714727535, -2.4929267515151494, -2.251819391169516, 2.4653917354224295, 1.4179103388547483, -3.117303974667795, -0.8001351131094526, 0.11669095618930843, 1.8976109507463104, 1.8849510502532671, -2.1193003901795717, 2.609352994588458, -2.073526005360022, 1.1702933318613016, 0.940081096342956, -2.3527068493550987, 0.8200550497450141, 0.39729822756216226, 0.3665851788025278, -1.136032342037544, -0.28412015651944883, -0.8994298542963308, -2.551942046518815, -1.3401346074665954, -0.7976784442697031, -0.28862257564612787, -0.03354585195580771, 0.6385066075536004, -1.1221714133726073, 0.5436353866090192, -0.7453556748963147, 0.26983997916966146,

In [19]:
# run KNN on our inputs: my_anime_list and k = 5 (for example)
rec_indices = knn(query=avg, data=clean_anime_df, k=5)
print(rec_indices)

[2349, 2610, 2415, 329, 214]


In [20]:
# Get recommendations
for i in rec_indices:
    details = list(output_df.loc[i])
    print("TITLE: %s"%details[1])
    print("SYNOPSIS: %s"%details[2])

TITLE: Mahou Shoujo Nante Mouiidesukara
SYNOPSIS: Yuzuka Hanami is a young, carefree girl who lives the most ordinary life imaginable. Although her father works around the clock and her mother is rarely home, she still enjoys herself and strives to be an excellent student. Miton, on the other hand, is an alien life-form with the ability to transform his master into a magical girl, a warrior who fights evil wherever it may appear. However, there are not as many enemies as there used to be, so Miton has been out of work for a while. Starving and homeless, he has taken up residence in a pile of garbage. As Yuzuka walks past him one day, Miton seizes the opportunity to offer his services to the young girl. Yuzuka reluctantly agrees, but when she transforms into a magical girl and discovers that her outfit is a swimsuit, she begins to have second thoughts about what she has gotten herself into! [Written by MAL Rewrite]
TITLE: Chronos Ruler
SYNOPSIS: ​Like many in her class, Koyuki Honda loo

In [21]:
############################################################################################################################

In [22]:
# EXAMPLE 3
# From a relatively similar assortment of anime: ['Attack on Titan', 'Attack on Titan Season 2', 
#   'Bungo Stray Dogs', 'My Hero Academia 3', 'Nanbaka', 'Nanbaka: Season 2', 'Nanbaka: Idiots with Student Numbers!',
#   'One Punch Man']
# let's get our input animes :D
input_titles = []

while(True):
    title = input("Enter anime name: \n (\'-1\' when done)   ")
    if(title == "-1"):
        break
    input_titles.append(title)

# print input titles
print(input_titles)

Enter anime name: 
 ('-1' when done)   Attack on Titan
Enter anime name: 
 ('-1' when done)   Bungo Stray Dogs
Enter anime name: 
 ('-1' when done)   My Hero Academia
Enter anime name: 
 ('-1' when done)   Nanbaka
Enter anime name: 
 ('-1' when done)   One Punch Man
Enter anime name: 
 ('-1' when done)   -1
['Attack on Titan', 'Bungo Stray Dogs', 'My Hero Academia', 'Nanbaka', 'One Punch Man']


In [23]:
# time to get our anime data from overall training data
my_anime_list = get_anime(anime_df, input_titles)

# print all titles from my_anime_list
print([anime[2] for anime in my_anime_list])

      animeID                           title_english
1810    16498                         Attack on Titan
1916    19285         Attack on Titan: Since That Day
2072    23775  Attack on Titan: Crimson Bow and Arrow
2073    23777       Attack on Titan: Wings of Freedom
2119    25777                Attack on Titan Season 2
2304    31374            Attack on Titan: Junior High
2705    35760                Attack on Titan Season 3
Please enter indexes from above separated with commas ',' or -1 if absent: 
 Ex. enter'4,2,0' for indexes 4, 2, and 0  1810,2119
Selected anime 'Attack on Titan'.
Selected anime 'Attack on Titan Season 2'.
      animeID       title_english
2316    31478    Bungo Stray Dogs
2424    32867  Bungo Stray Dogs 2
Please enter indexes from above separated with commas ',' or -1 if absent: 
 Ex. enter'4,2,0' for indexes 4, 2, and 0  2316
Selected anime 'Bungo Stray Dogs'.
      animeID       title_english
2365    31964    My Hero Academia
2503    33486  My Hero Academia 2

In [24]:
# need to clean our inputs from overall database
clean_anime_df = inputs_done_gone(anime_df, my_anime_list)

clean_anime_df.head()

Unnamed: 0,animeID,title_english,principal component 1,principal component 2,principal component 3,principal component 4,principal component 5,principal component 6,principal component 7,principal component 8,...,principal component 291,principal component 292,principal component 293,principal component 294,principal component 295,principal component 296,principal component 297,principal component 298,principal component 299,principal component 300
0,1,Cowboy Bebop,1.311131,-1.299874,-4.241516,2.813546,3.393908,0.74805,0.997277,-2.66376,...,0.370829,0.259884,-1.085205,-0.38012,-0.51325,0.240995,-0.466804,0.379151,-0.290872,-0.173427
1,5,Cowboy Bebop: The Movie,-0.877062,-0.658764,-6.071744,-2.503581,0.472272,-1.036241,-1.964439,0.46638,...,0.479847,0.01866,0.334813,0.040371,0.091011,0.700828,0.340917,0.102872,-0.480773,0.488478
2,6,Trigun,-2.9588,0.186702,-1.310995,-1.056926,3.076349,2.560385,-0.645996,-0.407952,...,0.097136,0.209499,-0.240368,-0.797081,-0.105798,-0.01373,0.300802,-0.055284,-0.16422,-0.205732
3,7,Witch Hunter Robin,0.579969,-0.164921,-3.543831,0.887435,-0.469471,1.760493,0.305682,-1.53436,...,-0.157432,-0.139492,-0.372866,0.935684,-0.077048,-0.116114,-0.245878,0.61854,-0.916615,-0.161398
4,8,Beet the Vandel Buster,-1.146832,1.695643,-5.688726,-0.873938,0.49984,-0.64993,0.549448,0.12864,...,0.271601,0.173991,-0.644193,0.23211,-0.148602,0.334401,-0.522669,0.118988,-0.303244,0.841451


In [25]:
# now, before we can run KNN, we need to get the average vector of our inputs
avg = average_input(my_anime_list)
print(avg)

[-0.26187169826713597, -0.6223437198450654, -2.7640645151453818, 0.5428536450825734, -0.5229595898450449, 1.006645087521119, -2.301206722150935, -1.446522413144313, 0.2189986570852312, 0.1002835427889214, -2.801272981140133, -1.4157450024607294, -0.09573101635886633, -0.44875795393383616, 0.6688287518669537, 0.5816557632532964, 0.514554189552213, 1.525388903865321, -1.3692356232106808, -0.7028450294757473, 1.372415586368385, 0.7696922847505063, -0.19754721696318614, -0.7359311349023903, -0.13385817787648022, -0.1360728321921176, 0.48197149037105236, -0.5875646846501377, 1.050864796427776, -1.8522477987267276, -0.8279269225197536, -0.16700013821103804, 0.1981378795636637, 1.0252756506116198, -0.7430749812945876, -0.7998707221471164, 0.017967266879349936, -0.5023632492948615, 0.353978177484429, -0.5425766874173292, -0.4868964569313585, -0.320905686697994, -0.24655430083363838, 0.8888570453894349, -0.3600135371906611, -1.4550306337161716, 0.5090195753714379, -0.24458776770428842, -0.36730

In [26]:
# run KNN on our inputs: my_anime_list and k = 7 (for example)
rec_indices = knn(query=avg, data=clean_anime_df, k=7)

print(rec_indices)

[1853, 2062, 704, 1399, 178, 2394, 2627]


In [27]:
# Now, let's see what we were recommended!!
for i in rec_indices:
    details = list(output_df.loc[i])
    print("TITLE: %s"%details[1])
    print("SYNOPSIS: %s"%details[2])

TITLE: Recently, my sister is unusual.
SYNOPSIS: Saikin, Imouto no Yousu ga Chotto Okashiinda ga. follows a family just starting to rebuild. When they marry, Mr. and Mrs. Kanzaki bring a teenage son and daughter along for the ride. But high school freshman Mitsuki Kanzaki is less than thrilled. Stinging from a history of absent and abusive father figures, she is slow to accept her stepfather and stepbrother. But after an accident lands Mitsuki in the hospital, she finds herself possessed by the ghost of Hiyori Kotobuki, a girl her age who was deeply in love with Mitsuki's stepbrother Yuuya. Hiyori cannot pass on to her final reward because of her unrequited love for Yuuya, meaning she's got to consummate it... in Mitsuki's body?! Now, Mitsuki's life depends on getting Hiyori to Heaven. But will she get used to sharing herself with a pushy, amorous ghost? Can she overcome her distrust of her new family? Can she bring herself to fulfill Hiyori's feelings for Yuuya? And might she be hidin

In [28]:
############################################################################################################################

In [29]:
# EXAMPLE 4
# From different animes: ['AKIRA', 'Desert Punk', 'Naruto', 'D.N.Angel', 'Rurouni Kenshin']
input_titles = ['AKIRA', 'Desert Punk', 'Naruto', 'D.N.Angel', 'Rurouni Kenshin']
# get relevant data about our anime
my_anime_list = get_anime(anime_df, input_titles)
# print all titles from my_anime_list
print([anime[2] for anime in my_anime_list])

Selected anime 'AKIRA'.
Selected anime 'Desert Punk'.
      animeID                                      title_english
7          20                                             Naruto
257       442  Naruto the Movie: Ninja Clash in the Land of Snow
438       936   Naruto the Movie 2: Legend of the Stone of Gelel
685      1735                                  Naruto: Shippuden
842      2472                        Naruto: Shippuden the Movie
1090     4134          Naruto Shippuden: Konoha Gakuen - Special
1115     4437              Naruto: Shippuden the Movie 2 -Bonds-
1553    10686    Chunin Exam on Fire! and Naruto vs. Konohamaru!
1705    13667                    Road to Ninja: Naruto the Movie
1823    16870                         The Last: Naruto the Movie
2182    28755                           Boruto: Naruto the Movie
Please enter indexes from above separated with commas ',' or -1 if absent: 
 Ex. enter'4,2,0' for indexes 4, 2, and 0  7
Selected anime 'Naruto'.
Selected anime 'D.N.

In [30]:
# need to clean our inputs from overall database
clean_anime_df = inputs_done_gone(anime_df, my_anime_list)
clean_anime_df.head()

Unnamed: 0,animeID,title_english,principal component 1,principal component 2,principal component 3,principal component 4,principal component 5,principal component 6,principal component 7,principal component 8,...,principal component 291,principal component 292,principal component 293,principal component 294,principal component 295,principal component 296,principal component 297,principal component 298,principal component 299,principal component 300
0,1,Cowboy Bebop,1.311131,-1.299874,-4.241516,2.813546,3.393908,0.74805,0.997277,-2.66376,...,0.370829,0.259884,-1.085205,-0.38012,-0.51325,0.240995,-0.466804,0.379151,-0.290872,-0.173427
1,5,Cowboy Bebop: The Movie,-0.877062,-0.658764,-6.071744,-2.503581,0.472272,-1.036241,-1.964439,0.46638,...,0.479847,0.01866,0.334813,0.040371,0.091011,0.700828,0.340917,0.102872,-0.480773,0.488478
2,6,Trigun,-2.9588,0.186702,-1.310995,-1.056926,3.076349,2.560385,-0.645996,-0.407952,...,0.097136,0.209499,-0.240368,-0.797081,-0.105798,-0.01373,0.300802,-0.055284,-0.16422,-0.205732
3,7,Witch Hunter Robin,0.579969,-0.164921,-3.543831,0.887435,-0.469471,1.760493,0.305682,-1.53436,...,-0.157432,-0.139492,-0.372866,0.935684,-0.077048,-0.116114,-0.245878,0.61854,-0.916615,-0.161398
4,8,Beet the Vandel Buster,-1.146832,1.695643,-5.688726,-0.873938,0.49984,-0.64993,0.549448,0.12864,...,0.271601,0.173991,-0.644193,0.23211,-0.148602,0.334401,-0.522669,0.118988,-0.303244,0.841451


In [31]:
# now, before we can run KNN, we need to get the average vector of our inputs
avg = average_input(my_anime_list)
print(avg)

[-2.224826639558806, -0.14939572585115837, -0.3207085675662861, 1.5508571260618205, 1.8910910075846075, 1.603978375201877, -2.3220950488165095, -0.16483840479131523, 1.0090639262336718, -0.9640535196876222, -2.342128233513717, -2.2274351408947295, -0.4523796284711438, -1.1989433312486788, 1.116371779125099, -0.09521308961251974, 0.5171272317193187, 0.12783013734184684, -0.018668464911993966, -0.5777996864537627, 0.8295077652313136, 1.2485711147570022, 0.0020874986107689785, 0.2857881970590078, 0.6589374317233148, 0.5534666275969952, 1.0278025963720903, -0.7584755771443745, 0.16907952317601424, -0.7944899277140657, -0.6796745784028255, -0.8102773709343551, 0.2824891046698507, 0.14947912259756685, -0.5125056394270427, 0.8018935293711449, -0.3490268161296086, -0.6688157892016167, 0.19172788238425892, -0.20287482370207646, 0.9255461155366056, -0.12812004628785653, -0.26268127204293973, 0.32786240577716935, 0.2359424285225161, -0.3298479897582868, 0.5993755751175602, -0.3427222393685404, -0

In [32]:
# run KNN on our inputs: my_anime_list and k = 5 (for example)
rec_indices = knn(query=avg, data=clean_anime_df, k=5)
print(rec_indices)

[1232, 2758, 2778, 1933, 2679]


In [33]:
# Now, let's see what we were recommended!!
for i in rec_indices:
    details = list(output_df.loc[i])
    print("TITLE: %s"%details[1])
    print("SYNOPSIS: %s"%details[2])

TITLE: Cinnamoroll: The Movie
SYNOPSIS: Mocha, Chiffon, Cappuccino, Espresso, and Milk go with Cinnamoroll in this adventure of a lifetime. Join them in their quest, meeting new friends like Anna (the girl who found Cinnamoroll/Cinnamon) and discovering new places like the Forest of Pastries and Bread and the Coffee Waterfalls.
TITLE: THE IDOLM@STER CINDERELLA GIRLS Theater (Web) 2nd Season
SYNOPSIS: Bonus segments of Cinderella Girls Gekijou 2nd Season streamed on The iDOLM@STER Cinderella Girls Mobage following the television broadcast.
TITLE: Working Buddies!
SYNOPSIS: The story centers around the cheerful calico Tapio Chatorazawa and the pessimistic Russian blue Kuehiko Roshihara. The two study in the same college, and also work together on various odd jobs. (Source: ANN)
TITLE: Robot Girls Z
SYNOPSIS: The story revolves around a new team of "Toei robot girls" named "Team Z" that is made up of three girls based on Toei's Mazinger Z franchise: Mazinger Z, Great Mazinger, and UFO Rob