In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer, MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
%matplotlib inline

pd.set_option("max_colwidth", None)


In [2]:
Columns_Used = ["MAL_ID", "Name", "Score", "Genders", "Type", "Episodes", "Premiered",
           "Studios", "Source", "Rating", "Members"]

In [3]:
anime_data=pd.read_csv('/Users/adrienbenoist/Desktop/Ironhack/GitHub/data-ft-par-labs/Projects/Anime dataset/anime.csv',usecols=Columns_Used)

In [4]:
def process_multilabel(series):
    series = series.split(",")
    if "Unknown" in series:
        series.remove("Unknown")
    return series

anime_data["Genders"] = anime_data["Genders"].map(process_multilabel)
anime_data["Studios"] = anime_data["Studios"].map(process_multilabel)
anime_data["Score"] = anime_data["Score"].replace("Unknown", 0).astype(float)
anime_data["Episodes"] = anime_data["Episodes"].replace("Unknown", 0).astype(int)

anime_data.head()


Unnamed: 0,MAL_ID,Name,Score,Genders,Type,Episodes,Premiered,Studios,Source,Rating,Members
0,1,Cowboy Bebop,8.78,"[Action, Adventure, Comedy, Drama, Sci-Fi, Space]",TV,26,Spring 1998,[Sunrise],Original,R - 17+ (violence & profanity),1251960
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"[Action, Drama, Mystery, Sci-Fi, Space]",Movie,1,Unknown,[Bones],Original,R - 17+ (violence & profanity),273145
2,6,Trigun,8.24,"[Action, Sci-Fi, Adventure, Comedy, Drama, Shounen]",TV,26,Spring 1998,[Madhouse],Manga,PG-13 - Teens 13 or older,558913
3,7,Witch Hunter Robin,7.27,"[Action, Mystery, Police, Supernatural, Drama, Magic]",TV,26,Summer 2002,[Sunrise],Original,PG-13 - Teens 13 or older,94683
4,8,Bouken Ou Beet,6.98,"[Adventure, Fantasy, Shounen, Supernatural]",TV,52,Fall 2004,[Toei Animation],Manga,PG - Children,13224


In [5]:
def preprocessing_category(df, column, is_multilabel=False):
    # Binarise labels
    lb = LabelBinarizer()
    if is_multilabel:
        lb = MultiLabelBinarizer()
        
    expandedLabelData = lb.fit_transform(df[column])
    labelClasses = lb.classes_

    # Create a pandas.DataFrame from our output
    category_df = pd.DataFrame(expandedLabelData, columns=labelClasses)
    del df[column]
    return pd.concat([df, category_df], axis=1)

anime_metadata = anime_data.copy()
anime_metadata = preprocessing_category(anime_metadata, "Type")
anime_metadata = preprocessing_category(anime_metadata, "Premiered")
anime_metadata = preprocessing_category(anime_metadata, "Studios", is_multilabel=True)
anime_metadata = preprocessing_category(anime_metadata, "Source")
anime_metadata = preprocessing_category(anime_metadata, "Rating")

Genders = anime_metadata["Genders"]
ID_NAME = anime_metadata[["MAL_ID", "Name"]]

del anime_metadata["Genders"]
del anime_metadata["MAL_ID"]
del anime_metadata["Name"]
del anime_metadata["Unknown"]

anime_metadata[["Score", "Episodes", "Members"]] = MinMaxScaler().fit_transform(anime_metadata[["Score", "Episodes", "Members"]])
anime_metadata = anime_metadata.values

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfv = TfidfVectorizer(min_df=3,  max_features=None, 
            strip_accents='unicode', analyzer='word',token_pattern=r'\w{1,}',
            ngram_range=(1, 3),
            stop_words = 'english')

# Filling NaNs with empty string
genres_original = anime_data['Genders'].fillna('').astype(str)
genres_vector_tf_idf = tfv.fit_transform(genres_original)

genres_vector_one_hot = preprocessing_category(pd.DataFrame(Genders), "Genders", True).values

In [8]:
print("anime_metadata.shape:", anime_metadata.shape)
print("genres_vector_tf_idf.shape:", genres_vector_tf_idf.shape)
print("genres_vector_one_hot.shape:", genres_vector_one_hot.shape)

anime_metadata.shape: (17562, 1208)
genres_vector_tf_idf.shape: (17562, 2282)
genres_vector_one_hot.shape: (17562, 83)


In [9]:
def get_recommended(vector, query_index, n_neighbors=10):
    model_knn = NearestNeighbors(metric='cosine', n_neighbors=n_neighbors)
    model_knn.fit(csr_matrix(vector))

    distances, indices = model_knn.kneighbors(vector[query_index,:].reshape(1, -1), n_neighbors = n_neighbors)
    result = []
    for i in range(0, len(distances.flatten())):
        index = indices.flatten()[i]
        if index == query_index:
            continue
        result.append(anime_data.iloc[index])
        
    return pd.DataFrame(result)

In [10]:
# query_index = np.random.choice(anime_metadata.shape[0])
query_index = ID_NAME[ID_NAME.MAL_ID == 32281].index[0]
anime_data.iloc[[query_index]]

Unnamed: 0,MAL_ID,Name,Score,Genders,Type,Episodes,Premiered,Studios,Source,Rating,Members
11308,32281,Kimi no Na wa.,8.96,"[Romance, Supernatural, School, Drama]",Movie,1,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,1726660


In [11]:
get_recommended(anime_metadata, query_index, 11)

Unnamed: 0,MAL_ID,Name,Score,Genders,Type,Episodes,Premiered,Studios,Source,Rating,Members
1535,1689,Byousoku 5 Centimeter,7.73,"[Drama, Romance, Slice of Life]",Movie,3,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,695375
7521,16782,Kotonoha no Niwa,8.05,"[Slice of Life, Psychological, Drama, Romance]",Movie,1,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,574997
15158,38826,Tenki no Ko,8.41,"[Slice of Life, Drama, Romance, Fantasy]",Movie,1,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,475388
406,433,"Kumo no Mukou, Yakusoku no Basho",7.56,"[Drama, Military, Romance, Sci-Fi]",Movie,1,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,177428
5891,9760,Hoshi wo Ou Kodomo,7.58,"[Adventure, Romance, Fantasy]",Movie,1,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,164081
14230,37396,Shikioriori,7.17,"[Drama, Romance, Slice of Life]",Movie,3,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,97673
5785,9488,Cencoroll Connect,7.25,"[Action, Sci-Fi]",Movie,1,Unknown,[],Original,PG-13 - Teens 13 or older,29639
514,548,Wonderful Days,7.04,"[Action, Drama, Romance, Sci-Fi]",Movie,1,Unknown,[],Original,PG-13 - Teens 13 or older,31319
2790,3067,Saraba Uchuu Senkan Yamato: Ai no Senshi-tachi,7.08,"[Action, Military, Sci-Fi, Space, Drama]",Movie,1,Unknown,[],Original,PG-13 - Teens 13 or older,3289
4911,7144,Uchuu Senkan Yamato: Fukkatsu-hen,7.02,"[Action, Military, Adventure, Space, Drama]",Movie,1,Unknown,[],Original,PG-13 - Teens 13 or older,3440


In [12]:
get_recommended(genres_vector_tf_idf, query_index, 10)

Unnamed: 0,MAL_ID,Name,Score,Genders,Type,Episodes,Premiered,Studios,Source,Rating,Members
513,547,Wind: A Breath of Heart OVA,6.24,"[Romance, Supernatural, School, Drama]",OVA,3,Unknown,[],Visual novel,PG-13 - Teens 13 or older,2620
1382,1524,Fuujin Monogatari,7.22,"[Slice of Life, Fantasy, Supernatural, School, Drama]",TV,13,Fall 2004,[Production I.G],Original,G - All Ages,14453
8125,19695,Clover 4/3,0.0,"[School, Drama]",ONA,24,Unknown,[],Unknown,PG-13 - Teens 13 or older,453
7977,18881,Code Geass: Hangyaku no Lelouch - Kiseki no Birthday Omake Flash,5.77,"[Supernatural, School]",Special,1,Unknown,[Sunrise],Unknown,PG-13 - Teens 13 or older,8562
5504,8728,Code Geass: Hangyaku no Lelouch - Kiseki no Birthday,6.89,"[Supernatural, School]",Special,1,Unknown,[Sunrise],Original,PG-13 - Teens 13 or older,26884
1475,1624,To Heart 2 Special,6.38,"[Romance, School, Drama]",Special,1,Unknown,[OLM],Unknown,PG-13 - Teens 13 or older,3365
691,756,School Days ONA,5.96,"[Romance, School, Drama]",ONA,1,Unknown,[],Visual novel,R - 17+ (violence & profanity),31294
3592,4339,Ashita e Attack!,6.4,"[Sports, School, Drama]",TV,23,Spring 1977,[Nippon Animation],Original,PG-13 - Teens 13 or older,1504
17510,47537,Inou no AICis: ESP & High School Detective,0.0,"[Mystery, Supernatural, School]",ONA,0,Unknown,[],Original,Unknown,356


In [13]:
get_recommended(genres_vector_one_hot, query_index, 10)

Unnamed: 0,MAL_ID,Name,Score,Genders,Type,Episodes,Premiered,Studios,Source,Rating,Members
513,547,Wind: A Breath of Heart OVA,6.24,"[Romance, Supernatural, School, Drama]",OVA,3,Unknown,[],Visual novel,PG-13 - Teens 13 or older,2620
1475,1624,To Heart 2 Special,6.38,"[Romance, School, Drama]",Special,1,Unknown,[OLM],Unknown,PG-13 - Teens 13 or older,3365
691,756,School Days ONA,5.96,"[Romance, School, Drama]",ONA,1,Unknown,[],Visual novel,R - 17+ (violence & profanity),31294
944,1039,Mizuiro (2003),6.41,"[Romance, Supernatural, Drama]",OVA,2,Unknown,[OLM],Visual novel,PG-13 - Teens 13 or older,3293
11310,32287,Kanamewo,6.69,"[Romance, Supernatural, Drama, Shoujo Ai]",ONA,1,Unknown,[],Original,PG-13 - Teens 13 or older,12413
320,344,Futatsu no Spica,7.37,"[Sci-Fi, Supernatural, Drama, School]",TV,20,Fall 2003,[Group TAC],Manga,PG-13 - Teens 13 or older,8664
7574,17020,D.C.III: Da Capo III Special,6.35,"[Romance, Music, School, Drama]",Special,1,Unknown,[Kazami Gakuen Koushiki Douga-bu],Unknown,PG-13 - Teens 13 or older,4509
5613,9014,Kuttsukiboshi,6.12,"[Romance, Supernatural, Drama, Shoujo Ai]",OVA,2,Unknown,[Primastea],Original,R+ - Mild Nudity,35578
337,361,Memories Off,5.93,"[Romance, Drama]",OVA,3,Unknown,[],Visual novel,PG-13 - Teens 13 or older,5715


In [14]:
all_data = np.concatenate((anime_metadata, genres_vector_tf_idf.todense(), genres_vector_one_hot), axis=1)
all_data.shape

(17562, 3573)

In [15]:
get_recommended(all_data, query_index, 10)

Unnamed: 0,MAL_ID,Name,Score,Genders,Type,Episodes,Premiered,Studios,Source,Rating,Members
513,547,Wind: A Breath of Heart OVA,6.24,"[Romance, Supernatural, School, Drama]",OVA,3,Unknown,[],Visual novel,PG-13 - Teens 13 or older,2620
11310,32287,Kanamewo,6.69,"[Romance, Supernatural, Drama, Shoujo Ai]",ONA,1,Unknown,[],Original,PG-13 - Teens 13 or older,12413
7208,15039,Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie,7.95,"[Slice of Life, Supernatural, Drama]",Movie,1,Unknown,[A-1 Pictures],Original,PG-13 - Teens 13 or older,220264
11133,31831,Fantasy,5.01,[],Movie,1,Unknown,[],Original,PG-13 - Teens 13 or older,522
11134,31832,Zawazawa,4.67,[],Movie,1,Unknown,[],Original,PG-13 - Teens 13 or older,578
15158,38826,Tenki no Ko,8.41,"[Slice of Life, Drama, Romance, Fantasy]",Movie,1,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,475388
7521,16782,Kotonoha no Niwa,8.05,"[Slice of Life, Psychological, Drama, Romance]",Movie,1,Unknown,[CoMix Wave Films],Original,PG-13 - Teens 13 or older,574997
6135,10259,Da Yu Hai Tang (Movie),7.58,"[Adventure, Supernatural, Drama, Romance]",Movie,1,Unknown,[B&T],Original,PG-13 - Teens 13 or older,30440
11806,33248,K: Seven Stories Movie 1 - R:B - Blaze,7.37,"[Action, Super Power, Supernatural, Drama]",Movie,1,Unknown,[GoHands],Original,PG-13 - Teens 13 or older,54668


In [16]:
%%time

reduced_all_data = PCA(n_components=250).fit_transform(all_data)
get_recommended(reduced_all_data, query_index, 10)

CPU times: user 45.1 s, sys: 1.86 s, total: 46.9 s
Wall time: 22.8 s


Unnamed: 0,MAL_ID,Name,Score,Genders,Type,Episodes,Premiered,Studios,Source,Rating,Members
513,547,Wind: A Breath of Heart OVA,6.24,"[Romance, Supernatural, School, Drama]",OVA,3,Unknown,[],Visual novel,PG-13 - Teens 13 or older,2620
7208,15039,Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie,7.95,"[Slice of Life, Supernatural, Drama]",Movie,1,Unknown,[A-1 Pictures],Original,PG-13 - Teens 13 or older,220264
11310,32287,Kanamewo,6.69,"[Romance, Supernatural, Drama, Shoujo Ai]",ONA,1,Unknown,[],Original,PG-13 - Teens 13 or older,12413
8350,20903,Harmonie,7.31,"[Slice of Life, Psychological, Drama, School]",Movie,1,Unknown,[Studio Rikka],Original,PG-13 - Teens 13 or older,47480
6135,10259,Da Yu Hai Tang (Movie),7.58,"[Adventure, Supernatural, Drama, Romance]",Movie,1,Unknown,[B&T],Original,PG-13 - Teens 13 or older,30440
16091,40335,Kimi dake ni Motetainda.,0.0,"[Slice of Life, Drama, School]",Movie,1,Unknown,[Signal.MD],Original,PG-13 - Teens 13 or older,5359
10878,31245,Zutto Mae kara Suki deshita.: Kokuhaku Jikkou Iinkai,7.28,"[Romance, School]",Movie,1,Unknown,[Qualia Animation],Music,PG-13 - Teens 13 or older,142882
11806,33248,K: Seven Stories Movie 1 - R:B - Blaze,7.37,"[Action, Super Power, Supernatural, Drama]",Movie,1,Unknown,[GoHands],Original,PG-13 - Teens 13 or older,54668
9871,28805,Bakemono no Ko,8.33,"[Adventure, Supernatural]",Movie,1,Unknown,[Studio Chizu],Original,PG-13 - Teens 13 or older,306149


In [17]:
Cols_used = ["MAL_ID", "Name", "Genders", "sypnopsis"]
anime_data_Synopsis = pd.read_csv('/Users/adrienbenoist/Desktop/Ironhack/GitHub/data-ft-par-labs/Projects/Anime dataset/anime_with_synopsis.csv', usecols=Cols_used)
anime_data_Synopsis.head()

Unnamed: 0,MAL_ID,Name,Genders,sypnopsis
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as ""Cowboys."" The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member's dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebop is a space Western classic and an homage to the smooth and improvised music it is named after."
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of the often unlucky crew of the Bebop. However, this routine is interrupted when Faye, who is chasing a fairly worthless target on Mars, witnesses an oil tanker suddenly explode, causing mass hysteria. As casualties mount due to a strange disease spreading through the smoke from the blast, a whopping three hundred million woolong price is placed on the head of the supposed perpetrator. With lives at stake and a solution to their money problems in sight, the Bebop crew springs into action. Spike, Jet, Faye, and Edward, followed closely by Ein, split up to pursue different leads across Alba City. Through their individual investigations, they discover a cover-up scheme involving a pharmaceutical company, revealing a plot that reaches much further than the ragtag team of bounty hunters could have realized."
2,6,Trigun,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,000,000 bounty on his head. The reason: he's a merciless villain who lays waste to all those that oppose him and flattens entire cities for fun, garnering him the title ""The Humanoid Typhoon."" He leaves a trail of death and destruction wherever he goes, and anyone can count themselves dead if they so much as make eye contact—or so the rumors say. In actuality, Vash is a huge softie who claims to have never taken a life and avoids violence at all costs. With his crazy doughnut obsession and buffoonish attitude in tow, Vash traverses the wasteland of the planet Gunsmoke, all the while followed by two insurance agents, Meryl Stryfe and Milly Thompson, who attempt to minimize his impact on the public. But soon, their misadventures evolve into life-or-death situations as a group of legendary assassins are summoned to bring about suffering to the trio. Vash's agonizing past will be unraveled and his morality and principles pushed to the breaking point."
3,7,Witch Hunter Robin,"Action, Mystery, Police, Supernatural, Drama, Magic","ches are individuals with special powers like ESP, telekinesis, mind control, etc. Robin, a 15-year-old craft user, arrives from Italy to Japan to work for an organization named STN Japan Division (STN-J) as a replacement for one of STN-J's witch hunters who was recently killed. Unlike other divisions of STN, STN-J tries to capture the witches alive in order to learn why and how they became witches in the first place. (Source: ANN)"
4,8,Bouken Ou Beet,"Adventure, Fantasy, Shounen, Supernatural","It is the dark century and the people are suffering under the rule of the devil, Vandel, who is able to manipulate monsters. The Vandel Busters are a group of people who hunt these devils, and among them, the Zenon Squad is known to be the strongest busters on the continent. A young boy, Beet, dreams of joining the Zenon Squad. However, one day, as a result of Beet's fault, the Zenon squad was defeated by the devil, Beltose. The five dying busters sacrificed their life power into their five weapons, Saiga. After giving their weapons to Beet, they passed away. Years have passed since then and the young Vandel Buster, Beet, begins his adventure to carry out the Zenon Squad's will to put an end to the dark century."


In [18]:
query_index_2 = anime_data_Synopsis[anime_data_Synopsis.MAL_ID == 32281].index[0]
anime_data_Synopsis.iloc[[query_index_2]]

Unnamed: 0,MAL_ID,Name,Genders,sypnopsis
10213,32281,Kimi no Na wa.,"Romance, Supernatural, School, Drama","suha Miyamizu, a high school girl, yearns to live the life of a boy in the bustling city of Tokyo—a dream that stands in stark contrast to her present life in the countryside. Meanwhile in the city, Taki Tachibana lives a busy life as a high school student while juggling his part-time job and hopes for a future in architecture. One day, Mitsuha awakens in a room that is not her own and suddenly finds herself living the dream life in Tokyo—but in Taki's body! Elsewhere, Taki finds himself living Mitsuha's life in the humble countryside. In pursuit of an answer to this strange phenomenon, they begin to search for one another. Kimi no Na wa. revolves around Mitsuha and Taki's actions, which begin to have a dramatic impact on each other's lives, weaving them into a fabric held together by fate and circumstance."


In [19]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfv = TfidfVectorizer(min_df=3,  max_features=None, 
            strip_accents='unicode', analyzer='word',token_pattern=r'\w{1,}',
            ngram_range=(1, 3),
            stop_words = 'english')

# Filling NaNs with empty string

synopsis_original = anime_data_Synopsis['sypnopsis'].fillna('').astype(str)
synopsis_vector_tf_idf = tfv.fit_transform(synopsis_original)
synopsis_vector_tf_idf.shape

(16214, 38694)

In [20]:
synopsis_vector_tf_idf

<16214x38694 sparse matrix of type '<class 'numpy.float64'>'
	with 613755 stored elements in Compressed Sparse Row format>

In [21]:
def get_recommended_another_df(vector, query_index, n_neighbors=10):
    model_knn = NearestNeighbors(metric='cosine', n_neighbors=n_neighbors)
    model_knn.fit(csr_matrix(vector))

    distances, indices = model_knn.kneighbors(vector[query_index,:].reshape(1, -1), n_neighbors = n_neighbors)
    result = []
    for i in range(0, len(distances.flatten())):
        index = indices.flatten()[i]
        if index == query_index:
            continue
        result.append(anime_data_Synopsis.iloc[index])
        
    return pd.DataFrame(result)

In [23]:
get_recommended_another_df(synopsis_vector_tf_idf, query_index_2, 10)


Unnamed: 0,MAL_ID,Name,Genders,sypnopsis
11021,33902,Suntory Minami Alps no Tennensui,"Drama, School","Collaboration commercials with Suntory and Kimi no Na wa.. The ""Mitsuha no Omoi"" (Mitsuha's Thoughts) commercial shows the character Mitsuha Miyamizu drinking SUNTORY Minami Alps Tennensui Yogurina, which is yogurt-flavored mineral water. The ""Taki no Omoi"" (Taki's Thoughts) commercial shows the character Taki Tachibana drinking SUNTORY Minami Alps Ten'nensui, which is mineral water. The ""Kasanaru Omoi"" (Overlap Thoughts) commercial shows both Taki and Mitsuha combining their thoughts through mixed dialogue with some alternative footage and a new song. (Source: ANN)"
2460,2931,Mokke,"Slice of Life, Supernatural, Mystery","This is a story about two sisters: Shizuru, is a high school student who is able to see ghosts while her younger sister, Mizuki, is haunted by these apparitions. Frustrated by their abilities, their parents decided to entrust the sisters into the care of their grandparents who live in the countryside. As they adapt to life in the countryside, Shizuru and Mizuki begin to learn about the importance of coexisting nature with these apparitions. (Source: ANN)"
6594,16690,My Life,"Slice of Life, Drama","high school girl, while pondering her future, sees it flash before her eyes."
6086,13283,One Off,Slice of Life,"slice of life of high school girls in the early summer. Shiozaki Haruno is a 17-year-old girl who loves her motor cycle, HONDA's Giorno. She lives in remote highlands and longs for a vivid life in a city. Encounters with people staying at Haruno's parents' boardinghouse encourage her to step forward to her dream."
7385,20835,Joshikousei Nobunaga-chan!!,"Comedy, Ecchi, School",The 16th-century shogun Oda Nobunaga wakes up in a high school girl's body.
6826,17741,Kimi no Iru Machi,"Slice of Life, Drama, Romance, Shounen","Haruto Kirishima lived a calm life out in the countryside, away from the fast-paced life of the city. Then Yuzuki Eba appeared in his life out of nowhere, having come from Tokyo to briefly live with her family. Their time together left him enamored with the memories of that short period before she just as abruptly disappeared from his life, and left him full of questions. Kimi no Iru Machi begins some time later, after Haruto moves to Tokyo to live with his sister, in order to pursue a career as a cook. In reality though he wishes to be with Yuzuki. Things don't start good though. When he arrives he is mistaken for a burglar and attacked by his sister's neighbour Mishima Asuka. After the misunderstanding is cleared his feelings begin to waver though. Is Eba, who keeps avoiding him for seemingly no reason, the one for him or is it Asuka?"
9650,30922,Kaze no Matasaburou (Movie),"Supernatural, Fantasy","girl moves from the city and goes to school in the country due to some circumstances in her father's job. Her new schoolmates keep her at a distance. There is a boy she'd like to get to know better, but she worries about how he might treat her and doesn't try to get closer. She begins to hate life in the countryside and longs to return to the city, but she ends up meeting a real god of wind. (Source: MAL News)"
6608,16742,Watashi ga Motenai no wa Dou Kangaetemo Omaera ga Warui!,"Slice of Life, Comedy, School","fter living 50 simulated high school lives and dating over 100 virtual boys, Tomoko Kuroki believes that she is ready to conquer her first year of high school. Little does she know that she is much less prepared than she would like to think. In reality, Tomoko is an introverted and awkward young girl, and she herself is the only one who doesn't realize it! With the help of her best friend, Yuu Naruse, and the support and love of her brother Tomoki, Tomoko attempts to brave the new world of high school life. Watashi ga Motenai no wa Dou Kangaetemo Omaera ga Warui! chronicles the life of a socially awkward and relatively friendless high school otaku as she attempts to overcome her personal barriers in order to live a fulfilling life."
4846,8917,Mouretsu Pirates,"Sci-Fi, Space","The story centers around a spirited high school girl named Marika. She keeps herself busy with the space yacht club and her part-time job at a high-class retro café. One day, two strangers suddenly appear and claim to be subordinates of her dead father. They demand that she assume command of the space pirate ship Bentenmaru. A privateer ship's compact was made during a war of independence a century ago, and according to that compact, the ship must be inherited by the captain's next direct descendant. Marika finds herself embarking on a new life as a space pirate."


In [24]:
%%time

reduced_all_data = PCA(n_components=250).fit_transform(synopsis_vector_tf_idf.todense())
get_recommended_another_df(reduced_all_data, query_index_2, 10)

CPU times: user 7min 25s, sys: 1min 31s, total: 8min 56s
Wall time: 7min 7s


Unnamed: 0,MAL_ID,Name,Genders,sypnopsis
6291,14741,Chuunibyou demo Koi ga Shitai!,"Slice of Life, Comedy, Drama, Romance, School","Everybody has had that stage in their life where they have thought themselves to be special, different from the masses of ordinary humans. They might go as far as seeing themselves capable of wielding mystical powers, or maybe even believe themselves to have descended from a fantasy realm. This ""disease"" is known as ""chuunibyou"" and is often the source of some of the most embarrassing moments of a person's life. For Yuuta Togashi, the scars that his chuunibyou has left behind are still fresh. Having posed as the ""Dark Flame Master"" during his middle school years, he looks back at those times with extreme embarrassment, so much so that he decides to attend a high school far away where nobody will recognize him. Putting his dark history behind him, he longs to live a normal high school life. Unfortunately, he hasn't escaped his past yet: enter Rikka Takanashi, Yuuta's new classmate and self-declared vessel of the ""Wicked Eye."" As this eccentric young girl crashes into Yuuta's life, his dream of an ordinary, chuunibyou-free life quickly crumbles away. In this hilarious and heartwarming story of a boy who just wants to leave his embarrassing memories behind, the delusions of old are far from a thing of the past."
355,390,Suzuka,"Sports, Drama, Romance, Shounen","Yamato Akitsuki travels to Tokyo alone to study in one of the high schools located within the area. He lives in with his aunt who operates a public bath solely for the ladies in the local district and begins his normal high-school life. One day, he chances upon a girl in school and is immediately mesmerized by her beauty. He is shocked when he realizes later that the girl, Suzuka, is actually living next door to him. From then on, Yamato's ordinary life begins to change little by little. (Source: ANN)"
6086,13283,One Off,Slice of Life,"slice of life of high school girls in the early summer. Shiozaki Haruno is a 17-year-old girl who loves her motor cycle, HONDA's Giorno. She lives in remote highlands and longs for a vivid life in a city. Encounters with people staying at Haruno's parents' boardinghouse encourage her to step forward to her dream."
9314,30127,Danchigai,"Comedy, Slice of Life","Haruki Nakano lives an average high school life, except for the fact that his mother is overseas and he shares an apartment with his four sisters. There's the oldest, Mutsuki, who has a bad habit of falling asleep in his bed, the junior high school student Yayoi, who hits him whenever something happens, and then the grade school twins, Uzuki and Satsuki, who love to play pranks on their older brother. Stuck in a house with four girls, Haruki has to deal with all sorts of trials. From going grocery shopping, watching scary movies, to kissing practice, life is never boring in Danchigai !"
6826,17741,Kimi no Iru Machi,"Slice of Life, Drama, Romance, Shounen","Haruto Kirishima lived a calm life out in the countryside, away from the fast-paced life of the city. Then Yuzuki Eba appeared in his life out of nowhere, having come from Tokyo to briefly live with her family. Their time together left him enamored with the memories of that short period before she just as abruptly disappeared from his life, and left him full of questions. Kimi no Iru Machi begins some time later, after Haruto moves to Tokyo to live with his sister, in order to pursue a career as a cook. In reality though he wishes to be with Yuzuki. Things don't start good though. When he arrives he is mistaken for a burglar and attacked by his sister's neighbour Mishima Asuka. After the misunderstanding is cleared his feelings begin to waver though. Is Eba, who keeps avoiding him for seemingly no reason, the one for him or is it Asuka?"
8115,24211,Orenchi no Furo Jijou,"Comedy, Fantasy, Josei","On his way home from school, Tatsumi sees a man collapsed near a lake. When he approaches him, Tatsumi notices something strange: the person in need of help is actually a beautiful merman named Wakasa! Because Wakasa's home has become too polluted to live in, Tatsumi graciously offers his bathtub as a refuge. With a boisterous merman as his new roommate, Tatsumi's normal life won't be returning anytime soon, not to mention Wakasa's aquatic friends—Takasu, Mikuni, and Maki—often show up uninvited, making them all quite a handful for the high school student. As he humors their curiosity for human life, Tatsumi sometimes finds himself enjoying their childish antics, but he will have to keep his cool if he intends to keep up with his daily life and newfound friendship."
10082,31914,Shoujo-tachi wa Kouya wo Mezasu,School,"Shoujo-tachi wa Kouya wo Mezasu is a series all about finding oneself and a direction in life... no matter how far off the beaten path it might be. Buntarou Hojo is a high school student who has a talent for writing, but no real direction in life or any plans for the future. His classmate, Sayuki Kuroda notices his talent, decides to help him find a way to use it properly by enlisting him in her bishoujo game development group. When Buntarou is cornered in the men's bathroom at school by Sayuki, he is surprised when he is asked out on what he thinks is a date, and even further surprised when he finds out that it's not a date, but a job interview. Reluctantly agreeing, the two start recruiting other members for their team but will they learn more about game creation, or life itself along the way?"
11200,34280,Gamers!,"Game, Comedy, Romance, School","Keita Amano is a typical high school gamer living out an average student's life. One day, however, he has an unexpected meeting with the cutest girl in school that makes him want to disappear without a trace! This girl, Karen Tendou, is an exemplary student who is proclaimed to be the school's idol. She discovers that Amano is a gamer, and this newfound knowledge incites a passionate desire within her to recruit him into the game club. Upon visiting the club, Amano is forcefully made aware of a side to gaming wildly different than the one he loves so dearly. Tendou's interest in Amano begins shaking up what was once an uneventful life, filling it with spontaneity, awkwardness, and a little bit of mayhem. As a result, every day becomes a comical battle for Amano's sanity as he tries to adapt to these wild, unexpected changes."
6608,16742,Watashi ga Motenai no wa Dou Kangaetemo Omaera ga Warui!,"Slice of Life, Comedy, School","fter living 50 simulated high school lives and dating over 100 virtual boys, Tomoko Kuroki believes that she is ready to conquer her first year of high school. Little does she know that she is much less prepared than she would like to think. In reality, Tomoko is an introverted and awkward young girl, and she herself is the only one who doesn't realize it! With the help of her best friend, Yuu Naruse, and the support and love of her brother Tomoki, Tomoko attempts to brave the new world of high school life. Watashi ga Motenai no wa Dou Kangaetemo Omaera ga Warui! chronicles the life of a socially awkward and relatively friendless high school otaku as she attempts to overcome her personal barriers in order to live a fulfilling life."


In [None]:
rating_data=pd.read_csv('/Users/adrienbenoist/Desktop/Ironhack/GitHub/data-ft-par-labs/Projects/Anime dataset/rating_complete.csv')

print ("rating_data.shape:", rating_data.shape)
print (rating_data.info())
rating_data.head()

In [None]:
users_count = rating_data.groupby("user_id").size().reset_index()
users_count.columns = ["user_id", "anime_count"]

print(users_count.shape)

filtered_users = users_count[users_count.anime_count >= 300]
users = set(filtered_users.user_id)

print(len(users))

In [None]:
rating_data = rating_data[rating_data.user_id.isin(users)]
print ("rating_data.shape:", rating_data.shape)
print (rating_data.info())

In [None]:
unique_users = {int(x): i for i,x in enumerate(rating_data.user_id.unique())}
unique_items = {int(x): i for i,x in enumerate(anime_data.MAL_ID.unique())}

print(len(unique_items), len(unique_users))
anime_collabolative_filter = np.zeros((len(unique_items), len(unique_users)))

for user_id, anime_id, rating in rating_data.values:
    anime_collabolative_filter[unique_items[anime_id], unique_users[user_id]] = rating

In [None]:
get_recommended(anime_collabolative_filter, query_index, 10)

In [None]:
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1) 
ax.set_xlabel('anime_metadata', fontsize = 15)
ax.set_ylabel('Principal Component 2', fontsize = 15)
ax.set_title('2 Component PCA', fontsize = 20)


unique_users = ['user_id', 'anime_id', 'rating']
unique_items = ['MAL_id', 'Genders', 'Score']
for target, color in zip(rating,Genders):
    indicesToKeep = rating_data['rating'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'principal component 1']
               , rating_data.loc[indicesToKeep, 'principal component 2']
               , c = color
               , s = 50)
ax.legend(targets)
ax.grid()

In [None]:
X = rating_data[['user_id', 'anime_id']].values
y = rating_data["rating"]
test_set_size = 100000 #10k for test set
train_indices = rating_data.shape[0] - test_set_size 
X_train, X_test, y_train, y_test = (
    X[:train_indices],
    X[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

In [None]:
from sklearn.metrics import mean_absolute_error
score = mean_absolute_error(X_train, X_test)