In [1]:
# Import Pandas
import pandas as pd

#This loads the anime Metadata
metadata = pd.read_csv('anime.csv', low_memory=False)

#This prints the first three rows of the dataset
metadata.head(3)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170,182126,131625,62330,20688,8904,3184,1357,741,1580
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",4,...,30043,49201,49505,22632,5805,1877,577,221,109,379
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229,75651,86142,49432,15376,5838,1965,664,316,533


In [2]:
# This calculates the mean of the ranked column
C = metadata['Ranked'].mean()
print(C)

7103.482860722013


In [3]:
# This calculates the score column of the 90th percentile, m
m = metadata['Score'].quantile(0.90)
print(m)

7.45


In [4]:
#This filters out all scores that are greater than or equal to 7.45 into a new DataFrame
q_anime = metadata.copy().loc[metadata['Score'] >= m]
q_anime.shape

(1764, 35)

In [5]:
metadata.shape

(17562, 35)

In [6]:
#This is a function that computes the weighted rating of each anime
def weighted_rating(x, m=m, C=C):
    v = x['Ranked']
    R = x['Score']
    
    return (v/(v+m) * R) + (m/(m+v) * C)

In [7]:
#This defines a new feature known as 'favourites' and calculate its value with a `weighted_rating()`
q_anime['Favourites'] = q_anime.apply(weighted_rating, axis=1)

In [8]:
#This sorts anime based on the score that was calculated above
q_anime = q_anime.sort_values('Score', ascending=False)

#This prints the top 15 animes name, score, rank and favourites
q_anime[['Name', 'Score', 'Ranked', 'Favorites']].head(15)

Unnamed: 0,Name,Score,Ranked,Favorites
3971,Fullmetal Alchemist: Brotherhood,9.19,1,183914
15926,Shingeki no Kyojin: The Final Season,9.17,2,44862
5683,Steins;Gate,9.11,3,148452
14963,Shingeki no Kyojin Season 3 Part 2,9.1,4,40985
9913,Gintama°,9.1,5,11868
6474,Hunter x Hunter (2011),9.1,6,147274
6006,Gintama',9.08,7,6567
741,Ginga Eiyuu Densetsu,9.07,8,13834
7261,Gintama': Enchousen,9.04,9,2586
12898,3-gatsu no Lion 2nd Season,9.0,10,11133


In [9]:
#This prints the genres of the first 5 animes.
metadata['Genres'].head()

0      Action, Adventure, Comedy, Drama, Sci-Fi, Space
1                Action, Drama, Mystery, Sci-Fi, Space
2    Action, Sci-Fi, Adventure, Comedy, Drama, Shounen
3    Action, Mystery, Police, Supernatural, Drama, ...
4            Adventure, Fantasy, Shounen, Supernatural
Name: Genres, dtype: object

In [10]:
#This imports the TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer

#This defines a TF-IDF Vectorizer Object and removes all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

#This replaces NaN with an empty string
metadata['Name'] = metadata['Name'].fillna('')

#This constructs the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(metadata['Name'])

#This outputs the shape of tfidf_matrix
tfidf_matrix.shape

(17562, 15509)

In [11]:
#This is an array mapping each feature integer indice to its feature name.
tfidf.get_feature_names()[5000:5010]

['higan',
 'higanjima',
 'higashi',
 'hige',
 'higeki',
 'higenashi',
 'higepiyo',
 'high',
 'highlander',
 'highlights']

In [12]:
#This imports the linear_kernel
from sklearn.metrics.pairwise import linear_kernel

#This computes the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [13]:
cosine_sim.shape

(17562, 17562)

In [14]:
cosine_sim[1]

array([0.71381058, 1.        , 0.        , ..., 0.        , 0.        ,
       0.        ])

In [15]:
#This makes a reverse map of indices and anime names
indices = pd.Series(metadata.index, index=metadata['Name']).drop_duplicates()

In [16]:
indices[:10]

Name
Cowboy Bebop                       0
Cowboy Bebop: Tengoku no Tobira    1
Trigun                             2
Witch Hunter Robin                 3
Bouken Ou Beet                     4
Eyeshield 21                       5
Hachimitsu to Clover               6
Hungry Heart: Wild Striker         7
Initial D Fourth Stage             8
Monster                            9
dtype: int64

In [17]:
#This is a function that takes in anime names as input and outputs most similar anime
def get_recommendations(Name, cosine_sim=cosine_sim):
    #This gets the index of the anime that matches the name
    idx = indices[Name]

    #This gets the pairwsie similarity scores of all animes with that anime
    sim_scores = list(enumerate(cosine_sim[idx]))

    #This sorts the anime based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    #This gets the scores of the 10 most similar anime
    sim_scores = sim_scores[1:11]

    #This gets the anime indices
    anime_indices = [i[0] for i in sim_scores]

    #This returns the top 10 most similar anime
    return metadata['Name'].iloc[anime_indices]

In [18]:
get_recommendations('Pokemon')

9887                 Pokemon: Pikachu to Pokemon Ongakutai
1385                   Pokemon: Senritsu no Mirage Pokemon
4170                  Pokemon: Odoru Pokemon Himitsu Kichi
8054                                            Pokemon XY
11019                                         Pokemon XY&Z
16105                                       Pokemon (2019)
12464                                  Pokemon Generations
8241                                   Pokemon: The Origin
2014     Pokemon Movie 09: Pokemon Ranger to Umi no Ouj...
1016     Pokemon Movie 02: Maboroshi no Pokemon Lugia B...
Name: Name, dtype: object

In [19]:
#This loads the anime summaries
summary = pd.read_csv('animeSummary.csv')

#This converts IDs to int. and is needed for merging
summary['MAL_ID'] = summary['MAL_ID'].astype('int')
metadata['MAL_ID'] = metadata['MAL_ID'].astype('int')


#This merges the summaries into my main metadata dataframe
metadata = metadata.merge(summary, on='MAL_ID')

In [20]:
#This prints the first two anime of my newly merged metadata
metadata.head(2)

Unnamed: 0,MAL_ID,Name_x,Score_x,Genres_x,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,Name_y,Score_y,Genres_y,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,20688,8904,3184,1357,741,1580,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",4,...,5805,1877,577,221,109,379,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."


 # 4th Year A.I. Report
   ## Recommender Systems
   ### Mark Hurley – N00182059


### Introduction

‘Every aspect of learning or any other feature of intelligence can in principle be so precisely described that a machine can be made to simulate it. An attempt will be made to find how to make machines use language, form abstractions, and concepts, solve kinds of problems now reserved for humans, and improve themselves.’ – John McCarthy 1950 Inventor of the term Artificial Intelligence.
First off I’ll with the definition of AI (Artificial Intelligence) this is Intelligence that comes from machines and machinery. So far scientists and researchers in this field are trying to replicate human speech and intelligence with this and ultimately try and give a machine the ability to think for itself. 
A brief history of recommender systems (RS’s): The first Recommender System concept was used in 1979, in a system called Grundy, which was basically a computer librarian that suggested a book for the users to read. Then during the early 1990’s the first commercial Recommender System known as Tapestry. Then during the early 1990’s a recommender system known as GroupLens which helped people find their preferred articles at the University of Minnesota. Moving into the late 1990’s Amazon implemented the Amazon Collaborative Filtering. Since then Collaborative Filtering Recommender Systems became very popular. Then in 2006 one of the most popular recommender system competitions ran by Netflix gave away 1 million dollars to who ever could make the best recommendation system.




### Applications of AI

The field of AI can be broken down into mainly 5 different fields: Computer Science, Engineering, Philosophy, Medicine and Language. The section I am focusing on for this project is Language. 
Computer Science: Covers the development of algorithms and approach’s that solve problems that couldn’t be interacted with.
Engineering: Covers the building of Robotics that can recognise, adapt and react to their environment and change they act accordingly.
Philosophy: Covers the debate of what it means to be intelligent and if humans can actually make something  with true intelligence.
Medicine: Covers the use of AI applications that are used by Doctors and researchers to help diagnosing a multitude of patients.
Language: Covers the work of linguists who want to use AI to help machines recognize human speech and speech patterns and so that they can respond to requests that are spoken.


#### Strengths/Limitations
One of the strengths of AI is the reduction of human error and will to more results based purely on data and statistics. It could also help prevent the loss of human lives by instead replacing dangerous jobs with machines that could fill that purpose instead eliminating the risk factor, it would also help get the more mundane and repetitive jobs done more effectively since there would be no humans to get tired of that particular job and only put in half the amount of work. Machines are also available 24/7 and would not need to stop working as often as humans(apart from maintenance time) since humans can only work around 6 – 8 hours a day on average. AI can also make decisions faster than humans which means its more efficient at tasks.

Some of the limitations with AI includes the cost of actually creating and maintaining these AI. Another problem is that it would eventually leave plenty of people with no jobs since in the long run it would be more cost effective to have machines and AI to do the work of people. Also AI doesn’t have any feelings or empathy/sympathy which means it could be cruel and unforgiving all for the sake of efficiency.


## Recommender Systems
### •	Content-Based Approach
Content-Based recommender systems tend to use extra information about users and items. This filtering technique makes use of the items elements to recommend different things like what the user enjoys and depending on their past activities or criticism. Using the example of a movie recommender system, we can use extra information like the users age, sex, job etc. and the additional information used for movies such as the actors, writers and directors. The central thinking behind content-based systems is to attempt to make a model that is based on the available features that can explain the users interactions with various items.
### •	Collaborative Approach
Collaborative recommender systems are a method of filtering recommendations to the user based on their past interests, searches and watches that have been recorded. This method also tends to find what other users with similar tastes would like to have recommended to them and in order to achieve they group the users with these similar tastes into the same group and recommend each user based on what their groups are like. The central thinking that oversees the collaborative methods is that through the past interactions that the users have with items when refined through this system it becomes enough to find similar users or items to make predictions depending on these facts and understandings.



## Anime Recommender
### •	Description
The recommender system I decided to develop was a content based recommender system that would recommend various Anime to people based on what titles people are searching, it’ll recommend similar Anime based on title or genre. I decided to pick an Anime recommender system as I believe it’s important for people to be able to find new Anime to watch in an easy and efficient way.
### •	Implementation
When it came to creating my recommender system I followed the guide from DataCamp, while replacing that sample data set of movies with a new data set of various Anime including movies and tv shows. I also modified this new recommender system to recommend anime that have a similar name and genre. 

I started off by loading my Anime dataset using Pandas Dataframe library, this library is used for manipulation of data and analysis. Next I had it print out the first three titles to make sure it loaded my dataset.

Once panda was imported and my dataset was working and loaded correctly I then moved on to calculate the mean of all of the Anime ranks and then I calculated the scores of each anime in the 90th percentile.

Now that I have the mean figured out I then filter out the scores that are greater than or equal to 7.45. The .copy method is used to make sure that the new q_anime Dataframe that’s made is completely separate from the original anime dataset which means that changes made to it won’t affect the original dataset.

Next I used a formula to compute the weighted rating of each anime. Then I defined a new feature which I called favourites which will be calculated by applying that function to my DataSet.

Following that I then sorted out my dataset in a descending order based on the score of the Anime, so it would display the highest ranked anime and then go down through all of the top 15 highest scored anime of all time. Then I print out the first 5 gneres of the first 5 anime in the dataset and using this information I will pair other anime with a similar overall score and related genre in order to recommend a new anime to the user. 

I do this by computing the Term Frequency-Inverse Document Frequency TF-IDF for every document. I start by importing the TF-IDF module using scikit-learn, then I remove stop words and replace non number values with a blank string. Lastly I construct the TF-IDF matrix on the data.

Next I use the cosine similarity to calculate a quantity of numbers that finds similarities between two anime. Then I’ll use sklearn’s linear_kernel instead od the cosine similarities because it’s faster.

Then I will define a function that will take the name of an anime as an input and have it output 10 similar anime. Next I will get the index of the anime given it’s title, then I’ll get the list of cosine similarity scores for that individual anime then convert that into a lost of tuples where the first component is its position and the second is it’s similarity score. Next I’ll get the top 10 components of the list and return the appropriate names to the indices of the top components.  

After getting that all up and working I then work on loading and merging in a new dataset that features additional information about each anime such as the summaries and displaying the first 2 summaries to ensure all is working correctly.


## References
Kumar, S. (2019, November 25. )Advantages and Disadvantages of Artificial Intelligence. Towards Data Science. https://towardsdatascience.com/advantages-and-disadvantages-of-artificial-intelligence-182a5ef6588c

Dey, V. (2021, August 25). Collaborative Filtering Vs Content-Based Filtering for Recommender Systems. Analytics India Mag. https://analyticsindiamag.com/collaborative-filtering-vs-content-based-filtering-for-recommender-systems/

Qomariyah, N. N. (2020, November 3). Definition and History of recommender systems. Binus University International. . https://towardsdatascience.com/advantages-and-disadvantages-of-artificial-intelligence-182a5ef6588c

Sharma, A. (2020, May 29). Beginner Tutorial: Recommender Systems in Python. Datacamp. https://www.datacamp.com/community/tutorials/recommender-systems-python
