In [None]:
import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import spacy
# Large English spaCy pipeline; used further down to compare genre strings
# via Doc.similarity. The model must already be installed in the environment.
en_core = spacy.load('en_core_web_lg')


# Raw review data; the first CSV column is used as the index.
df1 = pd.read_csv('anime_df.csv',index_col=0) # Original Dataframe #

In [None]:
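# One-time environment setup: uncomment and run once, then restart the kernel.
# spaCy has to be installed before its language model can be downloaded.
#!pip install spacy
#!python -m spacy download en_core_web_lg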

In [None]:
# Preview the first rows of the freshly loaded frame.
df1.head()

In [None]:
def process_rating(s):
    """Strip the ``'Rating: '`` label from a rating value.

    Parameters
    ----------
    s : str
        Raw value such as ``'Rating: 8'``.

    Returns
    -------
    str
        The text that follows the first ``'Rating: '`` label (up to the next
        label, if there is one). A value that does not contain the label -
        an already-cleaned string, or a non-string such as NaN/None - is
        returned unchanged instead of raising.
    """
    if not isinstance(s, str):
        return s

    parts = s.split('Rating: ')
    # parts[0] is whatever preceded the label; without a label there is no parts[1].
    return parts[1] if len(parts) > 1 else s

def process_genres(s):
    """Strip the ``'Genres: '`` label from a genres value.

    Parameters
    ----------
    s : str
        Raw value such as ``'Genres: Action, Comedy'``.

    Returns
    -------
    str
        The text that follows the first ``'Genres: '`` label (up to the next
        label, if there is one). A value that does not contain the label -
        an already-cleaned string, or a non-string such as NaN/None - is
        returned unchanged instead of raising.
    """
    if not isinstance(s, str):
        return s

    parts = s.split('Genres: ')
    # parts[0] is whatever preceded the label; without a label there is no parts[1].
    return parts[1] if len(parts) > 1 else s



In [None]:
# Work on a copy: a plain `df2 = df1` only aliases the raw frame, so the
# assignment below would overwrite df1's original 'Rating: ...' strings and
# make this cell behave differently on a second run.
df2 = df1.copy() # Frame with the Rating column cleaned #

df2['Rating'] = df2['Rating'].apply(process_rating)

df2.head()

In [None]:
# Keep only the rows whose Genres value carries the 'Genres:' label.
# na=False counts missing values as non-matching (otherwise the mask contains
# NaN and cannot be used for boolean indexing); .copy() makes df3 an
# independent frame so the column assignment below does not raise
# SettingWithCopyWarning.
df3 = df2[df2['Genres'].str.contains('Genres:', na=False)].copy() # Clean Genres with all correct scrapes #

df3['Genres'] = df3['Genres'].apply(process_genres)

In [None]:
# Non-null value counts per column for every title; the number of rows in the
# result is the number of distinct titles (66 according to the original note).
df3.groupby('Title').count() # 66 total anime titles #

In [None]:
# Convert the cleaned rating strings to floats. assign() builds a new frame
# rather than writing into df3 in place, so no SettingWithCopyWarning is
# raised when df3 was created by filtering another frame.
df3 = df3.assign(Rating=df3['Rating'].astype(str).astype(float))

In [None]:
# Mean rating per title, kept as a one-column DataFrame keyed by Title.
average_ratings = df3.groupby('Title')[['Rating']].agg('mean')

In [None]:
# Rename so the per-title mean does not clash with the per-review Rating column.
average_ratings = average_ratings.rename(
    {'Rating': 'Average_Rating'},
    axis='columns',
)

In [None]:
# Attach each title's average rating to every review row of that title.
df_merge = df3.merge(average_ratings, on='Title')

In [None]:
# Preview the first three merged rows.
df_merge.head(3)

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Score every review with VADER and keep only its 'compound' value.
# str() guards against non-string cells (e.g. missing reviews).
analyzer = SentimentIntensityAnalyzer()
sentiment_score_list = [
    analyzer.polarity_scores(str(text))['compound']
    for text in df_merge['Review']
]

In [None]:
# Store the per-review compound score, then average it per title.
df_merge['Sentiment_Score'] = np.asarray(sentiment_score_list)
average_sentiment = df_merge.groupby('Title')[['Sentiment_Score']].agg('mean')

In [None]:
# Both frames carry a 'Sentiment_Score' column, so the merge suffixes them
# with _x (per review) and _y (per-title mean).
anime_df_merge = df_merge.merge(average_sentiment, on='Title')

In [None]:
# Replace the merge suffixes with descriptive column names.
anime_df_merge = anime_df_merge.rename(
    {
        'Sentiment_Score_x': 'Sentiment_Score',
        'Sentiment_Score_y': 'Average_Sentiment',
    },
    axis='columns',
)

In [None]:
# Preview the first three rows of the final frame.
anime_df_merge.head(3)

In [None]:
# Distinct titles in the final frame; get_suggestions() checks the user's
# title against this array.
title_list = anime_df_merge['Title'].unique()
title_list

In [None]:
def get_category():
    """Ask the user whether to search by genres or by title.

    The menu is printed and a line is read from standard input until the
    answer names one of the two options. Both the option number shown in the
    menu (``1`` / ``2``) and the option name are accepted, in any letter case
    and with surrounding whitespace ignored.

    Returns
    -------
    str
        Exactly ``'Genres'`` or ``'Title'`` - the values the rest of the
        notebook compares against.
    """
    choices = {
        '1': 'Genres',
        'genres': 'Genres',
        '2': 'Title',
        'title': 'Title',
    }

    while True:
        print('You can either choose to enter your top 3 Genres or 1 Anime Title.')
        print('1. Genres')
        print('2. Title')
        # Normalise so that "1", "1.", "genres" and " Genres " all select Genres.
        answer = input().strip().rstrip('.').lower()

        if answer in choices:
            return choices[answer]

        print('Please enter 1 (Genres) or 2 (Title).')

def get_suggestions(user_category):
    """Prompt for the user's preferred genres or a known anime title.

    Parameters
    ----------
    user_category : str
        ``'Genres'`` to ask for free-text genres; any other value asks for a
        title.

    Returns
    -------
    str
        For ``'Genres'``: the entered text with surrounding whitespace
        removed. Otherwise: a title from the module-level ``title_list``.
        The prompt repeats until the entry matches a title; matching ignores
        letter case and surrounding whitespace, and the title is returned
        as it is spelled in ``title_list``.
    """
    if user_category == 'Genres':
        print('Here is a list of genres you can pick from:')
        print()
        print('Action, Adventure, Cars, Comedy, Demons, Drama, Fantasy, Game, Historical, Horror, Magic, Martial Arts, Mecha, Music, Mystery, Romance, School, Sci-Fi, Shounen, Slice of Life, Space, Sports, Super Power, Supernatural')
        return input('Enter your top 3 genres:').strip()

    # Non-string entries (e.g. NaN from a missing title) can never be typed
    # by the user, so they are left out of the lookup.
    known_titles = [title for title in title_list if isinstance(title, str)]
    by_folded = {}
    for title in known_titles:
        # setdefault keeps the first spelling if two titles differ only by case.
        by_folded.setdefault(title.strip().casefold(), title)

    # Loop instead of recursing so repeated typos cannot exhaust the call stack.
    while True:
        suggest_resp = input('Please enter your show of choice!')

        if suggest_resp in known_titles:
            return suggest_resp

        match = by_folded.get(suggest_resp.strip().casefold())
        if match is not None:
            return match

        print('Sorry we do not currently have that title in our database =(')
    


In [None]:
# Manual check of the title prompt (waits for keyboard input).
get_suggestions('Title')

In [None]:
# Scratch check: Genres value of the first row whose title is not 'Kimetsu no Yaiba'.
anime_df_merge['Genres'][anime_df_merge['Title']!='Kimetsu no Yaiba'][:1]

In [None]:
def _rank_titles_by_genres(preference_text, exclude_title=None, top_n=3):
    """Return up to ``top_n`` titles whose genres best match ``preference_text``.

    Rows of ``anime_df_merge`` are ordered by spaCy similarity between their
    genres text and ``preference_text``, ties broken by ``Average_Sentiment``
    (both descending). ``exclude_title``, when given, is removed from the
    ranking. The scores are written to ``anime_df_merge['similarity_score']``.
    """
    preference = en_core(preference_text)

    # The frame has one row per review, so the same genres text repeats many
    # times; each distinct text is parsed and compared only once.
    genre_text = anime_df_merge['Genres'].map(str)
    score_by_text = {
        text: preference.similarity(en_core(text))
        for text in genre_text.unique()
    }
    anime_df_merge['similarity_score'] = genre_text.map(score_by_text).to_numpy()

    ranked = anime_df_merge.sort_values(
        by=['similarity_score', 'Average_Sentiment'], ascending=False
    )
    if exclude_title is not None:
        ranked = ranked[ranked['Title'] != exclude_title]

    return ranked['Title'].unique()[:top_n]


def recommendation_system():
    """Interactively recommend three anime titles.

    Asks (via ``get_category`` / ``get_suggestions``) for either a list of
    genres or a known title, ranks the titles in ``anime_df_merge`` by genre
    similarity and average sentiment, and prints the top three. When a title
    is given, that title itself is left out of the recommendations.

    Side effect: adds/overwrites the ``similarity_score`` column of the
    module-level ``anime_df_merge`` frame. Returns ``None``.
    """
    print('Let us find you a new anime to watch!')
    user_category = get_category()
    user_suggestions = get_suggestions(user_category)

    if user_category == 'Genres':
        print('Genres was chosen!')
        top_3_anime = _rank_titles_by_genres(str(user_suggestions))
        print('Here are your top 3 Anime Recommendations based on the genres you chose!')

    else:
        print('Title was chosen!')
        title_genres = anime_df_merge.loc[
            anime_df_merge['Title'] == user_suggestions, 'Genres'
        ]
        if title_genres.empty:
            print('Sorry we do not currently have that title in our database =(')
            return

        # .iloc[0] yields the genres string itself. Calling str() on the
        # one-row Series instead would feed its printed form (index label,
        # "Name: Genres, dtype: object") into the similarity comparison.
        user_preference = title_genres.iloc[0]
        top_3_anime = _rank_titles_by_genres(
            str(user_preference), exclude_title=user_suggestions
        )
        print('Here are your top 3 Anime Recommendations based on the anime you chose!')

    for name in top_3_anime:
        print(name)
            
        
        
        
        
        


In [None]:
# Run the interactive recommender (waits for keyboard input).
recommendation_system()