### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
import scipy.stats as stats
import math
from sklearn.model_selection import train_test_split



### Functions for Basic Model

In [2]:
def make_basic_recommendations(basic_ranking, username, data):
    """Return the basic ranking without the titles ``username`` already has on record.

    Parameters
    ----------
    basic_ranking : pandas.DataFrame
        Ranking table indexed by title index. Its row order is used as the
        ranking order.
    username : str
        Member the recommendations are built for.
    data : pandas.DataFrame
        Member/title records with at least the columns ``'member'`` and ``'index'``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the remaining titles plus a ``'ranking'`` column
        numbered ``0..n-1`` in row order. The frame passed in as
        ``basic_ranking`` is never modified.
    """
    # Every title the member has a record for is excluded, whatever its status.
    # For an unknown member this selection is empty, so nothing is removed.
    index_read = data.loc[data['member'] == username, 'index']

    # A boolean mask (instead of ``drop``) tolerates recorded titles that are
    # missing from the ranking; ``copy`` keeps the caller's frame untouched.
    remaining = basic_ranking.loc[~basic_ranking.index.isin(index_read)].copy()

    # Rank the titles in their current order
    remaining['ranking'] = np.arange(remaining.shape[0])

    return remaining

### Functions for Basic Similarity Metrics

In [3]:
def calculate_basic_similarity_matrix(data, savepath):
    """Build, save and return the title-by-title basic similarity matrix.

    Parameters
    ----------
    data : pandas.DataFrame
        Records containing an ``'index'`` column (title index), the columns
        ``'manga_length'`` and ``'clusters'``, and any number of columns whose
        name contains ``'themes'``, ``'genres'`` or ``'demographic'``.
    savepath : str
        Destination of the CSV copy of the matrix.

    Returns
    -------
    pandas.DataFrame
        Cosine similarities, with the title index on both rows and columns.
    """
    keywords = ('themes', 'genres', 'demographic')

    # Every column whose name mentions one of the keywords is a feature
    feature_cols = [name for name in data.columns
                    if any(keyword in name for keyword in keywords)]

    # One feature row per title: the selected columns summed over all rows of that title
    selected = data[feature_cols + ['manga_length', 'clusters', 'index']]
    per_title = selected.groupby('index').sum()

    # Cosine similarity between every pair of title feature rows
    labels = per_title.index
    bsm = pd.DataFrame(cosine_similarity(per_title), index=labels, columns=labels)

    # Persist the matrix before handing it back
    bsm.to_csv(savepath)
    return bsm

def bsm_recommendation(manga_read_list, basic_similarity_matrix):
    """Score every title by its summed similarity to the titles already read.

    Parameters
    ----------
    manga_read_list : iterable
        Title indexes the user has read; each must be a column of
        ``basic_similarity_matrix``.
    basic_similarity_matrix : pandas.DataFrame
        Title-by-title similarity matrix.

    Returns
    -------
    pandas.DataFrame
        Columns ``'bsm_score'`` and ``'ranking'``, sorted by descending score
        with ``ranking`` counting up from 0. Titles already read are NOT
        removed here; that is left to the caller.
    """
    # One similarity column per read title (repeated titles collapse to one column)
    per_title_scores = pd.DataFrame(
        {title: basic_similarity_matrix[title] for title in manga_read_list}
    )

    # Total similarity of each candidate title to everything that was read
    per_title_scores['bsm_score'] = per_title_scores.sum(axis=1)

    # Best match first, then number the rows in that order
    ordered = per_title_scores.sort_values('bsm_score', ascending=False)
    ordered['ranking'] = np.arange(ordered.shape[0])

    return ordered[['bsm_score', 'ranking']]

### Functions for Titles-Based Collaborative Filtering

In [4]:
def centering_to_mean(data):
    """Subtract each row's mean from that row and return the centred frame.

    ``data.mean(axis=1)`` is the per-row mean; the subtraction is done on the
    transposed frame so the means line up with its columns, and the result is
    transposed back to the original orientation.
    """
    row_means = data.mean(axis=1)
    transposed = data.transpose()
    centred = transposed.subtract(row_means)
    return centred.transpose()

###########################################################################################################################

def calculate_titles_based_similarity_matrix(manga, savepath):
    """Build, save and return the similarity matrix between the columns of ``manga``.

    Parameters
    ----------
    manga : pandas.DataFrame
        Score table; the similarity is computed between its columns.
    savepath : str
        Destination of the CSV copy of the matrix.

    Returns
    -------
    pandas.DataFrame
        Cosine similarities labelled by ``manga``'s columns on both axes.
    """
    # Mean-centre every row of the transposed table.
    # NOTE(review): if `manga` is members x titles, this centres each title on
    # its own mean, not each member's scores - confirm which one is intended.
    centred = centering_to_mean(manga.T)

    # Missing scores become 0 so the table can be stored as a sparse matrix
    centred_sparse = sparse.csr_matrix(centred.fillna(0))

    # Cosine similarity between the rows of the centred table
    labels = centred.index
    manga_similarity = pd.DataFrame(
        cosine_similarity(centred_sparse), index=labels, columns=labels
    )

    # Persist the matrix before handing it back
    manga_similarity.to_csv(savepath)
    return manga_similarity
    
###########################################################################################################################

def cal_user_rating(similarity_matrix, title_index, user_score_all_movies, threshold):
    """Predict the score of one title as a similarity-weighted average of other scores.

    Parameters
    ----------
    similarity_matrix : pandas.DataFrame
        Title-by-title similarity matrix.
    title_index : hashable
        Title whose score is predicted; its own entry is excluded.
    user_score_all_movies : pandas.Series or pandas.DataFrame
        Scores indexed by title (one column per user when a DataFrame).
        Missing scores count as 0.
    threshold : float
        Only titles with a similarity strictly above this value contribute.

    Returns
    -------
    float or numpy.ndarray
        Dot product of the normalised weights with the scores: one number for
        a Series of scores, one value per column for a DataFrame.
    """
    # Similarities to every other title, keeping those above the threshold
    neighbours = similarity_matrix[title_index].drop(title_index)
    neighbours = neighbours.loc[neighbours > threshold]

    # Normalise the kept similarities so that they sum to one
    weights = neighbours / neighbours.sum()

    # Scores of the kept titles, with unread titles counted as 0
    neighbour_scores = user_score_all_movies.loc[neighbours.index].fillna(0)

    return np.dot(weights, neighbour_scores)

###########################################################################################################################

def calculate_titles_based_cf_scores(manga, manga_similarity, usernames = None, threshold=0):
    """Predict title scores for members and write one CSV per member.

    Parameters
    ----------
    manga : pandas.DataFrame
        Score table with members on the index and titles on the columns.
    manga_similarity : pandas.DataFrame
        Title-by-title similarity matrix covering every column of ``manga``.
    usernames : None, str or list, optional
        ``None`` scores every member of ``manga``; a ``str`` scores one member;
        a ``list`` scores the listed members.
    threshold : float, optional
        Passed to ``cal_user_rating``: only similarities above it are used.

    Returns
    -------
    None
        Results are written to
        ``../data_titles_based_cf_user_scores/<member>.csv`` (one row per file).
        When ``usernames`` has an unsupported type a message is printed and
        nothing is computed.
    """
    # Function-scope import so this cell does not depend on the notebook's import cell
    import os

    output_dir = '../data_titles_based_cf_user_scores'

    # Dictionary collecting the member names and the predicted scores per title
    recommender = {}

    # `is None` rather than `== None`: the equality test is evaluated element-wise
    # for array-like input and then fails inside the `if`.
    if usernames is None:
        # Score every member
        manga_T = manga.T
        recommender['members'] = manga.index

    elif isinstance(usernames, (str, list)):
        # Score only the requested member(s)
        manga_T = manga.T[usernames]
        recommender['members'] = usernames

    else:
        print("'usernames' is not of type 'str', 'list' or 'NoneType'")
        return None

    # Make sure the destination exists before the long-running loop, not after it
    os.makedirs(output_dir, exist_ok=True)

    # Predict the score of every title, reporting progress on a single console line
    total_titles = len(manga.columns)
    for count, title in enumerate(manga.columns, start=1):
        recommender[title] = cal_user_rating(manga_similarity, title, manga_T, threshold)
        print(f"Completion:\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", end='\r')
        print(f"Completion: {round(count/total_titles*100, 5)}%", end='\r')

    # One row per member, one column per title
    recommender_df = pd.DataFrame.from_dict(recommender, orient='index').T.set_index('members')

    # Save each member's row to its own CSV file
    for user in recommender_df.index:
        print(user)
        recommender_df.loc[[user]].to_csv(f'{output_dir}/{user}.csv')

###########################################################################################################################

def cf_recommend_manga_to_user(username):
    """Load a member's stored collaborative-filtering scores and rank the positive ones.

    Parameters
    ----------
    username : str
        Member whose score file
        ``../data_titles_based_cf_user_scores/<username>.csv`` is read. The
        file is expected to hold one row with a ``'members'`` column followed
        by one column per title index.

    Returns
    -------
    pandas.DataFrame
        Indexed by title index (int), with ``'title_cf_score'`` (descending,
        positive scores only) and ``'ranking'`` counting up from 0.

    Raises
    ------
    FileNotFoundError
        If no score file exists for ``username``.
    """
    # Read the predicted scores and discard the member-name column
    scores = pd.read_csv(f'../data_titles_based_cf_user_scores/{username}.csv')
    scores = scores.drop(columns='members')

    # Title indexes come back from the CSV header as strings
    scores = scores.rename(columns=int)

    # One row per title, best score first
    ranked = scores.T.sort_values(0, ascending=False).rename(columns={0: 'title_cf_score'})

    # Keep positive scores only. The explicit copy makes the result an
    # independent frame, so adding a column below does not raise pandas'
    # SettingWithCopyWarning.
    ranked = ranked[ranked['title_cf_score'] > 0].copy()

    # Number the remaining titles in score order
    ranked['ranking'] = np.arange(ranked.shape[0])

    return ranked

### Recommendation Entry Points and User Template Helper

In [5]:
def get_recommendations(data, bsm, no_of_recommended_titles=10, cross_overpoint=6):
    """Interactively ask who the user is and return their top recommendations.

    The model is picked from the number of distinct titles the member has on
    record with a ``read_status`` other than ``'Dropped'``:

    * new user, or no such titles  -> basic ranking
    * fewer than ``cross_overpoint`` -> basic similarity matrix (``bsm_recommendation``)
    * otherwise                     -> stored collaborative-filtering scores
      (``cf_recommend_manga_to_user``)

    Parameters
    ----------
    data : pandas.DataFrame
        Member/title records with the columns ``'member'``, ``'index'`` and
        ``'read_status'``.
    bsm : pandas.DataFrame
        Basic similarity matrix handed to ``bsm_recommendation``.
    no_of_recommended_titles : int, optional
        Number of titles to return (default 10).
    cross_overpoint : int, optional
        Number of read titles from which collaborative filtering is used
        (default 6).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'manga_title'`` with a ``'manga_link'`` column.

    Notes
    -----
    Relies on the notebook-level variables ``basic_ranking`` and
    ``manga_titles`` being loaded before the call.
    """
    member_list = data['member'].unique()
    username = None
    ismember = False
    no_titles_read = -1
    # A new user has no history. Starting with an empty list keeps the
    # "drop titles already read" step below valid for them as well.
    read_titles = []

    valid_response = False
    while not valid_response:
        print("Are you a new user? Please answer 'yes' or 'no'")
        new_user = str.strip(input("User Response: ")).lower()

        if new_user == 'yes':
            valid_response = True
        elif new_user == 'no':
            username = str.strip(input("Enter Your Username: "))
            if username in member_list:
                ismember = True
                user_rows = data[data['member'] == username]
                # Dropped titles do not count towards the number of titles read ...
                no_titles_read = user_rows[user_rows['read_status'] != 'Dropped']['index'].nunique()
                # ... but they are still excluded from the recommendations
                read_titles = user_rows['index'].values
                valid_response = True
            else:
                # Keep asking instead of leaving the loop without a valid user
                print("Invalid user. Try Again.")

    if new_user == 'yes' or no_titles_read < 1:
        # Not sliced yet: the top-k cut is applied after read titles are removed
        recommended_titles = basic_ranking
    elif no_titles_read < cross_overpoint:
        recommended_titles = bsm_recommendation(read_titles, bsm)
    else:
        recommended_titles = cf_recommend_manga_to_user(username)

    # Drop titles that are already on record and keep the top k of the rest
    unread = recommended_titles[~recommended_titles.index.isin(read_titles)]
    top_titles = unread[0: no_of_recommended_titles]

    # Convert title indexes to titles and links
    top_titles = top_titles.merge(manga_titles, left_index=True, right_index=True)

    return top_titles.loc[:, ['manga_title', 'manga_link']].set_index('manga_title')
    
# Create template for user upload
def create_user_template(data, savepath="../data_production/user_input_template.csv"):
    """Write a CSV listing every distinct title for a user to fill in and upload.

    Parameters
    ----------
    data : pandas.DataFrame
        Records with at least the columns ``'index'`` and ``'manga_title'``.
    savepath : str, optional
        Destination of the template; defaults to the project's production folder.

    Notes
    -----
    The file contains only ``index`` and ``manga_title``.
    ``upload_file_and_get_recommendation`` additionally reads a ``Score``
    column, which has to be added to the file before it is uploaded.
    """
    # One row per distinct (index, title) pair, written without the row index
    template = data[['index', 'manga_title']].drop_duplicates()
    template.to_csv(savepath, index=False)
    

def upload_file_and_get_recommendation(bsm, no_of_recommended_titles=10, cross_overpoint=6):
    """Ask for a filled-in score file and return recommendations for that user.

    The uploaded rows are treated as the history of a member called
    ``'temp_user'``. The model is picked from the number of complete rows
    (rows without any missing value):

    * none                          -> basic ranking
    * fewer than ``cross_overpoint`` -> basic similarity matrix (``bsm_recommendation``)
    * otherwise                     -> stored collaborative-filtering scores
      (``cf_recommend_manga_to_user``)

    Parameters
    ----------
    bsm : pandas.DataFrame
        Basic similarity matrix handed to ``bsm_recommendation``.
    no_of_recommended_titles : int, optional
        Number of titles to return (default 10).
    cross_overpoint : int, optional
        Number of read titles from which collaborative filtering is used
        (default 6).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'manga_title'`` with a ``'manga_link'`` column.

    Notes
    -----
    Relies on the notebook-level variables ``basic_ranking`` and
    ``manga_titles`` being loaded before the call. The uploaded CSV must
    contain the columns ``'index'`` and ``'Score'``.
    """
    # Pasted paths often carry stray whitespace or wrapping quotes; strip both,
    # as the other prompts in this notebook strip their input.
    user_input = input("Input the file path: ").strip().strip('"\'')
    user_data = pd.read_csv(user_input)

    # The uploaded rows belong to a placeholder member
    username = 'temp_user'
    user_data['member'] = username

    # Convert the user data into a pivot format.
    # NOTE(review): the pivot is not used below; as written it only fails
    # early when 'Score' is missing or a title appears twice. It looks
    # intended as input for scoring the uploaded user - confirm.
    user_data_pivot = user_data.pivot(index='member', columns='index', values='Score')

    # Rows without any missing value are the titles the user has scored
    rated_rows = user_data.dropna()
    no_titles_read = rated_rows.shape[0]
    read_titles = rated_rows['index'].values

    if no_titles_read < 1:
        recommended_titles = basic_ranking[0: no_of_recommended_titles]
    elif no_titles_read < cross_overpoint:
        recommended_titles = bsm_recommendation(read_titles, bsm)
    else:
        # NOTE(review): this reads a stored score file for 'temp_user';
        # nothing in this function creates that file.
        recommended_titles = cf_recommend_manga_to_user(username)

    # Drop titles that are read and keep the top k of the rest
    unread = recommended_titles[~recommended_titles.index.isin(read_titles)]
    top_titles = unread[0: no_of_recommended_titles]

    # Convert title indexes to titles and links
    top_titles = top_titles.merge(manga_titles, left_index=True, right_index=True)

    return top_titles.loc[:, ['manga_title', 'manga_link']].set_index('manga_title')

### Load all necessary files

In [6]:
# Merged member/title records: only '-', '' and ' ' are parsed as missing
# values, and the two categorical columns are stored as strings
data = pd.read_csv(
    '../data_cleaned/data_merged.csv',
    keep_default_na=False,
    na_values=['-', '', ' '],
).astype({'manga_length': str, 'clusters': str})

# Title lookup table, keyed by title index
manga_titles = pd.read_csv('../data_cleaned/manga_titles_cleaned.csv').set_index('index')

# Basic ranking, keyed by title index
basic_ranking = pd.read_csv('../data_production/basic_ranking.csv', index_col='index')

# Basic similarity matrix; the CSV header stores title indexes as text, so cast them back to int
bsm = pd.read_csv("../data_production/title_bsm.csv", index_col='index').rename(columns=int)

# Collaborative-filtering similarity matrix, same layout as the basic one
cfsm = pd.read_csv("../data_production/title_cf_sm.csv", index_col='index').rename(columns=int)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [9]:
# Ask who the user is on the console and display their recommended titles
get_recommendations(data=data, bsm=bsm)

Are you a new user? Please answer 'yes' or 'no'
User Response: no
Enter Your Username: Zat_Zwielicht


Unnamed: 0_level_0,manga_link
manga_title,Unnamed: 1_level_1
Star Wars: Jedi no Kikan,https://myanimelist.net/manga/1116/Star_Wars__...
Shitsugyou Kenja no Nariagari: Kirawareta Sainou wa Sekai Saikyou deshita,https://myanimelist.net/manga/129173/Shitsugyo...
Helck,https://myanimelist.net/manga/77637/Helck
Class ga Isekai Shoukan sareta Naka Ore dake Nokotta n desu ga,https://myanimelist.net/manga/111380/Class_ga_...
Violence Action,https://myanimelist.net/manga/105182/Violence_...
Shinigami ni Sodaterareta Shoujo wa Shikkoku no Ken wo Mune ni Idaku,https://myanimelist.net/manga/121165/Shinigami...
Suicide Parabellum,https://myanimelist.net/manga/65813/Suicide_Pa...
Kujonin,https://myanimelist.net/manga/109299/Kujonin
Angel Beats! The 4-koma: Bokura no Sensen Koushinkyoku♪,https://myanimelist.net/manga/19672/Angel_Beat...
Zelda no Densetsu: Fushigi no Kinomi - Jikuu no Shou,https://myanimelist.net/manga/1422/Zelda_no_De...


In [81]:
# Write the CSV template that a user fills in before uploading it
create_user_template(data=data)

In [7]:
# Prompt for the path of a filled-in file and display the recommendations for it
upload_file_and_get_recommendation(bsm=bsm)

Input the file path: C:\Users\Tan Kar Gim\Desktop\Capstone\kg.csv


Unnamed: 0_level_0,manga_link
manga_title,Unnamed: 1_level_1
Monster,https://myanimelist.net/manga/1/Monster
Vinland Saga,https://myanimelist.net/manga/642/Vinland_Saga
Oyasumi Punpun,https://myanimelist.net/manga/4632/Oyasumi_Punpun
Grand Blue,https://myanimelist.net/manga/70345/Grand_Blue
Soul Eater,https://myanimelist.net/manga/908/Soul_Eater
Chainsaw Man,https://myanimelist.net/manga/116778/Chainsaw_Man
Shingeki no Kyojin,https://myanimelist.net/manga/23390/Shingeki_n...
Tokyo Ghoul,https://myanimelist.net/manga/33327/Tokyo_Ghoul
Hunter x Hunter,https://myanimelist.net/manga/26/Hunter_x_Hunter
Death Note,https://myanimelist.net/manga/21/Death_Note
