In [141]:
import numpy as np
import pandas as pd
import math
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [142]:
def preprocess(anime_list): #TODO : Gérer les cas limites 
    anime_list = anime_list.copy()

    ## Dropping columns
    columns_to_keep = ['anime_id', 'Name', 'Genres', 'Synopsis', 'Episodes', 'Aired', 'Studios', 'Duration', 'Rating', 'Type', 'Source']
    anime_list = anime_list[columns_to_keep]

    ## Dealing with Genres : use one-hot encoding 
    all_genres = set()
    for genres in anime_list['Genres']:
        all_genres.update(genres.split(', '))
    for genre in all_genres:
        anime_list["Genre " +genre] = anime_list['Genres'].apply(lambda x: 1 if genre in x.split(', ') else 0)
    anime_list.drop(columns=['Genres'], inplace=True)

    ## Dealing with Episodes and Duration : calculate total length
    anime_list['Episodes'] = pd.to_numeric(anime_list['Episodes'], errors='coerce').fillna(0) #0 if UNKNOWN episodes
    hours = anime_list['Duration'].str.extract(r'(\d+) hr', expand=False).astype(float)
    minutes = anime_list['Duration'].str.extract(r'(\d+) min', expand=False).astype(float)
    hours.fillna(0, inplace=True)
    minutes.fillna(0, inplace=True)
    anime_list['Duration'] = hours * 60 + minutes #0 if UNKNOWN duration
    anime_list['Total_Duration'] = anime_list['Duration'] * anime_list['Episodes']

    ## Dealing with Aired => get starting date
    anime_list['Start_Date'] = pd.to_datetime(anime_list['Aired'].str.split(' to ').str[0], errors='coerce')
    anime_list.drop(columns=['Aired'], inplace=True)

    ## Dealing with Studios => use one-hot encoding
    '''
    all_studios = set()
    for studio in anime_list['Studios']:
        all_studios.update(studio.split(', '))
    for studio in all_studios:
        anime_list["Studio " + studio] = anime_list['Studios'].apply(lambda x: 1 if studio in x.split(', ') else 0)
    anime_list.drop(columns=['Studios'], inplace=True)
    '''

    ## Dealing with Rating => use one-hot encoding
    all_ratings = set()
    for rating in anime_list['Rating']:
        all_ratings.update(rating.split(', '))
    for rating in all_ratings:
        anime_list["Rating " + rating] = anime_list['Rating'].apply(lambda x: 1 if rating in x.split(', ') else 0)
    anime_list.drop(columns=['Rating'], inplace=True)

    ## Dealing with Type => use one-hot encoding
    all_types = set()
    for type in anime_list['Type']:
        all_types.update(type.split(', '))
    for type in all_types:
        anime_list["Type " + type] = anime_list['Type'].apply(lambda x: 1 if type in x.split(', ') else 0)
    anime_list.drop(columns=['Type'], inplace=True)

    ## Dealing with Source => use one-hot encoding
    all_sources = set()
    for source in anime_list['Source']:
        all_sources.update(source.split(', '))
    for source in all_sources:
        anime_list["Source " + source] = anime_list['Source'].apply(lambda x: 1 if source in x.split(', ') else 0)
    anime_list.drop(columns=['Source'], inplace=True)

    ## Dealing with synopsis
    anime_list['Synopsis'] = anime_list['Synopsis'].apply(lambda x: x.translate(str.maketrans('', '', string.punctuation)).lower())
    stop_words = set(stopwords.words('english'))
    anime_list['Synopsis'] = anime_list['Synopsis'].apply(lambda x  : ' '.join([word for word in x.split() if word not in stop_words]))
    lemmatizer = WordNetLemmatizer()
    anime_list['Synopsis'] = anime_list['Synopsis'].apply(lambda x: ' '.join([lemmatizer.lemmatize(word) for word in x.split()]))
    #anime_list['Synopsis'] = anime_list['Synopsis'].apply(lambda x: ' '.join([word for word, pos in pos_tag(word_tokenize(x)) if pos.startswith(('JJ', 'NN', 'VB', 'RB'))])) # add a step to filter names

    return anime_list

In [143]:
def show(anime_list):
    nbl, nbc = anime_list.shape
    print("\nNombre de lignes :", nbl)
    print("\nNombre de colonnes :", nbc)
    print("\nInfos\n")
    print(anime_list.info())
    print("\nDescribe\n")
    print(anime_list.describe())
    print("\nHead\n")
    print(anime_list.head(40))

In [144]:
#fav_anime_list = [21, 16498, 31964, 38000, 136]
fav_anime_list = [21]
anime_list = pd.read_parquet('anime/anime.parquet')
anime_list = preprocess(anime_list)
#show(anime_list)

In [145]:
def adjust_dispersion(df, factor=0.01):
    ## Update df, which have values between 0 and 1, to adjust dispersion relatively to 0.5 to a fixed factor, while keeping the values between 0 and 1

    # Calculate the current mean absolute deviation from 0.5
    current_mad = np.abs(df['similarity'] - 0.5).mean()
    
    # Scale the values to achieve the desired dispersion relative to 0.5
    scaled_values = df['similarity'] + (0.5 - df['similarity']) * (factor / current_mad)
    
    # Ensure values are between 0 and 1
    scaled_values = np.clip(scaled_values, 0, 1)
    
    df['similarity'] = scaled_values
    
    return df

In [146]:
def recommend_anime(similarities_tab):
    sorted_df = similarities_tab.sort_values(by='similarity', ascending=False)
    top_anime_ids = sorted_df.head(30)['anime_id'].tolist()
    recommended_animes = []
    for anime_id in top_anime_ids:
        anime_name = anime_list.loc[anime_list['anime_id'] == anime_id, 'Name'].iloc[0]
        recommended_animes.append({'anime_id': anime_id, 'Name': anime_name})
    return pd.DataFrame(recommended_animes)

In [147]:
def recommend_anime_global(similarities_tab):
    sorted_df = similarities_tab.sort_values(by='total_similarity', ascending=False)
    top_anime_ids = sorted_df.head(30)['anime_id'].tolist()
    recommended_animes = []
    for anime_id in top_anime_ids:
        anime_name = anime_list.loc[anime_list['anime_id'] == anime_id, 'Name'].iloc[0]
        recommended_animes.append({'anime_id': anime_id, 'Name': anime_name})
    return pd.DataFrame(recommended_animes)

In [148]:
### Synopsis


def extract_keywords(anime_ids, anime_list):
    # récupérer tous les synopsis des animes favoris
    fav_anime_synopsis = anime_list.loc[anime_list['anime_id'].isin(anime_ids), 'Synopsis'].tolist()
    # concaténer l'ensemble de ces synopsis
    fav_anime_synopsis = ' '.join(fav_anime_synopsis)
    # récupérer les mots clés
    fav_anime_keywords = fav_anime_synopsis.split()
    fav_anime_keywords = [word.translate(str.maketrans('', '', string.punctuation)).lower() for word in fav_anime_keywords]
    stop_words = set(stopwords.words('english'))
    fav_anime_keywords = [word for word in fav_anime_keywords if word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    fav_anime_keywords = [lemmatizer.lemmatize(word) for word in fav_anime_keywords]
    return ' '.join(fav_anime_keywords)


def recommendation_synopsis_based(fav_anime_list, anime_list):
    # Extraction de mots-clés des synopsis des animes favoris
    fav_anime_keywords = extract_keywords(fav_anime_list, anime_list)

    anime_ids = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), 'anime_id'].values

    # Calcul de la similarité cosinus entre les mots-clés générés des animes favoris et les synopsis de tous les autres animes
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix_other_anime = tfidf_vectorizer.fit_transform(anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), 'Synopsis'])
    tfidf_matrix_fav_anime = tfidf_vectorizer.transform([fav_anime_keywords]) 
    cosine_similarities = cosine_similarity(tfidf_matrix_other_anime, tfidf_matrix_fav_anime)

    return pd.DataFrame({'anime_id': anime_ids, 'similarity': cosine_similarities.flatten()})


synopsis_cosine_similarities_tab = recommendation_synopsis_based(fav_anime_list, anime_list)
print(synopsis_cosine_similarities_tab)

synopsis_cosine_similarities_tab = adjust_dispersion(synopsis_cosine_similarities_tab)

print(synopsis_cosine_similarities_tab)
recommended_animes = recommend_anime(synopsis_cosine_similarities_tab)
print(recommended_animes[['anime_id', 'Name']])

       anime_id  similarity
0             1    0.013469
1             5    0.026405
2             6    0.027187
3             7    0.005047
4             8    0.031917
...         ...         ...
24897     55731    0.000000
24898     55732    0.000000
24899     55733    0.000000
24900     55734    0.000000
24901     55735    0.000000

[24902 rows x 2 columns]
       anime_id  similarity
0             1    0.023371
1             5    0.036043
2             6    0.036809
3             7    0.015120
4             8    0.041444
...         ...         ...
24897     55731    0.010176
24898     55732    0.010176
24899     55733    0.010176
24900     55734    0.010176
24901     55735    0.010176

[24902 rows x 2 columns]
    anime_id                                               Name
0      12859                                  One Piece Film: Z
1      36215  One Piece: Episode of East Blue - Luffy to 4-n...
2      38234                       One Piece Movie 14: Stampede
3       5252        

In [149]:
### Genre
##Limit case : check that every anime in fav_anime_list has not Genre_UNKNOWN, if not delete this anime from the list to build fav_genres_prop

def recommendation_genre_based(fav_anime_list, anime_list):
    anime_ids = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), 'anime_id'].values

    if not fav_anime_list:
        return pd.DataFrame({'anime_id': anime_ids, 'similarity': 0})
    
    similarities = []

    fav_genres = anime_list.loc[anime_list['anime_id'].isin(fav_anime_list), anime_list.filter(regex='^Genre').columns].sum()
    fav_genres_prop = fav_genres / fav_genres.sum()

    other_anime_genres = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), anime_list.filter(regex='^Genre').columns]
    for _, row in other_anime_genres.iterrows():
        genre_similarity = sum(row[genre] * fav_genres_prop[genre] for genre in fav_genres_prop.index)
        similarities.append(genre_similarity)
       
    return pd.DataFrame({'anime_id': anime_ids, 'similarity': similarities})

fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Genre UNKNOWN'].values[0] == 0]

genre_cosine_similarities_tab = recommendation_genre_based(fav_anime_list, anime_list)
genre_cosine_similarities_tab = adjust_dispersion(genre_cosine_similarities_tab)

print(genre_cosine_similarities_tab)

recommended_animes = recommend_anime(genre_cosine_similarities_tab)
print(recommended_animes[['anime_id', 'Name']])

       anime_id  similarity
0             1    0.337689
1             5    0.337689
2             6    0.662311
3             7    0.337689
4             8    0.662311
...         ...         ...
24897     55731    0.337689
24898     55732    0.986932
24899     55733    0.986932
24900     55734    0.013068
24901     55735    0.013068

[24902 rows x 2 columns]
    anime_id                                               Name
0      27825                         Long Zhi Gu: Poxiao Qibing
1      38198          Nanatsu no Taizai: Eiyuu-tachi wa Hashagu
2      31821                    Arslan Senki (TV): Fuujin Ranbu
3      19951          Hunter x Hunter Movie 2: The Last Mission
4      51162  One Piece: Otoshidama Special - Tokubetsu Hou ...
5      40734                             Yao Shen Ji 4th Season
6      52368                                      AOTU Shijie 4
7       6633               Queen's Blade: Gyokuza wo Tsugu Mono
8      37254         Last Period: Owarinaki Rasen no Monogatar

In [150]:
### Rating
##Limit case : check that every anime in fav_anime_list has not Rating UNKNOWN, if not delete this anime from the list to build fav_ratings_prop

def recommendation_rating_based(fav_anime_list, anime_list):
    anime_ids = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), 'anime_id'].values

    if not fav_anime_list:
        return pd.DataFrame({'anime_id': anime_ids, 'similarity': 0})
    
    similarities = []

    fav_ratings = anime_list.loc[anime_list['anime_id'].isin(fav_anime_list), anime_list.filter(regex='^Rating').columns].sum()
    fav_ratings_prop = fav_ratings / fav_ratings.sum()

    print(fav_ratings_prop)

    other_anime_ratings = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), anime_list.filter(regex='^Rating').columns]
    for _, row in other_anime_ratings.iterrows():
        rating_similarity = sum(row[rate] * fav_ratings_prop[rate] for rate in fav_ratings_prop.index)
        similarities.append(rating_similarity)
       
    return pd.DataFrame({'anime_id': anime_ids, 'similarity': similarities})

fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Rating UNKNOWN'].values[0] == 0]

rating_cosine_similarities_tab = recommendation_rating_based(fav_anime_list, anime_list)
rating_cosine_similarities_tab = adjust_dispersion(rating_cosine_similarities_tab)

print(rating_cosine_similarities_tab)

recommended_animes = recommend_anime(rating_cosine_similarities_tab)
print(recommended_animes[['anime_id', 'Name']])

Rating G - All Ages                      0.0
Rating PG - Children                     0.0
Rating PG-13 - Teens 13 or older         1.0
Rating R - 17+ (violence & profanity)    0.0
Rating R+ - Mild Nudity                  0.0
Rating Rx - Hentai                       0.0
Rating UNKNOWN                           0.0
dtype: float64
       anime_id  similarity
0             1        0.01
1             5        0.01
2             6        0.99
3             7        0.99
4             8        0.01
...         ...         ...
24897     55731        0.99
24898     55732        0.99
24899     55733        0.99
24900     55734        0.99
24901     55735        0.99

[24902 rows x 2 columns]
    anime_id                                               Name
0      55735                                     Shijuuku Nichi
1      19053                           Kaitou Jigoma Ongaku-hen
2      47375                                Mei Shaonu Da Picha
3      18967          Zukkoke Sannin-gumi: Zukkoke J

In [151]:
### Type 
##Limit case : check that every anime in fav_anime_list has not "Type UNKNOWN", if not delete this anime from the list to build fav_types_prop

def recommendation_type_based(fav_anime_list, anime_list):
    anime_ids = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), 'anime_id'].values

    if not fav_anime_list:
        return pd.DataFrame({'anime_id': anime_ids, 'similarity': 0})
    
    similarities = []

    fav_types = anime_list.loc[anime_list['anime_id'].isin(fav_anime_list), anime_list.filter(regex='^Type').columns].sum()
    fav_types_prop = fav_types / fav_types.sum()

    other_anime_types = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), anime_list.filter(regex='^Type').columns]
    for _, row in other_anime_types.iterrows():
        type_similarity = sum(row[type] * fav_types_prop[type] for type in fav_types_prop.index)
        similarities.append(type_similarity)
       
    return pd.DataFrame({'anime_id': anime_ids, 'similarity': similarities})

fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Type UNKNOWN'].values[0] == 0]

print(fav_anime_list)

type_cosine_similarities_tab = recommendation_type_based(fav_anime_list, anime_list)
type_cosine_similarities_tab = adjust_dispersion(type_cosine_similarities_tab)

print(type_cosine_similarities_tab)

recommended_animes = recommend_anime(type_cosine_similarities_tab)
print(recommended_animes[['anime_id', 'Name']])

[21]
       anime_id  similarity
0             1        0.99
1             5        0.01
2             6        0.99
3             7        0.99
4             8        0.99
...         ...         ...
24897     55731        0.01
24898     55732        0.01
24899     55733        0.01
24900     55734        0.01
24901     55735        0.01

[24902 rows x 2 columns]
    anime_id                                               Name
0          1                                       Cowboy Bebop
1      46599                                  Xiaohu Da Guanjia
2      46575                    Kuaile Baobei: Cheng Chang Riji
3      46574                        Kuaile Baobei: Huanle Jiaqi
4      46573                     Kuaile Baobei: Duocai Shenghuo
5      19221  Ore no Nounai Sentakushi ga, Gakuen Love Comed...
6      46572                       Kuaile Baobei: Xingqu Mofang
7      46571                       Kuaile Baobei: Huoli Jiating
8      46570                       Kuaile Baobei: Bai Bia

In [152]:
### Source 
##Limit case : check that every anime in fav_anime_list has not "Source Unknown", if not delete this anime from the list to build fav_sources_prop

def recommendation_source_based(fav_anime_list, anime_list):
    anime_ids = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), 'anime_id'].values

    if not fav_anime_list:
        return pd.DataFrame({'anime_id': anime_ids, 'similarity': 0})
    
    similarities = []

    fav_sources = anime_list.loc[anime_list['anime_id'].isin(fav_anime_list), anime_list.filter(regex='^Source').columns].sum()
    fav_sources_prop = fav_sources / fav_sources.sum()

    other_anime_sources = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), anime_list.filter(regex='^Source').columns]
    for _, row in other_anime_sources.iterrows():
        source_similarity = sum(row[source] * fav_sources_prop[source] for source in fav_sources_prop.index)
        similarities.append(source_similarity)
       
    return pd.DataFrame({'anime_id': anime_ids, 'similarity': similarities})

fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Source Unknown'].values[0] == 0]
print(fav_anime_list)

source_cosine_similarities_tab = recommendation_source_based(fav_anime_list, anime_list)
source_cosine_similarities_tab = adjust_dispersion(source_cosine_similarities_tab)

print(source_cosine_similarities_tab)

recommended_animes = recommend_anime(source_cosine_similarities_tab)
print(recommended_animes[['anime_id', 'Name']])

[21]
       anime_id  similarity
0             1        0.01
1             5        0.01
2             6        0.99
3             7        0.01
4             8        0.99
...         ...         ...
24897     55731        0.01
24898     55732        0.01
24899     55733        0.01
24900     55734        0.01
24901     55735        0.01

[24902 rows x 2 columns]
    anime_id                                               Name
0      14227                             Tonari no Kaibutsu-kun
1      40801  Aisei Tenshi Love Mary: Akusei Jutai - The Ani...
2      33069  Dimension W: Short Track/Robot wa Sentou no Yu...
3      33071                    Bungou Stray Dogs: Hitori Ayumu
4      50695                                           MF Ghost
5      50696               One Piece: Barto no Himitsu no Heya!
6       6203                                      Sasameki Koto
7      33074                          Lupin III (2015) Specials
8       6198  Detective Conan OVA 08: High School Girl De

In [160]:
### Duration
## Limit case : check that every anime in fav_anime_list has not UNKNOWN for episodes or duration, if not delete this anime from the list to build avg_fav_duration

def recommendation_duration_based(fav_anime_list, anime_list):

    anime_ids = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), 'anime_id'].values

    if not fav_anime_list:
        return pd.DataFrame({'anime_id': anime_ids, 'similarity': 0})
    
    
    similarities = []

    avg_fav_duration = anime_list.loc[anime_list['anime_id'].isin(fav_anime_list), 'Total_Duration'].mean()

    other_anime_durations = anime_list.loc[~anime_list['anime_id'].isin(fav_anime_list), 'Total_Duration']

    for duration in other_anime_durations:
        if duration != 0:
            relative_difference = abs(duration - avg_fav_duration) / max(duration, avg_fav_duration)
            duration_similarity = 1 - relative_difference
        else:
            duration_similarity = 0.5 #similarity equals 0.5 if duration equals 0 (meaning UNKNOW number of episodes or UNKNOW duration)
        similarities.append(duration_similarity)

    return pd.DataFrame({'anime_id': anime_ids, 'similarity': similarities})

#Filtering anime with missing information
fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Episodes'].values[0] != 0]
fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Duration'].values[0] != 0]

duration_cosine_similarities_tab = recommendation_duration_based(fav_anime_list, anime_list)
duration_cosine_similarities_tab = adjust_dispersion(duration_cosine_similarities_tab)
print(duration_cosine_similarities_tab)

recommended_animes = recommend_anime(duration_cosine_similarities_tab)
print(recommended_animes[['anime_id', 'Name']])

       anime_id  similarity
0             1    0.839975
1             5    0.166639
2             6    0.839975
3             7    0.874369
4             8    0.610412
...         ...         ...
24894     55731    0.500000
24895     55732    0.500000
24896     55733    0.500000
24897     55734    0.018479
24898     55735    0.018479

[24899 rows x 2 columns]
    anime_id                                               Name
0      48904                                 Digital Tokoro-san
1      46521                                     Mimi Zhao Mama
2       1137                                          Mushrambo
3      14333                                     Tanken Driland
4      45741                          Dou Dou Hu Shuxue Wangguo
5      46156                                  Tian Jiang Xiaozi
6      45738                                         Dou Dou Hu
7      45740                        Dou Dou Hu Hanzi Daxue Tang
8      45746                       Dou Dou Hu Zheng Shi Tianxi

In [154]:
def preprocess_fav_anime_list(fav_anime_list, anime_list, feature):
    if feature == 'genre':
        filtered_fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Genre UNKNOWN'].values[0] == 0]
    elif feature == 'duration':
        filtered_fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Episodes'].values[0] != 0 and anime_list.loc[anime_list['anime_id'] == anime_id, 'Duration'].values[0] != 0]
    elif feature == 'type':
        filtered_fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Type UNKNOWN'].values[0] == 0]
    elif feature == 'source':
        filtered_fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Source Unknown'].values[0] == 0]
    elif feature == 'rating':
        filtered_fav_anime_list = [anime_id for anime_id in fav_anime_list if anime_list.loc[anime_list['anime_id'] == anime_id, 'Rating UNKNOWN'].values[0] == 0]
    else:
        filtered_fav_anime_list = fav_anime_list
    return filtered_fav_anime_list

In [164]:
## Final combination

fav_anime_list = [21, 16498, 31964, 38000, 136]
#fav_anime_list = [21]

anime_list = pd.read_parquet('anime/anime.parquet')
anime_list = preprocess(anime_list)

genre_cosine_similarities_tab = recommendation_genre_based(preprocess_fav_anime_list(fav_anime_list, anime_list, 'genre'), anime_list)
genre_cosine_similarities_tab = adjust_dispersion(genre_cosine_similarities_tab)

duration_cosine_similarities_tab = recommendation_duration_based(preprocess_fav_anime_list(fav_anime_list, anime_list, 'duration'), anime_list)
duration_cosine_similarities_tab = adjust_dispersion(duration_cosine_similarities_tab)

synopsis_cosine_similarities_tab = recommendation_synopsis_based(preprocess_fav_anime_list(fav_anime_list, anime_list, 'synopsis'), anime_list)
synopsis_cosine_similarities_tab = adjust_dispersion(synopsis_cosine_similarities_tab)

rating_cosine_similarities_tab = recommendation_source_based(preprocess_fav_anime_list(fav_anime_list, anime_list, 'rating'), anime_list)
rating_cosine_similarities_tab = adjust_dispersion(rating_cosine_similarities_tab)

type_cosine_similarities_tab = recommendation_type_based(preprocess_fav_anime_list(fav_anime_list, anime_list, 'type'), anime_list)
type_cosine_similarities_tab = adjust_dispersion(type_cosine_similarities_tab)

source_cosine_similarities_tab = recommendation_source_based(preprocess_fav_anime_list(fav_anime_list, anime_list, 'source'), anime_list)
source_cosine_similarities_tab = adjust_dispersion(source_cosine_similarities_tab)

'''
print(genre_cosine_similarities_tab)
print(duration_cosine_similarities_tab)
print(synopsis_cosine_similarities_tab)
print(type_cosine_similarities_tab)
print(source_cosine_similarities_tab)
print(rating_cosine_similarities_tab)
'''


combined_tab = pd.merge(genre_cosine_similarities_tab, duration_cosine_similarities_tab, on='anime_id', suffixes=('_genre', '_duration'))
combined_tab = pd.merge(combined_tab, synopsis_cosine_similarities_tab, on='anime_id', suffixes=('_', '_synopsis'))
combined_tab = pd.merge(combined_tab, type_cosine_similarities_tab, on='anime_id', suffixes=('', '_type'))
combined_tab = pd.merge(combined_tab, source_cosine_similarities_tab, on='anime_id', suffixes=('', '_source'))
combined_tab = pd.merge(combined_tab, rating_cosine_similarities_tab, on='anime_id', suffixes=('', '_rating'))

#print(combined_tab)


# Calculate total similarity
combined_tab['total_similarity'] = (
    0.05 * combined_tab['similarity_genre'] +
    0.01 * combined_tab['similarity_duration'] +
    0.90 * combined_tab['similarity'] + #synopsis
    0.02 * combined_tab['similarity_type'] +
    0.01 * combined_tab['similarity_source'] + 
    0.01 * combined_tab['similarity_rating']
)

recommended_animes = recommend_anime_global(combined_tab)
print(recommended_animes[['anime_id', 'Name']])


    anime_id                                               Name
0      25777                        Shingeki no Kyojin Season 2
1      51019           Kimetsu no Yaiba: Katanakaji no Sato-hen
2      11061                             Hunter x Hunter (2011)
3      36215  One Piece: Episode of East Blue - Luffy to 4-n...
4      36456                   Boku no Hero Academia 3rd Season
5      35760                        Shingeki no Kyojin Season 3
6      12859                                  One Piece Film: Z
7      38524                 Shingeki no Kyojin Season 3 Part 2
8      36702  Shingeki no Kyojin Season 2 Movie: Kakusei no ...
9      33486                   Boku no Hero Academia 2nd Season
10     38234                       One Piece Movie 14: Stampede
11      1237  One Piece: Oounabara ni Hirake! Dekkai Dekkai ...
12     40028               Shingeki no Kyojin: The Final Season
13     31374                        Shingeki! Kyojin Chuugakkou
14     40906              Dragon Quest: 