In [48]:
import pandas as pd
import re
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

def text_cleaning(text):
    text = re.sub(r'&quot;', '', text)
    text = re.sub(r'.hack//', '', text)
    text = re.sub(r'&#039;', '', text)
    text = re.sub(r'A&#039;s', '', text)
    text = re.sub(r'I&#039;', 'I\'', text)
    text = re.sub(r'&amp;', 'and', text)
    
    return text

r_cols = ['user_id', 'anime_id', 'rating']
df_ratings = pd.read_csv('data/rating.csv', usecols=r_cols, encoding = 'utf-8')

#eliminar los rating -1 que significaria que no han valorado
df_ratings = df_ratings[df_ratings['rating'] != -1]

c_cols = ['anime_id', 'name','type']
df_anime = pd.read_csv('data/anime.csv', usecols=c_cols,encoding = 'utf-8')
df_anime = df_anime[df_anime['type'] != 'music']
df_anime['name'] = df_anime['name'].apply(text_cleaning)
anime_dict = dict(zip(df_anime['anime_id'], df_anime['name']))

# Crear un diccionario: {anime_id: name}
df_ratings['name'] = df_ratings['anime_id'].map(anime_dict)
df_ratings.tail()


Unnamed: 0,user_id,anime_id,rating,name
1833494,17770,18661,9,Kamisama Hajimemashita OVA
1833496,17770,21507,5,Soul Eater NOT!
1833499,17770,27891,8,Sword Art Online II: Debriefing
1833515,73517,11061,10,Hunter x Hunter (2011)
1833516,73517,2476,1,School Days


In [41]:
user_rating = df_ratings.pivot_table(index =['user_id'],columns = ['name'], values='rating')
user_rating.head()


name,0,001,009 Re:Cyborg,009-1,009-1: RandB,00:08,07-Ghost,1+2=Paradise,100%,100-man-nen Chikyuu no Tabi: Bander Book,...,lilac (bombs Jun Togawa),makemagic,s.CRY.ed,vivi,xxxHOLiC,xxxHOLiC Kei,xxxHOLiC Movie: Manatsu no Yoru no Yume,xxxHOLiC Rou,xxxHOLiC Shunmuki,◯
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,2.0,,,,,
7,,,,,,,,,,,...,,,,,,,,,,


In [None]:
#correlacion de recomendacion entre todos los animes 
corrAnime = user_rating.corr(method ='pearson', min_periods = 100)
corrAnime.head()


name,0,001,009 Re:Cyborg,009-1,009-1: RandB,00:08,07-Ghost,1+2=Paradise,100%,100-man-nen Chikyuu no Tabi: Bander Book,...,lilac (bombs Jun Togawa),makemagic,s.CRY.ed,vivi,xxxHOLiC,xxxHOLiC Kei,xxxHOLiC Movie: Manatsu no Yoru no Yume,xxxHOLiC Rou,xxxHOLiC Shunmuki,◯
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,,,,,,,,,,
001,,,,,,,,,,,...,,,,,,,,,,
009 Re:Cyborg,,,,,,,,,,,...,,,,,,,,,,
009-1,,,,,,,,,,,...,,,,,,,,,,
009-1: RandB,,,,,,,,,,,...,,,,,,,,,,


In [None]:
#rating del usuario seleccionado 
print(corrAnime.shape)
myRating = user_rating.loc[73517].dropna()
myRating

(8939, 8939)


name
Hunter x Hunter (2011)    10.0
School Days                1.0
Name: 73517, dtype: float64

In [None]:
#posibles recomendaciones segun usuario seleccionado 
simCandidates = pd.Series()
print(simCandidates)
for i in range(0,len(myRating.index)):
    print("añadiendo animes similares a" + myRating.index[i] + "...")
    sims = corrAnime[myRating.index[i]].dropna()
    sims= sims.map( lambda x:x*myRating[i])
    simCandidates = pd.concat([simCandidates, sims])
    
print("ordenando...")
simCandidates.sort_values(inplace = True, ascending =False)
print(simCandidates.head(10))

Series([], dtype: object)
añadiendo animes similares aHunter x Hunter (2011)...
añadiendo animes similares aSchool Days...
ordenando...
Hunter x Hunter (2011)                           10.000000
Kimi to Boku.                                     4.845536
Shinryaku! Ika Musume                             4.274217
Gintama                                           4.244667
Mobile Suit Gundam Wing                           4.165699
Fullmetal Alchemist: The Sacred Star of Milos     4.137548
Kingdom 2nd Season                                4.129043
Uchouten Kazoku                                   4.047548
Diamond no Ace: Second Season                     4.021637
Initial D First Stage                             3.970368
dtype: float64


In [None]:
#orden segun la valoracion del gusto del usuario seleccionado 
simCandidates = simCandidates.groupby(simCandidates.index).sum()
simCandidates.sort_values(inplace = True, ascending = False)
simCandidates.head(10)


Hunter x Hunter (2011)                           10.055210
Kimi to Boku.                                     5.036541
Shinryaku! Ika Musume                             4.441850
Mobile Suit Gundam Wing                           4.382745
Fullmetal Alchemist: The Sacred Star of Milos     4.329202
Gintama                                           4.316864
Phi Brain: Kami no Puzzle                         4.275950
Initial D First Stage                             4.206342
Jigoku Shoujo                                     4.143791
Kingdom 2nd Season                                4.129043
dtype: float64

In [None]:
#eliminacion de los candidatos del rating del usuario 
filteredSims = simCandidates.drop(myRating.index)
filteredSims.head(10)

Kimi to Boku.                                    5.036541
Shinryaku! Ika Musume                            4.441850
Mobile Suit Gundam Wing                          4.382745
Fullmetal Alchemist: The Sacred Star of Milos    4.329202
Gintama                                          4.316864
Phi Brain: Kami no Puzzle                        4.275950
Initial D First Stage                            4.206342
Jigoku Shoujo                                    4.143791
Kingdom 2nd Season                               4.129043
Afro Samurai: Resurrection                       4.108236
dtype: float64