In [19]:
import pandas as pd 
import numpy as np

In [20]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 50)

In [21]:
df = pd.read_csv('data/jikan_final.csv')

In [22]:
df.shape

(26720, 36)

In [23]:
df.nunique()

mal_id             26564
url                26564
images             26364
trailer             4920
approved               1
titles             26564
title              26563
title_english      10998
title_japanese     25462
title_synonyms     12463
type                   9
source                17
episodes             250
status                 3
airing                 2
aired              16116
duration             333
rating                 6
score                559
scored_by           8712
rank               16055
popularity         20364
members            11508
favorites           1901
synopsis           21510
background          2556
season                 4
year                  65
broadcast            623
producers           4701
licensors            265
studios             1681
genres               962
explicit_genres        1
themes               948
demographics           8
dtype: int64

In [369]:
user = pd.read_csv("data/user.csv")

In [370]:
user.nunique()

User ID        35141
Username       35140
Anime ID       14037
Anime Title    14067
Score             10
dtype: int64

In [371]:
user.shape

(3752106, 5)

In [27]:
df.drop_duplicates(inplace=True)

In [372]:
user.drop_duplicates(inplace=True)

In [373]:
counts1 = user['Anime ID'].value_counts()
filtered_user = user[user["Anime ID"].isin(counts1[counts1>=5].index)]

In [374]:
filtered_user.shape

(3743711, 5)

In [375]:
filtered_user.nunique()

User ID        35139
Username       35138
Anime ID       10347
Anime Title    10378
Score             10
dtype: int64

In [32]:
not_yet_aired = df[df.status == "Not yet aired"]

In [33]:
df1 = df[df['mal_id'].isin(filtered_user['Anime ID'])]

In [34]:
df1.shape

(10347, 36)

In [35]:
df1.isna().sum()

mal_id                0
url                   0
images                0
trailer               0
approved              0
titles                0
title                 0
title_english      3830
title_japanese       12
title_synonyms        0
type                  0
source                0
episodes             22
status                0
airing                0
aired                 0
duration              0
rating               10
score                 3
scored_by             3
rank               1426
popularity            0
members               0
favorites             0
synopsis             72
background         8527
season             6310
year               6310
broadcast             0
producers             0
licensors             0
studios               0
genres                0
explicit_genres       0
themes                0
demographics          0
dtype: int64

In [36]:
import warnings
warnings.filterwarnings('ignore')


In [37]:
df1.dropna(subset=['synopsis','rating'],inplace=True)

In [38]:
import ast 
df1.producers = df1.producers.apply(ast.literal_eval)
df1.images = df1.images.apply(ast.literal_eval)
df1.trailer = df1.trailer.apply(ast.literal_eval)
df1.titles = df1.titles.apply(ast.literal_eval)
df1.aired = df1.aired.apply(ast.literal_eval)
df1.broadcast = df1.broadcast.apply(ast.literal_eval)
df1.licensors = df1.licensors.apply(ast.literal_eval)
df1.studios = df1.studios.apply(ast.literal_eval)
df1.genres = df1.genres.apply(ast.literal_eval)
df1.themes = df1.themes.apply(ast.literal_eval)
df1.demographics = df1.demographics.apply(ast.literal_eval)


In [39]:
def extract_info(row):
    # Extract producer names
    producer_names = [producer['name'] for producer in row['producers']]
    licensors_names = [licensor['name'] for licensor in row['licensors']]
    studios_names = [studio['name'] for studio in row['studios']]
    genres = [genre['name'] for genre in row['genres']]
    themes = [theme['name'] for theme in row['themes']]
    demographics = [dg['name'] for dg in row['demographics']]
    
    # Extract embed_url from trailer
    embed_url = row['trailer']['embed_url'] if row['trailer'] else None
    aired = row['aired']['string'] if row['aired'] else None
    # Extract large_image_url from images
    large_image_url = row['images']['jpg']['large_image_url'] if row['images'] else None
    
    return pd.Series([producer_names, licensors_names,studios_names,genres,themes,demographics,embed_url,aired, large_image_url])

# Apply the function to each row of the DataFrame
df1[['producers','licensors','studios','genres','themes','demographics','trailer','aired','image']] = df1.apply(extract_info, axis=1)


In [46]:
df1.shape

(10265, 38)

In [47]:

df1 = df1[~df1['genres'].apply(lambda x: x == [])]

In [48]:
df1.shape

(9895, 38)

In [49]:
df1 = df1.reset_index(drop=True)

In [50]:
import re

pattern = r"\[Written by MAL Rewrite\]|\(.*Source:.*\)" 

# Removing the pattern using regular expressions
df1['synopsis'] = df1['synopsis'].str.replace(pattern, '', regex=True).values


In [51]:
def remove_newline_numbers(text):
    text = text.replace('\n', ' ')
    text = re.sub(r'\d+', ' ', text)
    text = re.sub(r'[^\w\s]', ' ', text)
    return text.lower()

In [52]:
df1['synopsis_cleaned'] = df1.synopsis.apply(remove_newline_numbers)

In [53]:
import spacy
nlp = spacy.load('en_core_web_sm')
df1['synopsis_cleaned'] = df1['synopsis_cleaned'].apply(lambda x: " ".join([token.lemma_ for token in nlp(x) if not token.is_stop]))

In [54]:
rating_map = {
    "PG-13 - Teens 13 or older": "PG-13",
    "R - 17+ (violence & profanity)": "R17",
    "Rx - Hentai": "Rx",
    "R+ - Mild Nudity": "R+",
    "G - All Ages": "G",
    "PG - Children": "PG"
}

# Use the map to replace the values in the 'rating' column
df1['rating'] = df1['rating'].replace(rating_map)

In [55]:
df1['themes'] = df1['themes'].apply(lambda x:["unknown_theme"] if x == [] else x )
df1['demographics'] = df1['demographics'].apply(lambda x:["unknown_demographics"] if x == [] else x )



In [56]:
def get_season(x):
    spring = ["Mar","Apr","May"]
    summer = ["Jun","Jul","Aug"]
    fall = ["Sep","Oct","Nov"]
    winter = ["Dec","Jan","Feb"]
    y = x[:3]
    if y in spring:
        return "spring"
    elif y in winter:
        return "winter"
    elif y in fall:
        return "fall"
    elif y in summer:
        return "summer"

In [57]:
df1.season = df1.aired.apply(get_season)

In [58]:
df1.year = df1.aired.str.split(',').str[1].str[1:5]

In [59]:
def fill_na(row):
    if pd.isna(row['year']):
        if len(row['aired']) == 4:
            return row['aired']
        elif len(row['aired']) == 12:
            return row['aired'][:4]
        else:
            return row['aired'][4:8]
    else:
        return row['year']

# Apply the function to each row of the DataFrame
df1['year'] = df1.apply(fill_na, axis=1)

In [60]:
df1.season = df1.season.fillna("unknownseason")

In [117]:
data = df1[['mal_id', 'url', 'trailer', 'title',
       'title_english', 'type', 'source',
       'episodes', 'status', 'aired', 'duration', 'rating', 'score',
       'scored_by', 'rank', 'popularity', 'members', 'favorites', 'synopsis','synopsis_cleaned',
       'background', 'season', 'year', 'producers', 'licensors',
       'studios', 'genres', 'themes', 'demographics',
       'image']] 

In [118]:
data['producers'] = data['producers'].apply(lambda x: ','.join(x))
data['licensors'] = data['licensors'].apply(lambda x: ','.join(x))
data['genres'] = data['genres'].apply(lambda x: ','.join(x))
data['studios'] = data['studios'].apply(lambda x: ','.join(x))
data['themes'] = data['themes'].apply(lambda x: ','.join(x))
data['demographics'] = data['demographics'].apply(lambda x: ','.join(x))

In [119]:
data = data[~(data.genres.str.contains("Hentai")|data.genres.str.contains("Erotica")|data.genres.str.contains("Boys Love")|data.genres.str.contains("Girls Love"))]

In [120]:
# Assuming df is your DataFrame and 'genres' is your column with the genres
genre_counts = {}
for row in data['genres']:
    for genre in row.split(','):
        if genre in genre_counts:
            genre_counts[genre] += 1
        else:
            genre_counts[genre] = 1

print(genre_counts)

{'Action': 3239, 'Award Winning': 192, 'Sci-Fi': 2025, 'Adventure': 2006, 'Drama': 1702, 'Mystery': 668, 'Supernatural': 899, 'Fantasy': 2435, 'Sports': 386, 'Comedy': 3501, 'Romance': 1490, 'Slice of Life': 655, 'Suspense': 276, 'Ecchi': 699, 'Gourmet': 82, 'Avant Garde': 134, 'Horror': 318}


In [121]:
data = data[data.favorites != 0]


In [122]:
data = data.reset_index(drop=True)

In [123]:
genres_df = data.genres.str.get_dummies(sep=',')

studios_df = data.studios.str.get_dummies(sep=',')
themes_df = data.themes.str.get_dummies(sep=',')
demographics_df = data.demographics.str.get_dummies(sep=',')


In [124]:
status_df = data.status.str.get_dummies()
season_df = data.season.str.get_dummies()
type_df = data.type.str.get_dummies()
source_df = data.source.str.get_dummies()
rating_df = data.rating.str.get_dummies()

In [219]:
data.year = data.year.astype('int')

In [125]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [126]:
vectorizer = TfidfVectorizer()  # Adjust max_features as needed
overview_matrix = vectorizer.fit_transform(data['synopsis_cleaned'])

In [127]:
overview_matrix.shape

(8282, 30104)

In [128]:
overview_matrix = overview_matrix.toarray()
overview_df = pd.DataFrame(overview_matrix)

In [129]:
data.shape

(8282, 30)

In [130]:
from sklearn.decomposition import PCA
num_components = 1000

# Apply PCA for dimensionality reduction
pca = PCA(n_components=num_components)
pca_data = pca.fit_transform(overview_df)

In [131]:
pca_data = pd.DataFrame(pca_data)

In [132]:
pca_data.shape

(8282, 1000)

In [245]:
combined_features = pd.concat([pca_data,source_df,type_df,genres_df,demographics_df,themes_df],axis=1)

In [246]:
combined_features.shape

(8282, 1100)

In [247]:
from sklearn.metrics.pairwise import cosine_similarity
similarity_matrix = cosine_similarity(combined_features)

In [248]:
def recommend(anime):
    index = data[(data['title'] == anime) | (data['title_english'] == anime)].index[0]
    distances = sorted(list(enumerate(similarity_matrix[index])),reverse=True,key= lambda x:x[1])
    for i in distances[1:20]:
        
        print(data.iloc[i[0]].title,"---",i[1])

In [249]:
similarity_matrix.shape

(8282, 8282)

In [250]:
recommend("Kimetsu no Yaiba")

Kimetsu no Yaiba: Katanakaji no Sato-hen --- 0.9029745973111379
Kimetsu no Yaiba: Yuukaku-hen --- 0.8993539104236159
Kimetsu no Yaiba: Mugen Ressha-hen --- 0.8920817240419622
Nokemono-tachi no Yoru --- 0.8140860267253746
Senkaiden Houshin Engi --- 0.8076703168190676
Jujutsu Kaisen --- 0.805488671097358
Kuroshitsuji II --- 0.7689676364706065
Vanitas no Karte Part 2 --- 0.7655582068597917
Orient: Awajishima Gekitou-hen --- 0.7640430319220438
Orient --- 0.7610618337923427
Vanitas no Karte --- 0.759925404830874
Kuroshitsuji: Book of Circus --- 0.7599226106825091
Sengoku Youko: Yonaoshi Kyoudai-hen --- 0.7559346361598986
Kuroshitsuji --- 0.7529338159596949
Kimetsu no Yaiba Movie: Mugen Ressha-hen --- 0.7519118146342139
Ragna Crimson --- 0.737570486515731
Chainsaw Man --- 0.7337489082872317
Yu☆Gi☆Oh! Zexal Second --- 0.7327128184040176
Jigokuraku --- 0.7271858415391513


In [251]:
recommend("Kuroko no Basket 2nd Season")

Kuroko no Basket --- 0.9691568972602083
Kuroko no Basket 3rd Season --- 0.9687037260848341
Slam Dunk --- 0.9439673446049535
Diamond no Ace --- 0.93550232388149
Ahiru no Sora --- 0.934448680491423
Diamond no Ace: Second Season --- 0.9340136132602974
Haikyuu!! To the Top Part 2 --- 0.9302775634989314
Diamond no Ace: Act II --- 0.9265694734604045
Haikyuu!! Second Season --- 0.9256945364715556
Haikyuu!! --- 0.9216046686427005
Haikyuu!! Karasuno Koukou vs. Shiratorizawa Gakuen Koukou --- 0.9214556282627179
Whistle! --- 0.9211156944114792
Days (TV) --- 0.9187966364459417
Haikyuu!! To the Top --- 0.9161523672294373
Boukyaku Battery (TV) --- 0.9146680564328475
Eyeshield 21 --- 0.855457549995078
Major 2nd 2nd Season --- 0.8516103184401271
Area no Kishi --- 0.8505417088329088
Blue Lock --- 0.8493604356880786


In [252]:
recommend("One Piece")

Magi: Sinbad no Bouken (TV) --- 0.9452598135230995
Nanatsu no Taizai: Mokushiroku no Yonkishi --- 0.9443174234699682
Magi: The Labyrinth of Magic --- 0.9436854209752565
Dragon Quest: Dai no Daibouken (TV) --- 0.9430641750624058
Magi: The Kingdom of Magic --- 0.9374976055778587
Fairy Tail (2014) --- 0.9346276844349698
Bleach: Sennen Kessen-hen --- 0.933754737332413
Bleach: Sennen Kessen-hen - Ketsubetsu-tan --- 0.9325351125259214
Nanatsu no Taizai: Seisen no Shirushi --- 0.9319725272022331
Nanatsu no Taizai: Imashime no Fukkatsu --- 0.9303485466587573
Fairy Tail: Final Series --- 0.9302970043233261
Hunter x Hunter --- 0.9298131615300326
Hunter x Hunter (2011) --- 0.928512205875408
Fairy Tail --- 0.9280766630761886
Nanatsu no Taizai: Kamigami no Gekirin --- 0.9280511388452618
Nanatsu no Taizai: Funnu no Shinpan --- 0.9244205861938114
Dragon Quest: Dai no Daibouken (2020) --- 0.9243487350851174
Bleach --- 0.9235907784165057
Nanatsu no Taizai --- 0.9220673434788279


In [253]:
import pickle
pickle.dump(data,open('anime.pkl','wb'))
pickle.dump(similarity_matrix,open('similarity.pkl','wb'))

In [376]:
filtered_user = filtered_user[filtered_user['Anime ID'].isin(data.mal_id)]

In [377]:
filtered_user.nunique()

User ID        34960
Username       34959
Anime ID        8282
Anime Title     8312
Score             10
dtype: int64

In [378]:
counts = filtered_user['User ID'].value_counts()
filtered_user = filtered_user[filtered_user["User ID"].isin(counts[counts>50].index)]

In [379]:
filtered_user.nunique()

User ID        16838
Username       16837
Anime ID        8282
Anime Title     8312
Score             10
dtype: int64

In [380]:
filtered_user = filtered_user.reset_index(drop=True)

In [381]:
filtered_user = filtered_user.iloc[:, [0,2,3,4]]

In [382]:
filtered_user.rename(columns={'User ID':'user_id','Anime ID':'anime_id'},inplace=True)

In [383]:

user_ids = pd.Categorical(filtered_user["user_id"])
filtered_user["user_id_encoded"] = user_ids.codes

anime_ids = pd.Categorical(filtered_user["anime_id"])
filtered_user["anime_id_encoded"] = anime_ids.codes

In [384]:
print(type(X_train['anime_id_encoded'][0]))

<class 'numpy.int16'>


In [385]:
from sklearn.preprocessing import MinMaxScaler

In [386]:
minmax = MinMaxScaler()
filtered_user["Score_scaled"] = minmax.fit_transform(filtered_user[["Score"]])

In [387]:
filtered_user.shape

(3316985, 7)

In [388]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    filtered_user[["user_id_encoded", "anime_id_encoded"]], filtered_user["Score_scaled"], test_size=0.2, random_state=40 , shuffle=True
)


In [389]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import regularizers

In [452]:
num_users = len(set(X_train["user_id_encoded"]))  # Assuming unique user IDs
num_animes = len(set(X_train["anime_id_encoded"]))  # Assuming unique anime IDs
embedding_dim = 32  # Adjust dimensionality as needed

In [453]:
y_train

665344     0.666667
2122242    0.555556
3245820    0.666667
3185269    0.444444
578360     0.555556
             ...   
841010     0.666667
1928184    0.888889
2914311    0.555556
1947867    0.555556
1977670    0.444444
Name: Score_scaled, Length: 2653588, dtype: float64

In [454]:
user_input = keras.layers.Input(name='user_encoded',shape=(1,))
anime_input = keras.layers.Input(name='anime_encoded',shape=(1,))

In [455]:
user_embeddings = keras.layers.Embedding(num_users, embedding_dim, name='user_embedding')(user_input)
anime_embeddings = keras.layers.Embedding(num_animes, embedding_dim,name='anime_embedding')(anime_input)

In [456]:
dot_product = keras.layers.Dot(name='dot_product',axes=2)([user_embeddings, anime_embeddings])
flattened = keras.layers.Flatten()(dot_product)

In [457]:
dense = keras.layers.Dense(64, activation='relu')(flattened)

In [458]:
output = keras.layers.Dense(1, activation="sigmoid")(dense)  # Optional bias can be added before this layer

In [459]:
model = keras.Model(
    inputs=[user_input, anime_input], outputs=output
)

In [460]:
model.compile(
    optimizer="adam", loss="mse", metrics=["mse", "mae"]  # Add more metrics as needed
)

In [461]:
model.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_encoded (InputLayer)   [(None, 1)]                  0         []                            
                                                                                                  
 anime_encoded (InputLayer)  [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 32)                538816    ['user_encoded[0][0]']        
                                                                                                  
 anime_embedding (Embedding  (None, 1, 32)                265024    ['anime_encoded[0][0]']       
 )                                                                                          

In [462]:
model.fit(
    [X_train['user_id_encoded'], X_train['anime_id_encoded']],  # Separate user and anime IDs
    y_train,
    epochs=5,  # Adjust as needed
    batch_size=64,  # Adjust as needed
    validation_data=([X_val['user_id_encoded'], X_val['anime_id_encoded']], y_val),
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1afe93c23d0>

In [463]:
# Assume you have test data
X_test_user = X_val['user_id_encoded']
X_test_item = X_val['anime_id_encoded']

# Make predictions
predictions = model.predict([X_test_user, X_test_item])




In [464]:
print(tf.__version__)

2.15.0


In [465]:
model.save('model.h5') 

In [466]:
anime_ids = np.array(list(set(filtered_user.anime_id_encoded)))

In [467]:
anime_size = anime_ids.shape[0]
anime_size

8282

In [468]:
pickle.dump(filtered_user,open('user.pkl','wb'))

In [470]:
anime_id = data[data.title == "Naruto"].index[0]

In [471]:
def user_anime_recommendations(user_id,anime_id,model,similarity_matrix,filtered_user,data):
    
    anime_ids = np.array(list(set(filtered_user.anime_id_encoded)))
    anime_size = anime_ids.shape[0]

    user_ids = np.array([user_id]*anime_size)
    predictions = model.predict([user_ids,anime_ids])
    
    p = predictions.flatten()
    s = similarity_matrix[anime_id]
    
    ratings = 0.5*s + 0.5*p
    top_anime_index = ratings.argsort()[-30:][::-1]
    
    watched_anime = filtered_user[filtered_user.user_id_encoded == user_id]['anime_id_encoded']
    mask = np.isin(top_anime_index, watched_anime)
    top_unwatched_anime_index = top_anime_index[~mask]
    
    recommended_animes = []
    for i in top_unwatched_anime_index:
        anime_data = data.iloc[i]
        recommended_animes.append(anime_data['title'])
        
    return recommended_animes
    
    


In [472]:
user_anime_recommendations(860,243,model,similarity_matrix,filtered_user,data)



['Akane Maniax',
 'Nekopara OVA',
 'Kud Wafter',
 'To Heart 2 AD',
 'To Heart 2 Adnext',
 'To Heart 2 AD Plus',
 'Yahari Ore no Seishun Love Comedy wa Machigatteiru. Zoku OVA',
 'Tenchi Muyou! Ryououki: Omatsuri Zenjitsu no Yoru!',
 'Mikakunin de Shinkoukei: Kamoniku tte Midori-ppoi Aji ga Suru no ne.',
 'Kidou Senshi SD Gundam no Gyakushuu',
 'To Heart 2 OVA',
 'FLCL',
 'Kaleido Star: Legend of Phoenix - Layla Hamilton Monogatari',
 'Kujibiki Unbalance',
 'Top wo Nerae! Gunbuster',
 'Choujikuu Seiki Orguss 02',
 'Top wo Nerae 2! Diebuster',
 'Dragon Half',
 'Plastic Little',
 'School Days: Valentine Days',
 'True Love Story',
 'Lime-iro Senkitan: Nankoku Yume Roman',
 'Eromanga-sensei OVA',
 'Photon',
 'Ginga Ojousama Densetsu Yuna: Kanashimi no Siren',
 'Shingeki no Kyojin: Kuinaki Sentaku',
 'Saber Marionette J Again']

In [473]:
# Assume 'user_id' is 203
user_id = 909

# Create an array of the user ID repeated for each anime
user_ids = np.array([user_id]*anime_size)

# Make predictions for all animes for this user
predictions = model.predict([user_ids, anime_ids])
top_anime_index = predictions.flatten().argsort()[-20:][::-1]




In [476]:
a = filtered_user[filtered_user.anime_id_encoded.isin(top_anime_index)][['anime_id']]
rec_anime = a.anime_id.unique()
data[data.mal_id.isin(rec_anime)]['title']

25      Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...
406                                              Mushishi
1414                                Byousoku 5 Centimeter
1821    Galaxy Angel Music Collection: Shouen to Shien...
2073                Kara no Kyoukai Movie 1: Fukan Fuukei
2652     Kara no Kyoukai Movie 2: Satsujin Kousatsu (Zen)
2653            Kara no Kyoukai Movie 3: Tsuukaku Zanryuu
2751            Clannad: Mou Hitotsu no Sekai, Tomoyo-hen
2825                Kara no Kyoukai Movie 4: Garan no Dou
2826                 Kara no Kyoukai Movie 5: Mujun Rasen
3046      Kara no Kyoukai Movie 7: Satsujin Kousatsu (Go)
3441                           Gintama: Shiroyasha Koutan
5242                   Mushishi Zoku Shou: Odoro no Michi
5260    Aoki Hagane no Arpeggio: Ars Nova Movie 2 - Ca...
5333                         Koukaku Kidoutai: Shin Movie
5457                  Mushishi Zoku Shou: Suzu no Shizuku
5458                                             Gintama°
5833          

In [475]:
from sklearn.metrics.pairwise import cosine_similarity

# Assume 'anime_id' is 21
anime_id = 10

# Get the embedding of the anime
anime_embedding = model.get_layer('anime_embedding').get_weights()[0]
target_anime_embedding = anime_embedding[anime_id]

# Calculate the cosine similarity between the target anime and all animes
similarities = cosine_similarity([target_anime_embedding], anime_embedding)

# Get the indices of the top 10 similar animes
top_10_indices = similarities[0].argsort()[-10:][::-1]

# Get the IDs of the top 10 similar animes
top_10_anime_ids = anime_ids[top_10_indices]

# Now 'top_10_anime_ids' contains the IDs of the top 10 animes similar to the target anime
a = filtered_user[filtered_user.anime_id_encoded.isin(top_10_anime_ids)][['anime_id']]
rec_anime = a.anime_id.unique()
data[data.mal_id.isin(rec_anime)]['title']

10                                                 Naruto
231                                                Bleach
393     Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...
796     Naruto Movie 2: Dai Gekitotsu! Maboroshi no Ch...
1451                                   Naruto: Shippuuden
1780    Naruto Movie 3: Dai Koufun! Mikazuki Jima no A...
1983                           Naruto: Shippuuden Movie 1
2855                  Naruto: Shippuuden Movie 2 - Kizuna
4856    One Piece: Episode of Merry - Mou Hitori no Na...
6060        Koutetsujou no Kabaneri Movie 2: Moeru Inochi
Name: title, dtype: object

In [25]:
# Assuming df is your DataFrame and 'genres' is your column with the genres
genres = set()
for row in animes['genres']:
    genres.update(row.split(','))

print(genres)



{'Suspense', 'Slice of Life', 'Adventure', 'Action', 'Gourmet', 'Romance', 'Horror', 'Drama', 'Sci-Fi', 'Ecchi', 'Erotica', 'Hentai', 'Fantasy', 'Boys Love', 'Supernatural', 'Comedy', 'Avant Garde', 'Girls Love', 'Award Winning', 'Sports', 'Mystery'}


In [54]:
# Assuming df is your DataFrame and 'genres' is your column with the genres
genre_counts = {}
for row in animes['genres']:
    for genre in row.split(','):
        if genre in genre_counts:
            genre_counts[genre] += 1
        else:
            genre_counts[genre] = 1

print(genre_counts)


{'Action': 2769, 'Award Winning': 167, 'Sci-Fi': 1650, 'Adventure': 1594, 'Drama': 1571, 'Mystery': 585, 'Supernatural': 834, 'Fantasy': 2047, 'Sports': 323, 'Comedy': 2826, 'Romance': 1332, 'Slice of Life': 472, 'Suspense': 260, 'Ecchi': 640, 'Gourmet': 65, 'Avant Garde': 58, 'Horror': 275, 'Girls Love': 85, 'Boys Love': 122, 'Hentai': 553, 'Erotica': 46}
