In [None]:
import pandas as pd

In [None]:
# Load the anime CSV into a DataFrame and preview its first rows.
# The bare `data.head()` must stay the last expression so the cell displays it.
data = pd.read_csv("/content/drive/MyDrive/anime_data_17000.csv")
data.head()

Unnamed: 0,mal_id,name,synopsis,type,source,image,genre
0,1,Cowboy Bebop,"Crime is timeless. By the year 2071, humanity ...",TV,Original,https://cdn.myanimelist.net/images/anime/4/196...,"Action, Award Winning, Sci-Fi"
1,5,Cowboy Bebop: Tengoku no Tobira,"Another day, another bounty—such is the life o...",Movie,Original,https://cdn.myanimelist.net/images/anime/1439/...,"Action, Sci-Fi"
2,6,Trigun,"Vash the Stampede is the man with a $$60,000,0...",TV,Manga,https://cdn.myanimelist.net/images/anime/1130/...,"Action, Adventure, Sci-Fi"
3,7,Witch Hunter Robin,"Though hidden away from the general public, Wi...",TV,Original,https://cdn.myanimelist.net/images/anime/10/19...,"Action, Drama, Mystery, Supernatural"
4,8,Bouken Ou Beet,It is the dark century and the people are suff...,TV,Manga,https://cdn.myanimelist.net/images/anime/7/215...,"Action, Adventure, Fantasy"


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the CSV file
file_path = '/content/drive/MyDrive/anime_data_17000.csv'
df = pd.read_csv(file_path)

# Make sure every column used below is present. 'genre' is part of the check
# because it is concatenated into combined_features and selected by
# recommend_anime_partial; without it the failure would be a bare KeyError.
required_columns = ['mal_id', 'name', 'type', 'source', 'genre']
if not all(col in df.columns for col in required_columns):
    raise ValueError(f"CSV harus memiliki kolom: {required_columns}")

# Combine the text features used by the model into one string per row;
# missing values are replaced by empty strings so the concatenation never
# produces NaN.
df['combined_features'] = df['name'].fillna('') + " " + \
                          df['type'].fillna('') + " " + \
                          df['source'].fillna('') + " " + \
                          df['genre'].fillna('')


# Turn the combined text into TF-IDF vectors (English stop words removed)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

# Pairwise cosine similarity between all rows. The result has one row and one
# column per anime, so its size grows quadratically with the dataset.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Fungsi untuk merekomendasikan anime berdasarkan nama (pencarian parsial didukung)
def recommend_anime_partial(name, cosine_sim=cosine_sim, df=df, top_n=100):
    """Recommend anime similar to the first title containing ``name``.

    Parameters
    ----------
    name : str
        Fragment of a title. It is matched case-insensitively as a literal
        substring of the 'name' column (regex metacharacters are not special).
    cosine_sim : array-like
        Square similarity matrix whose row/column ``i`` corresponds to row
        position ``i`` of ``df``.
    df : pandas.DataFrame
        Dataset with at least 'mal_id', 'name', 'type', 'source' and 'genre'.
    top_n : int
        Maximum number of recommendations to return.

    Returns
    -------
    tuple[str, pandas.DataFrame]
        A header message and the recommended rows, most similar first. When
        no title matches, the message reports that and the frame is empty, so
        callers can always unpack two values.
    """
    # Lower-case the query (the message below echoes this normalised form)
    name = name.lower()

    output_columns = ['mal_id', 'name', 'type', 'source', 'genre']

    # na=False keeps rows with a missing name out of the mask instead of
    # producing NaN entries; regex=False treats the query as plain text.
    name_mask = df['name'].str.contains(name, case=False, na=False, regex=False)

    # Nothing matched: return the message together with an empty result frame
    if not name_mask.any():
        return (f"Anime yang mengandung '{name}' tidak ditemukan dalam dataset.",
                df.iloc[0:0][output_columns])

    # Use the first match as the basis. argmax on a boolean array gives the
    # position of the first True, which is what cosine_sim and iloc expect
    # (an index label would only coincide with it for a default RangeIndex).
    idx = int(name_mask.to_numpy().argmax())
    selected_anime_name = df.iloc[idx]['name']

    # Rank every row by similarity to the selected anime, highest first
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Drop the selected anime explicitly rather than assuming it sorts first:
    # rows with identical features tie with it at the top score.
    sim_indices = [position for position, _ in ranked if position != idx][:top_n]

    recommendations = df.iloc[sim_indices][output_columns]

    return f"Rekomendasi berdasarkan anime '{selected_anime_name}':\n", recommendations


# Example: look up a title by a fragment of its name and print the result.
anime_name = "cowboy"
# When a title matches, the call returns a (header message, DataFrame) pair.
result, recommendations = recommend_anime_partial(anime_name)
print(result)
print(recommendations)

Rekomendasi berdasarkan anime 'Cowboy Bebop':

       mal_id                                           name        type  \
1           5                Cowboy Bebop: Tengoku no Tobira       Movie   
7585    17205               Cowboy Bebop: Ein no Natsuyasumi     Special   
3445     4037                Cowboy Bebop: Yose Atsume Blues  TV Special   
3131     3572                                      Macross F          TV   
5143     7791                                         K-On!!          TV   
...       ...                                            ...         ...   
6174    10389                               Momo e no Tegami       Movie   
196       219  Kidou Senkan Nadesico: The Prince of Darkness       Movie   
694       759                               Tokyo Godfathers       Movie   
939      1034        Ouritsu Uchuugun: Honneamise no Tsubasa       Movie   
15035   38826                                    Tenki no Ko       Movie   

             source                     

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import save_npz, load_npz, csr_matrix
import joblib  # Untuk menyimpan dan memuat model

# Dataset location and the file the trained model is written to
file_path = '/content/drive/MyDrive/anime_data_17000.csv'
model_path = 'anime_recommender_model.pkl'

# Load the dataset
df = pd.read_csv(file_path)

# Abort early if any column needed later is missing
required_columns = ['mal_id', 'name', 'type', 'source', 'genre']
if any(col not in df.columns for col in required_columns):
    raise ValueError(f"CSV harus memiliki kolom: {required_columns}")

# Build one text field per row: name, type, source and genre joined by single
# spaces, with missing values treated as empty strings
combined = df['name'].fillna('')
for column in ('type', 'source', 'genre'):
    combined = combined + " " + df[column].fillna('')
df['combined_features'] = combined

# **Proses Pelatihan**
def train_and_save_model(df, model_path):
    """Fit a TF-IDF vectorizer on ``df['combined_features']`` and persist it.

    Two files are written: the sparse TF-IDF matrix goes to
    'tfidf_matrix.npz' (a relative path, so it lands in the current working
    directory), and a dict with the keys 'tfidf_vectorizer',
    'tfidf_matrix_path' and 'dataframe' is dumped to ``model_path``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset containing a 'combined_features' text column.
    model_path : str
        Destination of the joblib file.
    """
    # TF-IDF vectorizer with English stop words removed
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['combined_features'])  # Sparse matrix

    # Store the TF-IDF matrix in scipy's sparse .npz format
    save_npz('tfidf_matrix.npz', tfidf_matrix)

    # Bundle the vectorizer, the matrix location and the data. Pairwise
    # similarities are intentionally not stored: they depend on a variable
    # that is not an argument of this function, would be written as a dense
    # rows-by-rows array, and can be recomputed from the saved TF-IDF matrix.
    model_data = {
        'tfidf_vectorizer': tfidf,
        'tfidf_matrix_path': 'tfidf_matrix.npz',
        'dataframe': df
    }
    joblib.dump(model_data, model_path)
    print(f"Model berhasil disimpan ke: {model_path}")


In [None]:

# Fungsi untuk merekomendasikan anime
def recommend_anime_partial(name, model_path, top_n=100):
    """Recommend anime similar to the first title containing ``name``.

    The saved model bundle is loaded from ``model_path`` on every call, and
    the TF-IDF matrix is read from the path recorded inside that bundle.

    Parameters
    ----------
    name : str
        Fragment of a title. It is matched case-insensitively as a literal
        substring of the 'name' column (regex metacharacters are not special).
    model_path : str
        Path of the joblib file written by ``train_and_save_model``.
    top_n : int
        Maximum number of recommendations to return.

    Returns
    -------
    tuple[str, pandas.DataFrame]
        A header message and the recommended rows, most similar first. When
        no title matches, the message reports that and the frame is empty, so
        callers can always unpack two values.
    """
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only load model files from a trusted source.
    model_data = joblib.load(model_path)
    tfidf_matrix = load_npz(model_data['tfidf_matrix_path'])  # Sparse TF-IDF matrix
    df = model_data['dataframe']

    # Lower-case the query (the message below echoes this normalised form)
    name = name.lower()

    output_columns = ['mal_id', 'name', 'type', 'source', 'genre']

    # na=False keeps rows with a missing name out of the mask instead of
    # producing NaN entries; regex=False treats the query as plain text.
    name_mask = df['name'].str.contains(name, case=False, na=False, regex=False)

    # Nothing matched: return the message together with an empty result frame
    if not name_mask.any():
        return (f"Anime yang mengandung '{name}' tidak ditemukan dalam dataset.",
                df.iloc[0:0][output_columns])

    # Use the first match as the basis. argmax on a boolean array gives the
    # position of the first True, which is what the matrix rows and iloc
    # expect (an index label only coincides with it for a default RangeIndex).
    idx = int(name_mask.to_numpy().argmax())
    selected_anime_name = df.iloc[idx]['name']

    # Similarity of the selected anime against every row
    sim_scores = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).flatten()

    # Descending order; the stable sort keeps tied rows in dataset order.
    ranked = (-sim_scores).argsort(kind='stable')

    # Remove the selected anime explicitly rather than assuming it ranks
    # highest: rows with identical features tie with it at the top score.
    sim_indices = ranked[ranked != idx][:top_n]

    recommendations = df.iloc[sim_indices][output_columns]

    return f"Rekomendasi berdasarkan anime '{selected_anime_name}':\n", recommendations

In [None]:
# **Train and save the model**
train_and_save_model(df, model_path)

Model berhasil disimpan ke: anime_recommender_model.pkl


In [None]:
# **Example usage of the saved model**
anime_name = "beet"  # Replace with a fragment of an anime name from your data
result, recommendations = recommend_anime_partial(anime_name, model_path)
print(result)
print(recommendations)

Rekomendasi berdasarkan anime 'Bouken Ou Beet':

       mal_id                               name   type    source
1022     1123           Bouken Ou Beet Excellion     TV     Manga
5154     7818                          Yousei Ou    OVA     Manga
86        107                    Ou Dorobou Jing     TV     Manga
3192     3655                       Nabari no Ou     TV     Manga
860       953                         Juu Ou Sei     TV     Manga
...       ...                                ...    ...       ...
4768     6849  Bouken Dankichi: Hyouryuu no Maki  Movie  Original
7656    17505                        Mushibugyou     TV     Manga
14212   37520                             Dororo     TV     Manga
494       527                            Pokemon     TV      Game
15626   39696        Niigata no Edamame-ou Shiba    ONA  Original

[100 rows x 4 columns]


In [None]:
def search_anime_by_filters(genre=None, anime_type=None, source=None, model_path='anime_recommender_model.pkl', top_n=100):
    """List anime whose genre, type and/or source contain the given text.

    Each supplied criterion is applied with ``Series.str.contains`` in
    case-insensitive mode, so it matches anywhere inside the column value and
    is interpreted as a regular expression (the pandas default). Criteria
    that are ``None`` or empty are skipped; the remaining ones are combined
    with AND.

    Parameters
    ----------
    genre, anime_type, source : str or None
        Patterns for the 'genre', 'type' and 'source' columns.
    model_path : str
        Path of the joblib file holding the saved dataset under 'dataframe'.
    top_n : int
        Maximum number of rows to return.

    Returns
    -------
    tuple[str, pandas.DataFrame]
        A header message and the first ``top_n`` matching rows. When nothing
        matches, the message reports the criteria and the frame is empty, so
        callers can always unpack two values.
    """
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only load model files from a trusted source.
    model_data = joblib.load(model_path)
    df = model_data['dataframe']

    output_columns = ['mal_id', 'name', 'type', 'source']

    # Apply every criterion that was actually supplied, one column at a time.
    # na=False treats missing column values as non-matching.
    filtered_df = df
    for column, pattern in (('genre', genre), ('type', anime_type), ('source', source)):
        if pattern:
            filtered_df = filtered_df[filtered_df[column].str.contains(pattern, case=False, na=False)]

    # No rows left: keep the return shape identical to the success path
    if filtered_df.empty:
        return (f"Tidak ditemukan anime dengan kriteria yang diberikan: genre='{genre}', type='{anime_type}', source='{source}'.",
                filtered_df[output_columns])

    return "Anime yang cocok dengan kriteria Anda:\n", filtered_df[output_columns].head(top_n)


# **Example usage**
# Search by genre
result, recommendations = search_anime_by_filters(genre="Action", top_n=10)
print(result)
print(recommendations)

# Search by type
result, recommendations = search_anime_by_filters(anime_type="TV", top_n=10)
print(result)
print(recommendations)

# Search by source
result, recommendations = search_anime_by_filters(source="Manga", top_n=10)
print(result)
print(recommendations)

# Search by a combination of genre and type
result, recommendations = search_anime_by_filters(genre="Fantasy", anime_type="Movie", top_n=5)
print(result)
print(recommendations)


Anime yang cocok dengan kriteria Anda:

    mal_id                             name   type       source
0        1                     Cowboy Bebop     TV     Original
1        5  Cowboy Bebop: Tengoku no Tobira  Movie     Original
2        6                           Trigun     TV        Manga
3        7               Witch Hunter Robin     TV     Original
4        8                   Bouken Ou Beet     TV        Manga
8       18           Initial D Fourth Stage     TV        Manga
10      20                           Naruto     TV        Manga
11      21                        One Piece     TV        Manga
15      25                        Sunabouzu     TV        Manga
17      27                    Trinity Blood     TV  Light novel
Anime yang cocok dengan kriteria Anda:

    mal_id                        name type    source
0        1                Cowboy Bebop   TV  Original
2        6                      Trigun   TV     Manga
3        7          Witch Hunter Robin   TV  Original


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import save_npz, load_npz, csr_matrix
import joblib  # Untuk menyimpan dan memuat model

# Input CSV and output model locations
file_path = '/content/drive/MyDrive/anime_data_17000.csv'  # Replace with the path to your CSV file
model_path = 'anime_recommender_model.pkl'  # Where the model is saved

# Load the dataset
df = pd.read_csv(file_path)

# Every column listed here must exist before the features are built
required_columns = ['mal_id', 'name', 'type', 'source', 'genre']
if not set(required_columns) <= set(df.columns):
    raise ValueError(f"CSV harus memiliki kolom: {required_columns}")

# Fill the gaps in the four text columns once, then join them with single
# spaces into the text the vectorizer is fitted on
text_parts = df[['name', 'type', 'source', 'genre']].fillna('')
df['combined_features'] = (
    text_parts['name'] + " "
    + text_parts['type'] + " "
    + text_parts['source'] + " "
    + text_parts['genre']
)

# **Proses Pelatihan**
def train_and_save_model(df, model_path):
    """Fit TF-IDF on ``df['combined_features']`` and write the model to disk.

    The sparse matrix is saved to 'tfidf_matrix.npz'; the fitted vectorizer,
    that matrix path and the DataFrame are dumped together to ``model_path``.
    A confirmation line is printed afterwards.
    """
    # Fit the vectorizer (English stop words removed) and get the sparse matrix
    vectorizer = TfidfVectorizer(stop_words='english')
    features = vectorizer.fit_transform(df['combined_features'])

    # Persist the matrix in scipy's sparse format
    save_npz('tfidf_matrix.npz', features)

    # Persist the vectorizer and metadata alongside the data
    joblib.dump(
        {
            'tfidf_vectorizer': vectorizer,
            'tfidf_matrix_path': 'tfidf_matrix.npz',
            'dataframe': df,
        },
        model_path,
    )
    print(f"Model berhasil disimpan ke: {model_path}")







In [None]:

# Fungsi untuk merekomendasikan anime
def recommend_anime_partial(name, model_path, top_n=100):
    """Recommend anime similar to the first title containing ``name``.

    The saved model bundle is loaded from ``model_path`` on every call, and
    the TF-IDF matrix is read from the path recorded inside that bundle.

    Parameters
    ----------
    name : str
        Fragment of a title. It is matched case-insensitively as a literal
        substring of the 'name' column (regex metacharacters are not special).
    model_path : str
        Path of the joblib file written by ``train_and_save_model``.
    top_n : int
        Maximum number of recommendations to return.

    Returns
    -------
    tuple[str, pandas.DataFrame]
        A header message and the recommended rows ('mal_id', 'name', 'type',
        'source'), most similar first. When no title matches, the message
        reports that and the frame is empty, so callers can always unpack two
        values.
    """
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only load model files from a trusted source.
    model_data = joblib.load(model_path)
    tfidf_matrix = load_npz(model_data['tfidf_matrix_path'])  # Sparse TF-IDF matrix
    df = model_data['dataframe']

    # Lower-case the query (the message below echoes this normalised form)
    name = name.lower()

    output_columns = ['mal_id', 'name', 'type', 'source']

    # na=False keeps rows with a missing name out of the mask instead of
    # producing NaN entries; regex=False treats the query as plain text.
    name_mask = df['name'].str.contains(name, case=False, na=False, regex=False)

    # Nothing matched: return the message together with an empty result frame
    if not name_mask.any():
        return (f"Anime yang mengandung '{name}' tidak ditemukan dalam dataset.",
                df.iloc[0:0][output_columns])

    # Use the first match as the basis. argmax on a boolean array gives the
    # position of the first True, which is what the matrix rows and iloc
    # expect (an index label only coincides with it for a default RangeIndex).
    idx = int(name_mask.to_numpy().argmax())
    selected_anime_name = df.iloc[idx]['name']

    # Similarity of the selected anime against every row
    sim_scores = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).flatten()

    # Descending order; the stable sort keeps tied rows in dataset order.
    ranked = (-sim_scores).argsort(kind='stable')

    # Remove the selected anime explicitly rather than assuming it ranks
    # highest: rows with identical features tie with it at the top score.
    sim_indices = ranked[ranked != idx][:top_n]

    recommendations = df.iloc[sim_indices][output_columns]

    return f"Rekomendasi berdasarkan anime '{selected_anime_name}':\n", recommendations

In [None]:
# **Train and save the model**
# Training is disabled in this cell; the call below loads the file at
# model_path, so that file must already exist.
# train_and_save_model(df, model_path)

# **Example usage of the saved model**
anime_name = "nar"  # Replace with a fragment of an anime name from your data
result, recommendations = recommend_anime_partial(anime_name, model_path)
print(result)
print(recommendations)

Rekomendasi berdasarkan anime 'Naruto':

       mal_id                             name   type source
7527    16870       The Last: Naruto the Movie  Movie  Manga
1574     1735               Naruto: Shippuuden     TV  Manga
2267     2472       Naruto: Shippuuden Movie 1  Movie  Manga
9825    28755         Boruto: Naruto the Movie  Movie  Manga
12418   34566  Boruto: Naruto Next Generations     TV  Manga
...       ...                              ...    ...    ...
4262     5678                          Kobato.     TV  Manga
5619     9088                  Saiyuuki Gaiden    OVA  Manga
3041     3460                  Bosco Adventure     TV   Book
4744     6811           InuYasha: Kanketsu-hen     TV  Manga
1057     1161        Maze☆Bakunetsu Jikuu (TV)     TV  Manga

[100 rows x 4 columns]


**VERSI KEDUA**