In [2]:
import pandas as pd
import numpy as np

In [3]:
anime_description = pd.read_csv('data/anime_with_synopsis.csv')

In [15]:
anime_description['sypnopsis_length'] = anime_description['sypnopsis'].apply(lambda x: len(str(x)))

In [None]:
# Filter out the anime with less than 100 characters and no synopsis information
anime_description = anime_description[(anime_description.sypnopsis_length>100) & 
                                      (anime_description.sypnopsis.str.contains("No synopsis information has been")==False)]

In [24]:
anime_description

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,sypnopsis_length
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever...",1063
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ...",857
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0...",1005
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...,435
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...,721
...,...,...,...,...,...,...
16205,48466,Kyoukai Senki,Unknown,"Action, Mecha","In the year 2061 AD, Japan has lost its sovere...",540
16206,48470,D_Cide Traumerei,Unknown,"Action, Adventure, Drama, Magic, Fantasy",The stage is Shibuya. When Ryuuhei Oda was in ...,478
16207,48471,Tsuki to Laika to Nosferatu,Unknown,"Sci-Fi, Space, Vampire",The first astronaut in human history was a vam...,935
16210,48483,Mieruko-chan,Unknown,"Comedy, Horror, Supernatural",ko is a typical high school student whose life...,548


# sentence to embedding

In [None]:
import numpy as np 
from sentence_transformers import SentenceTransformer

# 載入模型
model = SentenceTransformer('all-mpnet-base-v2')

# 範例句子
sentences = anime_description.sypnopsis.tolist()

# 將句子轉成 embedding
embeddings = model.encode(sentences)

# 將 embedding 儲存到檔案
np.save('data/anime_description_embeddings.npy', embeddings)


In [34]:
embeddings.shape

(11722, 768)

# qdrent table create

In [79]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams

# 連線到本地 Qdrant
client = QdrantClient(host="localhost", port=6333)


In [75]:
# 建立一個向量集合（collection）
client.recreate_collection(
    collection_name="anime_description_collection",
    vectors_config=VectorParams(size=768, distance="Cosine")  # size 對應 embedding 維度
)

print("Qdrant collection created successfully!")

  client.recreate_collection(


Qdrant collection created successfully!


In [76]:
# 查看 qdrant 的 collections
print(client.get_collections())

collections=[CollectionDescription(name='anime_description_collection')]


# update embeddings to qdrant

In [77]:
from qdrant_client.http.models import PointStruct
from tqdm import tqdm  # 進度條套件

# 假設已經有 embedding 向量

metadatas = anime_description[['MAL_ID', 'Name']].to_dict(orient="records")

def batch_upsert(client, collection_name, embeddings, metadatas, batch_size=100):
    """
    分批將向量和 metadata 上傳到 Qdrant，MAL_ID 當作 id，並顯示進度條
    """
    total = len(embeddings)
    for start in tqdm(range(0, total, batch_size), desc="Uploading batches"):
        end = min(start + batch_size, total)
        batch_embeddings = embeddings[start:end]
        batch_metadata = metadatas[start:end]

        points = [
            PointStruct(id=metadata['MAL_ID'], vector=vec, payload=metadata)
            for vec, metadata in zip(batch_embeddings, batch_metadata)
        ]
        client.upsert(collection_name=collection_name, points=points)

# 範例使用
batch_upsert(client, "anime_description_collection", embeddings, metadatas, batch_size=100)


Uploading batches: 100%|██████████| 118/118 [00:11<00:00, 10.54it/s]


# del collection

In [61]:
# 刪除整個 collection
collection_name = 'anime_description_collection'

client.delete_collection(collection_name=collection_name)

print(f"Collection '{collection_name}' 已刪除！")

Collection 'anime_description_collection' 已刪除！


# 查詢相似動畫

In [48]:
# Dragon Ball 七龍珠為例
anime_description[anime_description.Name.str.contains('Dragon Ball')].head(5)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,sypnopsis_length
192,223,Dragon Ball,8.0,"Adventure, Comedy, Fantasy, Martial Arts, Shou...",Gokuu Son is a young boy who lives in the wood...,1153
193,225,Dragon Ball GT,6.48,"Action, Sci-Fi, Adventure, Comedy, Super Power...",Emperor Pilaf finally has his hands on the Bla...,711
458,502,Dragon Ball Movie 1: Shen Long no Densetsu,6.83,"Action, Adventure, Comedy, Fantasy, Shounen, S...","lling of Dragon Ball's origins, this is a diff...",287
716,813,Dragon Ball Z,8.16,"Action, Adventure, Comedy, Fantasy, Martial Ar...",Five years after winning the World Martial Art...,1016
784,891,Dragon Ball Movie 2: Majinjou no Nemurihime,6.79,"Action, Adventure, Comedy, Fantasy, Sci-Fi, Sh...",Goku and Kuririn are given an assignment by Ka...,221


In [69]:
from qdrant_client import QdrantClient

# 初始化
collection_name = "anime_description_collection"
input_mal_id = 223

search_result = client.retrieve(
    collection_name=collection_name,
    ids=[input_mal_id],
    with_payload=True,
    with_vectors=True
)

if not search_result:
    raise ValueError(f"MAL_ID {input_mal_id} not found in collection.")

# 拿到 embedding
query_vector = search_result[0].vector

# 2️⃣ 搜尋最相似的 10 個
similar_results = client.search(
    collection_name=collection_name,
    query_vector=query_vector,
    limit=10,
    with_payload=True
)

# 3️⃣ 顯示結果
print(f"Top 10 similar MAL_IDs to {input_mal_id}:")
for r in similar_results:
    mal_id = r.payload["MAL_ID"]
    name = r.payload.get("Name", "")
    score = r.score
    print(f"MAL_ID: {mal_id}, Name: {name}, Score: {score:.4f}")


Top 10 similar MAL_IDs to 223:
MAL_ID: 223, Name: Dragon Ball, Score: 1.0000
MAL_ID: 6033, Name: Dragon Ball Kai, Score: 0.7925
MAL_ID: 813, Name: Dragon Ball Z, Score: 0.7782
MAL_ID: 36946, Name: Dragon Ball Super Movie: Broly, Score: 0.6664
MAL_ID: 225, Name: Dragon Ball GT, Score: 0.6609
MAL_ID: 14837, Name: Dragon Ball Z Movie 14: Kami to Kami, Score: 0.6578
MAL_ID: 30694, Name: Dragon Ball Super, Score: 0.6364
MAL_ID: 2409, Name: Gokudou-kun Manyuuki, Score: 0.6321
MAL_ID: 894, Name: Dragon Ball Z Movie 01: Ora no Gohan wo Kaese!!, Score: 0.6295
MAL_ID: 20189, Name: Boku no Son Gokuu, Score: 0.6082


  similar_results = client.search(


In [1]:
from anime_recommender import AnimeRecommender

# 建立推薦系統
recommender = AnimeRecommender()
recommender.setup_system()

# 使用 MAL_ID 進行推薦
mal_id = 223  # Dragon Ball
recommendations = recommender.recommend_by_mal_id(mal_id, limit=10)
recommender.display_recommendations(recommendations)

  from .autonotebook import tqdm as notebook_tqdm


=== 動漫推薦系統設定 ===

1. 處理動漫資料...
載入資料: data/anime_with_synopsis.csv
原始資料筆數: 16214
篩選後資料筆數: 11722

2. 生成文本向量...
載入向量: data/anime_description_embeddings.npy
向量載入完成，形狀: (11722, 768)

3. 設定向量資料庫...
連接到 Qdrant: localhost:6333
連線建立成功
使用既有集合: anime_description_collection_test

=== 系統設定完成 ===

=== 推薦結果 (共 10 部) ===
 1. Dragon Ball (MAL_ID: 223) - 相似度: 1.0000
 2. Dragon Ball Kai (MAL_ID: 6033) - 相似度: 0.7925
 3. Dragon Ball Z (MAL_ID: 813) - 相似度: 0.7782
 4. Dragon Ball Super Movie: Broly (MAL_ID: 36946) - 相似度: 0.6664
 5. Dragon Ball GT (MAL_ID: 225) - 相似度: 0.6609
 6. Dragon Ball Z Movie 14: Kami to Kami (MAL_ID: 14837) - 相似度: 0.6578
 7. Dragon Ball Super (MAL_ID: 30694) - 相似度: 0.6364
 8. Gokudou-kun Manyuuki (MAL_ID: 2409) - 相似度: 0.6321
 9. Dragon Ball Z Movie 01: Ora no Gohan wo Kaese!! (MAL_ID: 894) - 相似度: 0.6295
10. Boku no Son Gokuu (MAL_ID: 20189) - 相似度: 0.6082


In [10]:
anime_description[anime_description.Name.str.contains('Conan')]

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
203,235,Detective Conan,8.16,"Adventure, Mystery, Comedy, Police, Shounen","Shinichi Kudou, a high school student of astou..."
270,302,Mirai Shounen Conan,8.09,"Adventure, Drama, Sci-Fi",Conan was the only child born on Remnant Islan...
691,779,Detective Conan Movie 01: The Timed Skyscraper,7.87,"Adventure, Mystery, Comedy, Police, Shounen",Conan Edogawa is facing a dilemma: Ran Mouri h...
692,780,Detective Conan Movie 02: The Fourteenth Target,7.9,"Adventure, Mystery, Comedy, Police, Shounen",mysterious attacker has appeared and is assaul...
693,781,Detective Conan Movie 03: The Last Wizard of t...,8.04,"Adventure, Mystery, Comedy, Police, Shounen",Kaitou Kid dares to challenge the police once ...
...,...,...,...,...,...
13864,38770,Detective Conan Movie 23: The Fist of Blue Sap...,7.82,"Action, Mystery, Comedy, Police, Drama, Shounen",23rd Detective Conan Movie.
13878,38788,Detective Conan: Amuro Secret Call,6.24,"Mystery, Comedy, Police",Special two-part bonus video included on the L...
14520,39764,Detective Conan Movie 24: Hiiro no Dangan,Unknown,"Action, Mystery, Comedy, Police, Drama, Shounen",24th Detective Conan Movie.
14816,40348,Shounen Sunday CM: Detective Conan,5.55,"Mystery, Shounen",Detective Conan television commercial for Shou...
