In [4]:
import pandas as pd
import numpy as np

# Generate synthetic ratings for 100 users and 300 manga
df_combined = pd.read_csv('../model/final_manga.csv')
num_users = 100
num_manga = len(df_combined)

ratings = pd.DataFrame({
    'user_id': np.random.randint(1, num_users+1, size=1000),
    'manga_id': np.random.randint(1, num_manga+1, size=1000),
    'rating': np.random.randint(1, 6, size=1000)  # ratings between 1 and 5
})

ratings.head()


Unnamed: 0,user_id,manga_id,rating
0,18,825,5
1,37,665,4
2,38,894,4
3,30,247,5
4,9,493,1


In [5]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   user_id   1000 non-null   int32
 1   manga_id  1000 non-null   int32
 2   rating    1000 non-null   int32
dtypes: int32(3)
memory usage: 11.8 KB


In [6]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Define the reader
reader = Reader(rating_scale=(1, 5))

# Load dataframe
data = Dataset.load_from_df(ratings[['user_id', 'manga_id', 'rating']], reader)

# Split train/test
trainset, testset = train_test_split(data, test_size=0.2)

# Use SVD (Matrix Factorization)
cf_model = SVD()
cf_model.fit(trainset)

# Evaluate
predictions = cf_model.test(testset)
print("RMSE:", accuracy.rmse(predictions))


RMSE: 1.4387
RMSE: 1.4387349763560835


In [7]:
def get_cf_recommendations(user_id, top_n=5):
    """Return top N manga recommendations for a given user using CF."""
    # Predict ratings for all manga
    all_manga_ids = df_combined.index + 1  # assuming manga_id matches index
    predictions = [cf_model.predict(user_id, manga_id) for manga_id in all_manga_ids]

    # Sort by estimated rating
    predictions.sort(key=lambda x: x.est, reverse=True)
    top_preds = predictions[:top_n]

    # Return manga details
    manga_indices = [int(pred.iid)-1 for pred in top_preds]
    return df_combined.iloc[manga_indices][['title', 'genres', 'image_url']]


In [11]:
from sklearn.metrics.pairwise import cosine_similarity

# Already computed tfidf_matrix & cosine_sim earlier
# cosine_sim[i][j] = similarity between manga i and manga j

def hybrid_recommendations(user_id, input_title, alpha=0.5, top_n=5):
    """
    Combine content-based and collaborative filtering recommendations.
    alpha: weight for content-based (0.5 = equal balance)
    """
    # ---- Content-Based Scores ----
    if input_title not in df_combined['title'].values:
        raise ValueError(f"'{input_title}' not found in dataset.")
    
    idx = df_combined[df_combined['title'] == input_title].index[0]
    cb_scores = cosine_sim[idx]  # vector of similarity to all manga

    # ---- Collaborative Filtering Scores ----
    all_manga_ids = df_combined.index + 1
    cf_scores = np.array([cf_model.predict(user_id, manga_id).est for manga_id in all_manga_ids])

    # ---- Combine ----
    final_scores = alpha * cb_scores + (1 - alpha) * cf_scores

    # Sort and get top N
    top_indices = final_scores.argsort()[::-1][1:top_n+1]

    return df_combined.iloc[top_indices][['title', 'genres', 'image_url']]


In [12]:
hybrid_recs = hybrid_recommendations(user_id=10, input_title="Naruto", alpha=0.6, top_n=5)
print(hybrid_recs)



NameError: name 'cosine_sim' is not defined

In [2]:
import requests

url=f"https://api.jikan.moe/v4/manga?page={1}&limit=25"
response = requests.get(url)
response.raise_for_status()
data = response.json()["data"]

In [3]:
print(data)

[{'mal_id': 1, 'url': 'https://myanimelist.net/manga/1/Monster', 'images': {'jpg': {'image_url': 'https://cdn.myanimelist.net/images/manga/3/258224.jpg', 'small_image_url': 'https://cdn.myanimelist.net/images/manga/3/258224t.jpg', 'large_image_url': 'https://cdn.myanimelist.net/images/manga/3/258224l.jpg'}, 'webp': {'image_url': 'https://cdn.myanimelist.net/images/manga/3/258224.webp', 'small_image_url': 'https://cdn.myanimelist.net/images/manga/3/258224t.webp', 'large_image_url': 'https://cdn.myanimelist.net/images/manga/3/258224l.webp'}}, 'approved': True, 'titles': [{'type': 'Default', 'title': 'Monster'}, {'type': 'Japanese', 'title': 'MONSTER'}, {'type': 'English', 'title': 'Monster'}], 'title': 'Monster', 'title_english': 'Monster', 'title_japanese': 'MONSTER', 'title_synonyms': [], 'type': 'Manga', 'chapters': 162, 'volumes': 18, 'status': 'Finished', 'publishing': False, 'published': {'from': '1994-12-05T00:00:00+00:00', 'to': '2001-12-20T00:00:00+00:00', 'prop': {'from': {'day

In [4]:
records = []
for item in data:
        records.append({
            "title": item["title"],
            "synopsis": item.get("synopsis", ""),
            "genres": " ".join([g["name"] for g in item.get("genres", [])]),
            "score": item.get("score", 0),
            "image_url": item["images"]["jpg"]["image_url"],
            "id":item["mal_id"]
        })

In [5]:
records

[{'title': 'Monster',
  'synopsis': "Kenzou Tenma, a renowned Japanese neurosurgeon working in post-war Germany, faces a difficult choice: to operate on Johan Liebert, an orphan boy on the verge of death, or on the mayor of Düsseldorf. In the end, Tenma decides to gamble his reputation by saving Johan, effectively leaving the mayor for dead.\n\nAs a consequence of his actions, hospital director Heinemann strips Tenma of his position, and Heinemann's daughter Eva breaks off their engagement. Disgraced and shunned by his colleagues, Tenma loses all hope of a successful career—that is, until the mysterious killing of Heinemann gives him another chance.\n\nNine years later, Tenma is the head of the surgical department and close to becoming the director himself. Although all seems well for him at first, he soon becomes entangled in a chain of gruesome murders that have taken place throughout Germany. The culprit is a monster—the same one that Tenma saved on that fateful day nine years ago.\