In [None]:

# recommendation_runner_v2.py
import pandas as pd
import numpy as np
import pickle

# Read the pickled model and its companion artifacts from the working directory.
# NOTE: pickle.load can execute arbitrary code contained in the file, so these
# artifacts must only ever come from a trusted source.
def _read_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model = _read_pickle('best_webtoon_model (1).pkl')
top_items = _read_pickle('top_items.pkl')
item_features = _read_pickle('item_features.pkl')

synopsis_embeddings = np.load('synopsis_embeddings.npy')

# Full catalogue table; the recommenders use it to look up display metadata
# (title, genre, rating, synopsis, authors) for the titles they pick.
df = pd.read_csv("webtoon_originals_en.csv")

# Content-based recommendation with genre
def content_based_recommendation(input_titles, genre=None, top_n=10):
    """Recommend titles whose synopsis embedding best matches a seed profile.

    The seed profile is the mean embedding of every recognised title in
    ``input_titles`` plus, when ``genre`` is given, of every catalogue title
    whose genre contains it. Each row of ``synopsis_embeddings`` is then
    scored by its dot product with that mean vector.

    Args:
        input_titles: Iterable of titles the user liked. Titles that do not
            appear in ``top_items['title']`` are ignored.
        genre: Optional text matched case-insensitively, as a literal
            substring, against ``df['genre']``. Rows with a missing genre
            never match.
        top_n: Maximum number of ranked titles to look up.

    Returns:
        A DataFrame with the columns ``title``, ``genre``, ``rating``,
        ``synopsis`` and ``authors``, best match first. When neither the
        titles nor the genre yield any seed vector, a one-element list
        holding a message string is returned instead.

    Note:
        Assumes row ``i`` of ``synopsis_embeddings`` describes row ``i`` of
        ``top_items`` (positional alignment) -- TODO confirm against the
        code that produced both artifacts.
    """
    # Position of the first occurrence of every title, built once so that each
    # lookup is O(1) and always positional (consistent with the .iloc below).
    title_to_pos = {}
    for pos, title in enumerate(top_items['title']):
        title_to_pos.setdefault(title, pos)

    idx_inputs = [title_to_pos[t] for t in input_titles if t in title_to_pos]

    # Collect only non-empty groups of seed vectors; stacking an empty list
    # with a 2-D array would raise inside np.vstack.
    seed_groups = []
    if idx_inputs:
        seed_groups.append(synopsis_embeddings[idx_inputs])

    if genre:
        # regex=False: user text is matched literally, so characters such as
        # "(" or "+" cannot raise a regex error. na=False: rows without a
        # genre count as "no match" instead of breaking the boolean mask.
        genre_mask = df['genre'].str.lower().str.contains(
            genre.lower(), regex=False, na=False
        )
        genre_idx = [
            title_to_pos[t] for t in df.loc[genre_mask, 'title'] if t in title_to_pos
        ]
        if genre_idx:
            seed_groups.append(synopsis_embeddings[genre_idx])

    if not seed_groups:
        return ["Tidak ada judul yang valid ditemukan."]

    input_vectors = np.vstack(seed_groups)
    avg_vector = np.mean(input_vectors, axis=0).reshape(1, -1)
    similarities = np.dot(synopsis_embeddings, avg_vector.T).flatten()

    # Push the user's own titles to the very end of the ranking. -inf is used
    # because a raw dot product is not guaranteed to stay above -1.
    for idx in idx_inputs:
        similarities[idx] = -np.inf
    top_indices = np.argsort(-similarities)[:top_n]

    ranked_titles = top_items['title'].iloc[top_indices].tolist()
    rank_of = {}
    for rank, title in enumerate(ranked_titles):
        rank_of.setdefault(title, rank)

    # A plain isin() filter returns rows in catalogue order; re-order them so
    # the best match comes first (sorted() is stable, so catalogue rows that
    # share a title keep their relative order).
    matches = df[df['title'].isin(ranked_titles)]
    matched_titles = matches['title'].tolist()
    order = sorted(
        range(len(matched_titles)),
        key=lambda i: rank_of.get(matched_titles[i], len(rank_of)),
    )
    return matches.iloc[order][['title', 'genre', 'rating', 'synopsis', 'authors']]

# Hybrid recommendation
def hybrid_recommendation(user_id, input_titles, top_n=10):
    """Recommend titles ranked by the pickled model's predicted scores.

    Every title in ``top_items`` is scored with ``model.predict``; the titles
    the user already supplied are pushed to the end of the ranking and the
    ``top_n`` best remaining titles are looked up in ``df``.

    Args:
        user_id: Accepted for interface compatibility but currently unused.
            NOTE(review): predictions are always requested for internal user
            index 0 -- confirm whether a user-id mapping should be applied.
        input_titles: Iterable of titles the user liked. Titles that do not
            appear in ``top_items['title']`` are ignored; the remaining ones
            are only used to exclude already-known titles from the result.
        top_n: Maximum number of ranked titles to look up.

    Returns:
        A DataFrame with the columns ``title``, ``genre``, ``rating``,
        ``synopsis`` and ``authors``, highest score first. When none of the
        input titles is recognised, a one-element list holding a message
        string is returned instead.
    """
    user_idx = 0
    item_labels = list(top_items['title'])

    # First-occurrence position of each title, built once instead of calling
    # list.index() (a linear scan) for every input title.
    label_to_pos = {}
    for pos, label in enumerate(item_labels):
        label_to_pos.setdefault(label, pos)

    idx_inputs = [label_to_pos[t] for t in input_titles if t in label_to_pos]
    if not idx_inputs:
        return ["Tidak ada judul yang valid ditemukan."]

    scores = model.predict(
        user_ids=user_idx,
        item_ids=np.arange(len(item_labels)),
        item_features=item_features,
    )
    # Titles the user already named must never outrank unseen ones.
    for idx in idx_inputs:
        scores[idx] = -np.inf
    top_items_idx = np.argsort(-scores)[:top_n]

    ranked_titles = [item_labels[i] for i in top_items_idx]
    rank_of = {}
    for rank, title in enumerate(ranked_titles):
        rank_of.setdefault(title, rank)

    # A plain isin() filter returns rows in catalogue order; re-order them so
    # the highest-scoring title comes first (sorted() is stable, so catalogue
    # rows that share a title keep their relative order).
    matches = df[df['title'].isin(ranked_titles)]
    matched_titles = matches['title'].tolist()
    order = sorted(
        range(len(matched_titles)),
        key=lambda i: rank_of.get(matched_titles[i], len(rank_of)),
    )
    return matches.iloc[order][['title', 'genre', 'rating', 'synopsis', 'authors']]

# CLI interaction
def _show_recommendations(recommendations):
    """Print a recommendation result to stdout.

    The recommendation functions return either a DataFrame of titles or, when
    nothing could be recommended, a plain list of message strings. Both
    shapes are handled here so the CLI never calls ``iterrows`` on a list.
    """
    if isinstance(recommendations, list):
        for message in recommendations:
            print(message)
        return

    # Number the rows by their position in the result; the DataFrame's own
    # index labels are catalogue row labels, not ranks.
    for rank, (_, row) in enumerate(recommendations.iterrows(), start=1):
        print(f"\n{rank}. Title: {row['title']}\n   Genre: {row['genre']}\n   Rating: {row['rating']}\n   Authors: {row['authors']}\n   Synopsis: {row['synopsis']}")


if __name__ == '__main__':
    user_id = 'user_0'
    history_titles = []

    print("\n🎯 Sistem Rekomendasi Webtoon")
    # Surrounding whitespace would otherwise become part of the genre filter.
    genre_input = input("Masukkan genre favoritmu (e.g. romance, action): ").strip().lower()

    print("\n🔍 Rekomendasi awal berdasarkan genre:")
    _show_recommendations(content_based_recommendation([], genre=genre_input))

    while True:
        title = input("Masukkan judul Webtoon favoritmu (atau 'exit'): ").strip()
        if title.lower() == 'exit':
            break
        if title not in top_items['title'].values:
            print(f"Judul '{title}' tidak ditemukan.")
            continue

        history_titles.append(title)

        # Fewer than three liked titles: stay with the content-based
        # recommender; from the third title on, switch to the model.
        if len(history_titles) < 3:
            print("\n🔍 Rekomendasi content-based (genre + judul):")
            rekomendasi_df = content_based_recommendation(history_titles, genre=genre_input)
        else:
            print("\n🤖 Rekomendasi hybrid (≥3 judul):")
            rekomendasi_df = hybrid_recommendation(user_id, history_titles)

        _show_recommendations(rekomendasi_df)
