In [1]:
# Cell 1: Install Libraries & Setup Environment
!pip install -q streamlit pyngrok sentence-transformers
import pandas as pd
import numpy as np
import os
import re
import pickle
import gc
from scipy import stats
from sklearn.preprocessing import MultiLabelBinarizer
import tensorflow
import tensorflow.keras
from numpy.linalg import norm
from google.colab import drive
import requests
import time
import networkx as nx

# Mount Drive and set path
# Mount Google Drive so the project folder is reachable from the Colab runtime.
drive.mount('/content/drive')
# Later cells use paths relative to this folder (datasets/, model/, app_data/),
# so the working directory is switched to the project root once, here.
proj_path = '/content/drive/MyDrive/anime_recommendation_system'
os.chdir(proj_path)
print(f"‚úÖ Setup complete. Working directory: {os.getcwd()}")

Mounted at /content/drive
‚úÖ Setup complete. Working directory: /content/drive/MyDrive/anime_recommendation_system


In [None]:
# Cell 2: One-Time Data Preparation (Now includes Synopsis)
print("Starting one-time data preparation...")

# --- 0. Output folder that receives every artefact written by this cell ---
data_save_path = 'app_data'
os.makedirs(data_save_path, exist_ok=True)
print(f"...Created new data folder at: {data_save_path}")

# --- 1. Raw inputs ---
user = pd.read_pickle('datasets/created_datasets/user.pkl')
anime = pd.read_pickle('datasets/created_datasets/anime.pkl')
print("...Initial data loaded.")

# --- 2. Preprocessing ---
# Hyphenate these multi-word genres so that splitting on spaces further down
# keeps each of them as a single token.
genres_hyphenated = anime['Genres'].copy()
for spaced_genre in ('Slice of Life', 'Martial Arts', 'Super Power'):
    genres_hyphenated = genres_hyphenated.str.replace(spaced_genre, spaced_genre.replace(' ', '-'))
anime['Genres_edited'] = genres_hyphenated

def space_split_genres(genres):
    """Split every space-containing genre label into words and join all tokens with '|'.

    Args:
        genres: list of genre label strings belonging to one anime.

    Returns:
        str: all resulting tokens, sorted and joined by '|'.
    """
    tokens = []
    for label in genres:
        # split(' ') returns a one-element list for labels without a space, so every
        # label can be handled uniformly. Building a fresh list (instead of splicing
        # slices of the original and the partially edited list together) keeps all
        # tokens when more than one label contains a space.
        tokens.extend(label.split(' '))
    return '|'.join(sorted(tokens))
# Turn the ', '-separated genre string into a list, then collapse it into a sorted, '|'-joined token string.
anime['Genres_edited'] = anime['Genres_edited'].str.split(', ').apply(space_split_genres)

def year_finding(aired):
    """Extract the first four-digit year (leading digit 1 or 2) from an 'Aired' string.

    Returns None for the literal 'Unknown', the year as an int when one is found,
    and -1 when the text contains no such year.
    """
    if aired == 'Unknown':
        return None
    found = re.search(r'([1-2][0-9]{3})', aired)
    return -1 if found is None else int(found.group(0))

# year_finding yields None for 'Unknown' and -1 when no year can be parsed; the nullable
# Int32 dtype stores the None as <NA> instead of turning the column into floats.
anime['Origin_year'] = anime['Aired'].apply(lambda x: year_finding(str(x))).astype('Int32')
# Only one anime ('Katsudou Shashin', MAL_ID 33187) ended up without a year; its origin year was looked up manually.
anime.loc[anime['MAL_ID'] == 33187, 'Origin_year'] = 1907
print("...Base anime processing complete.")

# --- 3. Load and Merge Synopsis Data ---
try:
    synopsis_df = pd.read_csv('datasets/kaggle_dataset/anime_with_synopsis.csv')
    # The source file spells the column 'sypnopsis'; normalise the name.
    synopsis_df = synopsis_df.rename(columns={'sypnopsis': 'synopsis'})
    # Keep only the columns we need to merge, one row per MAL_ID. Without the
    # de-duplication a repeated MAL_ID in the CSV would multiply anime rows in the
    # left merge, and later steps (Popularity_adjusted ranking, lookups by MAL_ID)
    # rely on one row per anime.
    synopsis_df = synopsis_df[['MAL_ID', 'synopsis']].drop_duplicates(subset='MAL_ID')

    # Merge with our processed anime dataframe (anime without a synopsis keep NaN)
    anime = anime.merge(synopsis_df, on='MAL_ID', how='left')
    print("...Synopsis data loaded and merged.")
except FileNotFoundError:
    print("...Synopsis file not found. Skipping synopsis merge.")
    anime['synopsis'] = "No synopsis available."
except Exception as e:
    # Best effort: any other problem (e.g. unexpected columns) must not abort the whole preparation.
    print(f"...Error merging synopsis: {e}. Skipping.")
    anime['synopsis'] = "No synopsis available."

# --- 4. Create and Save Anime Aggregates ---
# Per-anime mean rating and number of ratings, computed from the user rating table.
anime_agg = user.groupby('anime_id')['rating'].agg(['mean', 'count']).reset_index()
anime_agg.columns = ['anime_id', 'anime_avg_rating', 'total_ratings_anime']
anime_agg.to_pickle(os.path.join(data_save_path, 'anime_agg_processed.pkl'))

# Build Popularity_adjusted: a gap-free rank 1..len(anime) with no repeated values.
# Rows are ordered by 'Popularity' (ascending); ties are broken by the number of
# ratings (more ratings ranks first).
anime['total_ratings_anime'] = anime['MAL_ID'].map(anime_agg.set_index('anime_id')['total_ratings_anime'])
sorted_indices = anime.sort_values(by=['Popularity', 'total_ratings_anime'], ascending=[True, False]).index
# Assigning through .loc with the sorted index labels writes rank k to the k-th row of the sorted order.
anime.loc[sorted_indices, 'Popularity_adjusted'] = range(1, len(anime) + 1)
anime['Popularity_adjusted'] = anime['Popularity_adjusted'].astype('int')
# The helper column was only needed for the tie-break above.
anime.drop(columns=['total_ratings_anime'], inplace=True)

# --- 5. Save the processed dataframe ---
anime.to_pickle(os.path.join(data_save_path, 'anime_processed.pkl'))
print("...Processed 'anime_processed.pkl' (with synopsis) saved.")

# --- 6. Create and Save Genre MLB File ---
# Count how often each genre token occurs over all anime (missing genres count as '').
genre_value_counts = pd.Series(('|').join(anime['Genres_edited'].fillna('')).split('|')).value_counts()
# value_counts() sorts by frequency, so genres_list is ordered from most to least common.
genres_list = genre_value_counts.index.tolist()
# 'Unknown' is a placeholder, not a genre: it gets no indicator column.
if 'Unknown' in genres_list: genres_list.remove('Unknown')
# The binarizer is given the explicit class list, so the indicator columns are exactly genres_list.
genres_mlb_obj = MultiLabelBinarizer(classes=genres_list)
anime_genres_mlb_df = anime[['MAL_ID', 'Genres_edited']].copy()
anime_genres_mlb_temp = pd.DataFrame(
    genres_mlb_obj.fit_transform(anime_genres_mlb_df['Genres_edited'].fillna('').str.split('|')),
    columns=genres_mlb_obj.classes_,
    index=anime_genres_mlb_df.index
)
# Resulting layout: MAL_ID, Genres_edited, then one indicator column per genre.
anime_genres_mlb_df = anime_genres_mlb_df.join(anime_genres_mlb_temp)
anime_genres_mlb_df.to_pickle(os.path.join(data_save_path, 'anime_genres_mlb.pkl'))
with open(os.path.join(data_save_path, 'genres_list.pkl'), 'wb') as f:
    pickle.dump(genres_list, f)
print("...Genre MLB file and genres list saved.")

# --- 7. Create and Save Divided Opinion IDs ---
# Frequency table: one row per anime_id, one column per rating value, cells = number of ratings.
anime_rating_freq = user.groupby(['anime_id','rating'])['rating'].count().unstack(fill_value = 0)
anime_rating_freq.columns = [str(col) for col in anime_rating_freq.columns]
anime_rating_freq = anime_rating_freq.add_prefix('rating_')
rating_columns = anime_rating_freq.columns
# "Bad" = ratings 1-5, "good" = ratings 8-10; only columns that actually exist are used.
# NOTE(review): the names assume the rating values stringify as '1'..'10' (integer ratings) — confirm.
bad_rating_columns  = [f'rating_{i}' for i in range(1, 6) if f'rating_{i}' in rating_columns]
good_rating_columns = [f'rating_{i}' for i in range(8, 11) if f'rating_{i}' in rating_columns]
total_ratings = anime_rating_freq[rating_columns].sum(axis=1)
anime_rating_freq['bad_rating_ratio'] = anime_rating_freq[bad_rating_columns].sum(axis=1) / total_ratings
anime_rating_freq['good_rating_ratio'] = anime_rating_freq[good_rating_columns].sum(axis=1) / total_ratings
anime_rating_freq['bad_to_good_ratio'] = anime_rating_freq['bad_rating_ratio'] / anime_rating_freq['good_rating_ratio']
# Infinite ratios (no good ratings at all) are turned into NaN.
anime_rating_freq['bad_to_good_ratio'] = anime_rating_freq['bad_to_good_ratio'].replace([np.inf, -np.inf], np.nan)
# "Divided opinion": at least 25% bad ratings AND bad/good ratio between 0.9 and 1.15.
divided_opinion_cond = ((anime_rating_freq['bad_rating_ratio'] >= 0.25) & (anime_rating_freq['bad_to_good_ratio'].between(0.9, 1.15)))
# The table is indexed by anime_id, so the index values are the ids of the selected anime.
divided_opinion_animes_mal_ids = anime_rating_freq[divided_opinion_cond].index.values
with open(os.path.join(data_save_path, 'divided_opinion_anime_ids.pkl'), 'wb') as f:
    pickle.dump(divided_opinion_animes_mal_ids, f)
print(f"...Divided Opinion file saved. Found {len(divided_opinion_animes_mal_ids)} animes.")


# --- 8. NEW: EXTRACT AND SAVE MODEL WEIGHTS ---
print("...Loading model to extract weights...")
# Loading happens outside the try block, so a missing/corrupt model file stops the cell with a traceback.
model = tensorflow.keras.models.load_model('model/checkpoint.model.keras')
try:
    # Get weights from the layer
    # (first weight array of the 'anime_embedding' layer; presumably one row per encoded anime — confirm)
    weights = model.get_layer('anime_embedding').get_weights()[0]
    # Normalize them immediately to save computation time in the app
    # (each row is divided by its L2 norm, so a plain dot product between rows is their cosine similarity)
    weights_norm = weights / norm(weights, axis=1).reshape((-1, 1))

    # Save as a simple pickle file
    with open(os.path.join(data_save_path, 'anime_model_weights.pkl'), 'wb') as f:
        pickle.dump(weights_norm, f)
    print(f"Weights extracted! Saved 'anime_model_weights.pkl'. The big model file is NOT needed for the app.")
except Exception as e:
    # NOTE(review): the failure is only printed; `weights` / `weights_norm` may then be undefined
    # and no weights file is written.
    print(f"Error extracting weights: {e}")

# --- 9. Clean up memory ---
# Drop the large intermediates by name. globals().pop(name, None) is used instead of
# `del name` so the cleanup cannot raise NameError when an earlier step failed before
# creating a variable (the weight extraction, for example, only prints its error and
# may leave `weights` / `weights_norm` undefined).
_cell2_temporaries = (
    'user', 'anime_rating_freq', 'anime', 'anime_agg', 'model', 'sorted_indices',
    'anime_genres_mlb_temp', 'anime_genres_mlb_df', 'divided_opinion_cond',
    'divided_opinion_animes_mal_ids', 'genre_value_counts', 'genres_mlb_obj',
    'weights', 'weights_norm', 'bad_rating_columns', 'good_rating_columns',
    'total_ratings',
)
for _temp_name in _cell2_temporaries:
    globals().pop(_temp_name, None)
del _cell2_temporaries, _temp_name

# Release the memory held by the objects that just lost their last reference.
gc.collect()

print("\n‚úÖ All data files for the DEPLOYABLE app have been created in 'app_data/'.")

Starting one-time data preparation...
...Created new data folder at: app_data
...Initial data loaded.
...Base anime processing complete.
...Synopsis data loaded and merged.
...Processed 'anime_processed.pkl' (with synopsis) saved.




...Genre MLB file and genres list saved.
...Divided Opinion file saved. Found 208 animes.
...Loading model to extract weights...
Weights extracted! Saved 'anime_model_weights.pkl'. The big model file is NOT needed for the app.

‚úÖ All data files for the DEPLOYABLE app have been created in 'app_data/'.


In [None]:
# Cell 3: Data Enrichment (Fetch Posters)
#
# Looks up one poster URL per anime through the Jikan API and writes the enriched
# frame to app_data/anime_with_posters.pkl. The run is resumable: poster URLs found
# by an earlier (possibly interrupted) run are re-used and progress is written to
# disk periodically. Rows stored as "NOT_FOUND" are queried again on a re-run,
# because a "NOT_FOUND" can also stem from a temporary API failure.

print("Starting data enrichment")

# Define the path to our data
data_path = 'app_data'
anime_processed_path = os.path.join(data_path, 'anime_processed.pkl')
anime_with_posters_path = os.path.join(data_path, 'anime_with_posters.pkl')

# Define the Jikan API endpoint
JIKAN_API_URL = "https://api.jikan.moe/v4/anime/{}/pictures"

REQUEST_TIMEOUT_SECONDS = 15     # without a timeout one stalled connection blocks the loop forever
MAX_ATTEMPTS_PER_ANIME = 3       # attempts per anime while the API answers 429 (rate limited)
RATE_LIMIT_BACKOFF_SECONDS = 5   # pause before re-trying after a 429
CHECKPOINT_EVERY = 200           # write progress to disk after this many fetched anime


def save_posters(df, path):
    """Pickle df to path, storing "NOT_FOUND" for every row that has no poster URL (yet).

    The app only distinguishes a real URL from "NOT_FOUND", so the saved file never
    contains missing values in 'image_url', even when written mid-run.
    """
    df.assign(image_url=df['image_url'].where(df['image_url'].notna(), "NOT_FOUND")).to_pickle(path)


# Load the dataframe we just created
anime = pd.read_pickle(anime_processed_path)

# Create a new column for the image URL if it doesn't exist
if 'image_url' not in anime.columns:
    anime['image_url'] = None

# Resume support: carry over real poster URLs from a previous run's output file.
if os.path.exists(anime_with_posters_path):
    previous_run = pd.read_pickle(anime_with_posters_path)
    if {'MAL_ID', 'image_url'}.issubset(previous_run.columns):
        has_url = previous_run['image_url'].notna() & (previous_run['image_url'] != "NOT_FOUND")
        known_urls = (
            previous_run.loc[has_url, ['MAL_ID', 'image_url']]
            .drop_duplicates(subset='MAL_ID')
            .set_index('MAL_ID')['image_url']
        )
        carried_over = anime['MAL_ID'].map(known_urls)
        anime['image_url'] = anime['image_url'].where(anime['image_url'].notna(), carried_over)
        print(f"Resuming: {int(anime['image_url'].notna().sum())} poster URLs re-used from a previous run")

fetched_since_checkpoint = 0
# --- Loop through each anime to get its poster URL ---
for i, (index, row) in enumerate(anime.iterrows(), start=1):

    if i % 100 == 0:
        print(f"Processed {i} animes")
    mal_id = row['MAL_ID']

    # Only fetch if we don't already have the URL
    if not pd.isna(row['image_url']):
        continue

    try:
        # 1. Call the API (re-try a few times when rate limited)
        for attempt in range(MAX_ATTEMPTS_PER_ANIME):
            response = requests.get(JIKAN_API_URL.format(mal_id), timeout=REQUEST_TIMEOUT_SECONDS)
            if response.status_code != 429:
                break
            time.sleep(RATE_LIMIT_BACKOFF_SECONDS)

        # 2. Get the poster URL from the JSON (first picture), else mark as not found
        poster_url = "NOT_FOUND"
        if response.status_code == 200:
            pictures = response.json().get('data') or []
            if pictures:
                poster_url = pictures[0]['jpg']['image_url']
        anime.at[index, 'image_url'] = poster_url

        time.sleep(1) # 1 request per second to avoid rate limiting

    except Exception as e:
        # Best effort: one failing anime must not abort a run that takes hours.
        print(f"üö® Error on ID {mal_id}: {e}")
        anime.at[index, 'image_url'] = "NOT_FOUND"
        time.sleep(5) # Wait longer if there's an error

    fetched_since_checkpoint += 1
    if fetched_since_checkpoint >= CHECKPOINT_EVERY:
        save_posters(anime, anime_with_posters_path)
        fetched_since_checkpoint = 0

# --- Save the final, enriched dataframe ---
save_posters(anime, anime_with_posters_path)

print(f"\n--- ENRICHMENT COMPLETE! ---")
print(f"‚úÖ Successfully saved enriched data to '{anime_with_posters_path}'.")

Starting data enrichment
Processed 100 animes
Processed 200 animes
Processed 300 animes
Processed 400 animes
Processed 500 animes
Processed 600 animes
Processed 700 animes
Processed 800 animes
Processed 900 animes
Processed 1000 animes
Processed 1100 animes
Processed 1200 animes
Processed 1300 animes
Processed 1400 animes
Processed 1500 animes
Processed 1600 animes
Processed 1700 animes
Processed 1800 animes
Processed 1900 animes
Processed 2000 animes
Processed 2100 animes
Processed 2200 animes
Processed 2300 animes
Processed 2400 animes
Processed 2500 animes
Processed 2600 animes
Processed 2700 animes
Processed 2800 animes
Processed 2900 animes
Processed 3000 animes
Processed 3100 animes
Processed 3200 animes
Processed 3300 animes
Processed 3400 animes
Processed 3500 animes
Processed 3600 animes
Processed 3700 animes
Processed 3800 animes
Processed 3900 animes
Processed 4000 animes
Processed 4100 animes
Processed 4200 animes
Processed 4300 animes
Processed 4400 animes
Processed 4500 a

In [None]:
# Cell 3b: Data Enrichment (Fetch Relations)
#
# Builds {MAL_ID: [related anime MAL_IDs]} from the Jikan "relations" endpoint and
# pickles it to app_data/anime_relations.pkl. The run is resumable: ids already
# present in an existing output file are skipped and progress is written to disk
# periodically. Delete the output file to force a full refresh.

print("Starting relationship mapping")

# --- 1. SETUP PATHS ---
DATA_PATH = 'app_data'
anime_with_posters_path = os.path.join(DATA_PATH, 'anime_with_posters.pkl')
relations_save_path = os.path.join(DATA_PATH, 'anime_relations.pkl')

# --- 2. LOAD DATA ---
anime = pd.read_pickle(anime_with_posters_path)

# --- 3. INIT DICTIONARY ---
# Start from the result of a previous (possibly interrupted) run when there is one.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
if os.path.exists(relations_save_path):
    with open(relations_save_path, 'rb') as f:
        anime_relations = pickle.load(f)
    print(f"Resuming: {len(anime_relations)} anime already mapped")
else:
    anime_relations = {}

# --- 4. CONFIGURATION ---
JIKAN_RELATIONS_URL = "https://api.jikan.moe/v4/anime/{}/relations"

# Anime Related types to ignore, cause weak/no relations at all to anime story
EXCLUDED_RELATION_TYPES = ['Other', 'Character']

REQUEST_TIMEOUT_SECONDS = 15     # without a timeout one stalled connection blocks the loop forever
MAX_ATTEMPTS_PER_ANIME = 3       # attempts per anime while the API answers 429 (rate limited)
RATE_LIMIT_BACKOFF_SECONDS = 5   # pause before re-trying after a 429
CHECKPOINT_EVERY = 200           # write progress to disk after this many fetched anime


def save_relations(relations, path):
    """Pickle the relations dictionary to path."""
    with open(path, 'wb') as f:
        pickle.dump(relations, f)


fetched_since_checkpoint = 0
for index, row in anime.iterrows():

    mal_id = row['MAL_ID']

    # Already fetched by a previous run
    if mal_id in anime_relations:
        continue

    try:
        for attempt in range(MAX_ATTEMPTS_PER_ANIME):
            response = requests.get(JIKAN_RELATIONS_URL.format(mal_id), timeout=REQUEST_TIMEOUT_SECONDS)
            if response.status_code != 429:
                break
            time.sleep(RATE_LIMIT_BACKOFF_SECONDS)

        if response.status_code == 429:
            # Still rate limited: leave the id unrecorded so a re-run tries it again
            # instead of caching "no relations".
            print(f"Rate limited for {mal_id}; left for a later run")
        else:
            related_ids = []
            if response.status_code == 200:
                data = response.json().get('data') or []
                for relation_group in data:
                    relation_type = relation_group.get('relation')

                    # --- FILTER 1: Skip 'Other' and 'Character' relations ---
                    if relation_type in EXCLUDED_RELATION_TYPES:
                        continue

                    for entry in relation_group['entry']:
                        # --- FILTER 2: Skip Manga/Light Novels ---
                        if entry['type'] == 'anime':
                            related_ids.append(entry['mal_id'])

            # Any other non-200 answer is stored as "no relations".
            anime_relations[mal_id] = related_ids

        time.sleep(1)

    except Exception as e:
        # Best effort: the id stays unrecorded and is picked up again by a re-run.
        print(f"üö® Exception for {mal_id}: {e}")
        time.sleep(5)

    fetched_since_checkpoint += 1
    if fetched_since_checkpoint >= CHECKPOINT_EVERY:
        save_relations(anime_relations, relations_save_path)
        fetched_since_checkpoint = 0

# --- 5. SAVE ---
save_relations(anime_relations, relations_save_path)

In [None]:
# Cell 3c: Expand Relationships
#
# Turns the direct relation lists into "families": every anime is mapped to all
# anime that are reachable from it through any chain of relations.

print("Starting relationship expansion...")

# 1. Setup Paths
DATA_PATH = 'app_data'
original_relations_path = os.path.join(DATA_PATH, 'anime_relations.pkl')
expanded_relations_path = os.path.join(DATA_PATH, 'anime_relations_expanded.pkl')
anime_processed_path = os.path.join(DATA_PATH, 'anime_with_posters.pkl') # Need this for validation

# 2. Load Data
with open(original_relations_path, 'rb') as f:
    raw_relations = pickle.load(f)

# Load our actual anime list to validate IDs
anime_df = pd.read_pickle(anime_processed_path)
valid_anime_ids = set(anime_df['MAL_ID'].unique())

# Manual data correction: drop the 1412 -> 6115 link.
# NOTE(review): presumably this link wrongly joins two otherwise unrelated families — confirm.
# .get() is used because 1412 is absent from the dictionary when its fetch failed.
if 6115 in raw_relations.get(1412, []):
    raw_relations[1412].remove(6115)

# 3. Build the Graph (With Validation)
# Undirected graph: an edge means "related", whichever side listed the relation.
G = nx.Graph()

for anime_id, related_ids in raw_relations.items():
    # Only add the source node if it's in our dataset
    if anime_id in valid_anime_ids:
        G.add_node(anime_id)

        for rel_id in related_ids:
            # Relations pointing outside our dataset are ignored
            if rel_id in valid_anime_ids:
                G.add_edge(anime_id, rel_id)

print(f"Graph built. Found {G.number_of_nodes()} valid connected anime.")

# 4. Extract Families
# Each connected component is one family; every member maps to the full member list
# (which includes the member itself).
expanded_relations = {}
for family in nx.connected_components(G):
    family_list = list(family)
    for anime_id in family_list:
        expanded_relations[anime_id] = family_list

# 5. Save
with open(expanded_relations_path, 'wb') as f:
    pickle.dump(expanded_relations, f)

print("‚úÖ Expansion Complete! Data is now clean and strictly anime-only.")

Starting relationship expansion...
Graph built. Found 6356 valid connected anime.
‚úÖ Expansion Complete! Data is now clean and strictly anime-only.


In [2]:
# Cell 4: Create the 'recommender.py' Backend
%%writefile streamlit_app/recommender.py

import pandas as pd
import numpy as np
import pickle
import os
from sentence_transformers import util
from numpy.linalg import norm

# --- 1. LOAD ALL FILES ---
# Everything is loaded once at import time. If anything fails, EVERY module-level
# name that this module or app.py reads is replaced by an empty placeholder, so
# `from recommender import ...` keeps working and the public functions can return
# their "no data" results instead of raising NameError.
# NOTE: pickle files can execute arbitrary code when loaded; only load files
# produced by this project's own data-preparation notebook.
try:
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    PROJ_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, '..'))
    DATA_PATH = os.path.join(PROJ_ROOT, 'app_data')

    # Load DataFrames
    anime = pd.read_pickle(os.path.join(DATA_PATH, 'anime_with_posters.pkl'))
    anime_agg = pd.read_pickle(os.path.join(DATA_PATH, 'anime_agg_processed.pkl'))
    content_df = pd.read_pickle(os.path.join(PROJ_ROOT, 'datasets/created_datasets/content_df_model.pkl'))
    anime_genres_mlb = pd.read_pickle(os.path.join(DATA_PATH, 'anime_genres_mlb.pkl'))

    # Load IDs and Lists
    with open(os.path.join(DATA_PATH, 'divided_opinion_anime_ids.pkl'), 'rb') as f:
        divided_opinion_ids = pickle.load(f)
    with open(os.path.join(DATA_PATH, 'genres_list.pkl'), 'rb') as f:
        genres_list = pickle.load(f)

    # Load Relations Dictionary (anime id -> list of related anime ids)
    with open(os.path.join(DATA_PATH, 'anime_relations_expanded.pkl'), 'rb') as f:
        anime_relations = pickle.load(f)

    # Keep only content rows for anime that exist in the main table
    content_df = content_df[content_df['MAL_ID'].isin(anime['MAL_ID'].unique())]

    # Load Weights
    with open(os.path.join(DATA_PATH, 'anime_model_weights.pkl'), 'rb') as f:
        anime_weights = pickle.load(f)

    # Load Encodings (MAL_ID <-> row index of the weight matrix)
    with open(os.path.join(PROJ_ROOT, 'datasets/created_datasets/encoded_dictionary/anime2anime_encoded.pkl'), 'rb') as f:
        anime2anime_encoded = pickle.load(f)
    with open(os.path.join(PROJ_ROOT, 'datasets/created_datasets/encoded_dictionary/anime_encoded2anime.pkl'), 'rb') as f:
        anime_encoded2anime = pickle.load(f)
    encoded_dictionary = {'anime2anime_encoded': anime2anime_encoded, 'anime_encoded2anime': anime_encoded2anime}

except Exception as e:
    print(f"Error loading files: {e}")
    # A partially loaded state is discarded as a whole: all names fall back together.
    anime = pd.DataFrame()
    anime_agg = pd.DataFrame()
    content_df = pd.DataFrame()
    anime_genres_mlb = pd.DataFrame()
    divided_opinion_ids = []
    genres_list = []
    anime_relations = {}
    anime_weights = None
    anime2anime_encoded = {}
    anime_encoded2anime = {}
    encoded_dictionary = {'anime2anime_encoded': anime2anime_encoded, 'anime_encoded2anime': anime_encoded2anime}

# --- 2. CORE LOGIC FUNCTIONS ---
def get_anime_id_from_name(name, anime_df):
    """Return the MAL_ID of the first anime whose title equals `name`, ignoring case.

    The 'Name' column is searched first; 'English name' is used only when 'Name'
    yields no match.

    Args:
        name: title to look up.
        anime_df: DataFrame with 'MAL_ID', 'Name' and 'English name' columns.

    Returns:
        The matching MAL_ID, or None when `name` is not a string or nothing matches.
    """
    if not isinstance(name, str):
        return None
    wanted = name.lower()
    for column in ('Name', 'English name'):
        try:
            matches = anime_df.loc[anime_df[column].str.lower() == wanted, 'MAL_ID']
        except (KeyError, AttributeError, TypeError):
            # Column missing, or not usable as a string column: treat as "no match here".
            # Only these lookup errors are swallowed (a bare `except:` would also hide
            # KeyboardInterrupt/SystemExit).
            continue
        if len(matches) > 0:
            return matches.values[0]
    return None

def get_anime_details(name, anime_df, anime_agg_df):
    """Return the row(s) of `anime_df` for the anime called `name`, with its average rating attached.

    Returns None when the name cannot be resolved to a MAL_ID or no row is found.
    """
    mal_id = get_anime_id_from_name(name, anime_df)
    if mal_id is None:
        return None

    details = anime_df[anime_df['MAL_ID'] == mal_id].copy()
    rating_lookup = anime_agg_df[['anime_id', 'anime_avg_rating']]
    details = details.merge(rating_lookup, left_on='MAL_ID', right_on='anime_id', how='left')
    return None if details.empty else details

# --- 3. FILTER FUNCTION ---
def filter_recommendations(recommendations_df, anime_df, anime_agg_df, **filters):
    """Attach anime metadata to a recommendation list and apply the optional user filters.

    Args:
        recommendations_df: DataFrame with a 'MAL_ID' (or 'anime_id') column; it is
            not modified. None or an empty frame is returned unchanged.
        anime_df: anime table supplying the descriptive columns, keyed by 'MAL_ID'.
        anime_agg_df: table with 'anime_id' and 'anime_avg_rating'.
        **filters: optional keys; a missing or falsy value disables that filter.
            Type_preferred (list): keep rows whose 'Type' is in the list.
            Genres_preferred (list): keep rows whose genres contain ALL listed genres.
            Origin_year_range ((lo, hi)): inclusive range on 'Origin_year'.
            min_anime_rating (number): lower bound on 'anime_avg_rating'.
            popularity_range ((lo, hi)): inclusive range on 'Popularity_adjusted'.
            Other keys are ignored.

    Returns:
        DataFrame of the surviving rows, in their incoming order, with a fresh index.
    """
    if recommendations_df is None or recommendations_df.empty:
        return recommendations_df

    # Work on a copy so the caller's frame is never renamed or otherwise changed.
    rec_with_info = recommendations_df.copy()
    if 'MAL_ID' not in rec_with_info.columns:
        rec_with_info = rec_with_info.rename(columns={'anime_id': 'MAL_ID'})

    needed_info_cols = ['Name', 'English name', 'Type', 'Genres_edited', 'Origin_year', 'Popularity_adjusted', 'image_url', 'synopsis']
    # A list (not a set) keeps the order of the merged columns deterministic.
    missing_cols = [col for col in needed_info_cols if col not in rec_with_info.columns]

    if missing_cols:
        rec_with_info = rec_with_info.merge(anime_df[missing_cols + ['MAL_ID']], on='MAL_ID', how='left')

    rec_with_info = rec_with_info.merge(anime_agg_df[['anime_id', 'anime_avg_rating']], left_on='MAL_ID', right_on='anime_id', how='left')

    if filters.get('Type_preferred'):
        rec_with_info = rec_with_info[rec_with_info['Type'].isin(filters['Type_preferred'])]

    if filters.get('Genres_preferred'):
        genres_set = set(filters['Genres_preferred'])
        has_all_genres = (
            rec_with_info['Genres_edited'].fillna('').str.split('|')
            .apply(lambda tokens: genres_set.issubset(tokens))
            # apply() on an empty frame yields an object-dtype result, which pandas would
            # treat as a (empty) column selection; a bool dtype keeps it a row mask.
            .astype(bool)
        )
        rec_with_info = rec_with_info[has_all_genres]

    if filters.get('Origin_year_range'):
        year_range = filters['Origin_year_range']
        rec_with_info = rec_with_info[(rec_with_info['Origin_year'] >= year_range[0]) & (rec_with_info['Origin_year'] <= year_range[1])]

    if filters.get('min_anime_rating'):
        # assign() returns a new frame, so no value is written into a filtered slice.
        rec_with_info = rec_with_info.assign(
            anime_avg_rating=pd.to_numeric(rec_with_info['anime_avg_rating'], errors='coerce')
        )
        rec_with_info = rec_with_info.dropna(subset=['anime_avg_rating'])
        rec_with_info = rec_with_info[rec_with_info['anime_avg_rating'] >= filters['min_anime_rating']]

    if filters.get('popularity_range'):
        pop_range = filters['popularity_range']
        rec_with_info = rec_with_info[
            (rec_with_info['Popularity_adjusted'] >= pop_range[0]) &
            (rec_with_info['Popularity_adjusted'] <= pop_range[1])
        ]

    return rec_with_info.reset_index(drop=True)


# --- 4. GETTER FUNCTIONS ---
def model_rec_based_on_anime_similarity(name, anime_df, weights, enc_dict):
    """Rank all other anime by the dot product of their embedding row with the input anime's row.

    The dot product equals cosine similarity when the rows of `weights` are unit-normalised.

    Returns:
        DataFrame with 'similarity_model' and 'MAL_ID', sorted by descending similarity
        and without the input anime; None when `weights` is None, the name is unknown,
        or the anime has no encoded index.
    """
    if weights is None:
        return None

    query_id = get_anime_id_from_name(name, anime_df)
    if query_id is None:
        return None

    query_row = enc_dict['anime2anime_encoded'].get(query_id)
    if query_row is None:
        return None

    # One score per row of the weight matrix; the positional index is the encoded anime index.
    scores = pd.DataFrame(np.dot(weights, weights[query_row]), columns=['similarity_model'])
    scores['MAL_ID'] = scores.index.map(enc_dict['anime_encoded2anime'])
    others = scores[scores['MAL_ID'] != query_id]
    return others.sort_values(by='similarity_model', ascending=False)

def content_based_rec(name, anime_df, content_df):
    """Rank all other anime by cosine similarity between plot embeddings.

    Returns:
        DataFrame with 'MAL_ID' and 'similarity', sorted by descending similarity and
        without the input anime; None when the name is unknown or the anime has no
        row in `content_df`.
    """
    query_id = get_anime_id_from_name(name, anime_df)
    if query_id is None:
        return None

    try:
        is_query = content_df['MAL_ID'] == query_id
        query_embedding = content_df.loc[is_query, 'plot_embeddings'].values[0]
        all_embeddings = np.array(content_df['plot_embeddings'].tolist())
        # Row 0 of the similarity matrix: the query against every anime in content_df
        cosine_scores = util.pytorch_cos_sim(query_embedding, all_embeddings)[0]
        ranked = pd.DataFrame({'MAL_ID': content_df['MAL_ID'], 'similarity': cosine_scores.numpy()})
        ranked = ranked[ranked['MAL_ID'] != query_id]
        return ranked.sort_values(by='similarity', ascending=False)
    except (IndexError, KeyError):
        return None

def rec_based_on_genre_similarity(name, anime_df, anime_genre_mlb_df):
    """Rank all other anime by cosine similarity between genre indicator vectors.

    Args:
        name: title of the input anime.
        anime_df: anime table used to resolve `name` to a MAL_ID.
        anime_genre_mlb_df: table whose columns from the third one onwards are the
            genre indicator columns (the first two are expected to be the id and
            the genre string).

    Returns:
        DataFrame with 'MAL_ID' and 'similarity_genre', sorted by descending
        similarity and without the input anime; None when the name is unknown or
        the anime has no row in `anime_genre_mlb_df`.
    """
    anime_id = get_anime_id_from_name(name, anime_df)
    if anime_id is None: return None
    try:
        if 'MAL_ID' not in anime_genre_mlb_df.columns: return None
        genre_columns = anime_genre_mlb_df.columns[2:]
        selected_rows = anime_genre_mlb_df.loc[anime_genre_mlb_df['MAL_ID'] == anime_id, genre_columns].values
        # No row for this anime: report "no result" instead of failing in the dot product.
        if len(selected_rows) == 0:
            return None
        # Use exactly one vector even if the id occurs more than once.
        selected_anime_genre_vector = selected_rows[0]
        anime_genre_array = anime_genre_mlb_df.loc[:, genre_columns].values

        similarity_scores = np.dot(anime_genre_array, selected_anime_genre_vector)
        norm_factor = norm(anime_genre_array, axis=1) * norm(selected_anime_genre_vector)
        # Anime without any genre have a zero norm; avoid dividing by zero.
        norm_factor = np.where(norm_factor == 0, 1e-6, norm_factor)
        similarity_scores = similarity_scores / norm_factor

        results = pd.DataFrame({'MAL_ID': anime_genre_mlb_df['MAL_ID'], 'similarity_genre': similarity_scores})
        results = results[results['MAL_ID'] != anime_id]
        return results.sort_values(by='similarity_genre', ascending=False)
    except (IndexError, KeyError): return None

def rec_based_on_comb_of_genre_sim_and_model(name, anime_df, weights, enc_dict, genre_mlb_df, threshold=0.5):
    """Model-based ranking restricted to anime whose genre similarity is at least `threshold`.

    Returns None when `weights` is None or either underlying recommender returns None.
    """
    if weights is None:
        return None

    by_model = model_rec_based_on_anime_similarity(name, anime_df, weights, enc_dict)
    by_genre = rec_based_on_genre_similarity(name, anime_df, genre_mlb_df)
    if by_model is None or by_genre is None:
        return None

    # Keep anime scored by both recommenders, then drop those below the genre threshold.
    merged = by_genre.merge(by_model, on='MAL_ID', how='inner')
    close_in_genre = merged[merged['similarity_genre'] >= threshold]
    return close_in_genre.sort_values(by='similarity_model', ascending=False)

def get_divided_opinion_animes(anime_df, ids_list):
    """Return the rows of `anime_df` whose 'MAL_ID' is in `ids_list` (an empty frame if `anime_df` is empty)."""
    if anime_df.empty:
        return pd.DataFrame()
    in_pool = anime_df['MAL_ID'].isin(ids_list)
    return anime_df[in_pool]

# --- 5. MAIN ORCHESTRATOR FUNCTIONS ---
def get_recommendations_by_name(name, rec_type, top_n=10, remove_related=False, **filters):
    """Entry point used by the app: recommendations for one anime, filtered and trimmed to `top_n`.

    Args:
        name: title of the input anime.
        rec_type: 'Model-Based Similarity', 'Content (Plot) Similarity' or
            'Combined Model + Genre'.
        top_n: maximum number of rows returned.
        remove_related: drop anime listed as related to the input anime.
        **filters: forwarded to filter_recommendations; 'genre_threshold' (default 0.5)
            is additionally used by the combined recommender.

    Returns:
        An empty DataFrame when the data or weights are not loaded, None when the
        chosen recommender yields nothing, otherwise the display columns of the
        top rows.
    """
    if anime.empty or anime_weights is None:
        return pd.DataFrame()

    if rec_type == 'Model-Based Similarity':
        results_df = model_rec_based_on_anime_similarity(name, anime, anime_weights, encoded_dictionary)
    elif rec_type == 'Content (Plot) Similarity':
        results_df = content_based_rec(name, anime, content_df)
    elif rec_type == 'Combined Model + Genre':
        results_df = rec_based_on_comb_of_genre_sim_and_model(
            name, anime, anime_weights, encoded_dictionary, anime_genres_mlb,
            threshold=filters.get('genre_threshold', 0.5),
        )
    else:
        results_df = None

    if results_df is None or results_df.empty:
        return None

    # Optionally drop everything that belongs to the input anime's relation family
    if remove_related:
        input_id = get_anime_id_from_name(name, anime)
        # Only possible when we have relations data for this anime
        if input_id in anime_relations:
            related_ids = anime_relations[input_id]
            # Make sure the id column carries the name used for filtering
            if 'MAL_ID' not in results_df.columns and 'anime_id' in results_df.columns:
                results_df = results_df.rename(columns={'anime_id': 'MAL_ID'})

            if 'MAL_ID' in results_df.columns:
                is_related = results_df['MAL_ID'].isin(related_ids)
                results_df = results_df[~is_related]

    display_columns = ['Name', 'Genres_edited', 'Type', 'Origin_year', 'anime_avg_rating', 'Popularity_adjusted', 'image_url', 'synopsis']
    filtered_results = filter_recommendations(results_df, anime, anime_agg, **filters)
    return filtered_results.head(top_n)[display_columns]

def get_discover_animes(top_n=20):
    """Return a random sample of up to `top_n` 'divided opinion' anime with their display columns.

    Returns an empty DataFrame when the anime table is not loaded.
    """
    if anime.empty:
        return pd.DataFrame()

    pool = get_divided_opinion_animes(anime, divided_opinion_ids).merge(
        anime_agg[['anime_id', 'anime_avg_rating']],
        left_on='MAL_ID', right_on='anime_id', how='left',
    )
    # Never ask for more rows than the pool holds
    sample_size = min(top_n, len(pool))
    display_columns = ['Name', 'Genres_edited', 'Type', 'Origin_year', 'anime_avg_rating', 'Popularity_adjusted', 'image_url', 'synopsis']
    return pool.sample(n=sample_size)[display_columns]

Overwriting streamlit_app/recommender.py


In [11]:
# Cell 5: Create the FINAL 'app.py' Frontend (With Relation Filter)
%%writefile streamlit_app/app.py

import streamlit as st
import pandas as pd
from recommender import (
    get_recommendations_by_name,
    get_discover_animes,
    anime,
    genres_list,
    get_anime_id_from_name,
    get_anime_details,
    anime_agg
)

# --- 1. UI Configuration ---
st.set_page_config(layout="wide", page_title="Anime Recommendation System")

# --- 2. CUSTOM CSS ---
st.markdown("""
<style>
/* --- FIX #1: GLOBAL LINK HIDER --- */
[data-testid="stHeaderActionElements"] {
    display: none !important;
}
h1, h2, h3 {
    pointer-events: none !important;
    text-decoration: none !important;
}
h1 a, h2 a, h3 a {
    display: none !important;
}

/* --- FIX #2: Rose Pink Buttons (Primary Actions Only) --- */
[data-testid="stSidebarUserContent"] button[kind="primary"] {
    background-color: #C21E56 !important; /* Rose Pink */
    color: white !important;              /* White Text */
    border: none !important;
    font-weight: 900 !important;          /* Max Font Weight */
    font-size: 16px !important;           /* Larger Text */
    text-shadow: 0.3px 0px 0px black;
    transition: 0.2s;

    /* Allow wrapping for primary buttons */
    white-space: normal !important;
    height: auto !important;
    padding-top: 0.5rem !important;
    padding-bottom: 0.5rem !important;
    line-height: 1.2 !important;
}
[data-testid="stSidebarUserContent"] button[kind="primary"]:hover {
    background-color: #A01848 !important; /* Darker Rose on hover */
    color: white !important;
    border: none !important;
}
[data-testid="stSidebarUserContent"] button[kind="primary"] svg {
    fill: white !important;
    color: white !important;
    stroke: white !important;
    stroke-width: 1px;
}

/* --- FIX #3: Secondary Button Styling --- */
/* Ensure standard buttons (like popovers) handle text properly */
[data-testid="stSidebarUserContent"] button[kind="secondary"] {
    white-space: normal !important;
    height: auto !important;
    padding-top: 0.2rem !important;
    padding-bottom: 0.2rem !important;
    line-height: 1.2 !important;
}

/* Main app styling */
[data-testid="stAppViewContainer"] > section:first-of-type {
    padding-top: 1rem !important;
}
[data-testid="stSidebarUserContent"] {
    padding-top: 1.5rem !important;
}

/* Recommendation Box Styling */
[data-testid="stBorderedContainer"] {
    border: 1px solid #2c2f38 !important;
    border-radius: 10px !important;
    padding: 1rem !important;
    margin-bottom: 1rem !important;
}
/* Title inside the box */
.rec-title {
    margin-top: 0rem !important;
    padding-top: 0rem !important;
    margin-bottom: 0.5rem;
    color: #fafafa;
    font-size: 1.25rem;
    font-weight: 600;
}
/* Poster box */
.poster-box {
    width: 150px;
    height: 210px;
    background-size: cover;
    background-position: center center;
    border-radius: 8px;
    border: 1px solid #444;
    margin-bottom: 1.25rem;
}
/* Popover spacing */
[data-testid="stPopoverBody"] h3 {
    margin-bottom: 0.25rem;
}
[data-testid="stPopoverBody"] p {
    margin-bottom: 0.75rem;
}
</style>
""", unsafe_allow_html=True)


# --- 3. Title ---
st.title("üé¨ Anime Recommendation System")
st.write("Created by Rahul Goyal. A deployable recommendation engine.")

# --- 4. Sidebar ---
# --- SECTION 1: SEARCH ---
st.sidebar.header("Find Recommendations")

# Searchable names: every original title plus every known English title,
# de-duplicated and sorted ('Unknown' marks a missing English title).
has_english_name = anime['English name'] != 'Unknown'
all_names = list(anime['Name'].unique())
english_names = list(anime[has_english_name]['English name'].unique())
search_options = sorted(set(all_names) | set(english_names))

# Use selectbox instead of text_input
anime_name = st.sidebar.selectbox(
    "Enter or Select an Anime:",
    options=search_options,
    index=None,
    placeholder="Type to search..."
)

rec_type = st.sidebar.selectbox(
    "Choose a Recommendation Type:",
    (
        "Model-Based Similarity",
        "Content (Plot) Similarity",
        "Combined Model + Genre"
    )
)

# Popover Button (Standard style with arrow)
with st.sidebar.popover("‚ìò What does this do?"):
    if rec_type == "Model-Based Similarity":
        st.markdown("**Model-Based Similarity:**")
        st.write("Finds anime that other users rated in a similar way. This is a good 'if you liked this, you might also like...' feature based on the tastes of thousands of users.")

    elif rec_type == "Content (Plot) Similarity":
        st.markdown("**Content (Plot) Similarity:**")
        st.write("Finds animes with similar plotlines or similar themes.")

    elif rec_type == "Combined Model + Genre":
        st.markdown("**Combined Model + Genre:**")
        st.write("First finds anime with similar rating patterns, then filters that list to only show ones that also have good genre similarity with the input anime genres.")


show_input_details = st.sidebar.checkbox("Show details for input anime", value=True)

# --- NEW CHECKBOX ---
remove_related = st.sidebar.checkbox("Remove related (prequels/sequels/spin-offs)", value=False)
with st.sidebar.popover("‚ÑπÔ∏è Info"):
    st.markdown("**Remove Related:**")
    st.write('''
    We filter out major relations (Sequels/Prequels/Spin-offs/Side-stories) of the input anime from recommendations.
    \nNote: Loosely related animes listed under categories like 'Other' or 'Character' in related entries on MyAnimeList website will not be removed''')
# --------------------

genre_threshold = 0.5
if rec_type == "Combined Model + Genre":
    genre_threshold = st.sidebar.slider(
        "Min. Genre Similarity:", 0.0, 1.0, 0.5, 0.05,
        help="This is the minimum cosine similarity required with the input anime's genres."
    )
top_n_search = st.sidebar.slider("Number of recommendations:", 5, 20, 10, key="search_slider")

st.sidebar.markdown("---")
st.sidebar.subheader("Filter Your Results (Optional)")
type_options = ['TV', 'Movie', 'OVA', 'Special', 'ONA']
all_genres = sorted(genres_list)
# Slider bounds are taken straight from the loaded anime table.
# NOTE(review): the data-preparation step stores -1 as Origin_year when no year could be
# parsed, so min_year may come out as -1 if such rows exist — confirm against the data.
min_year = int(anime['Origin_year'].min())
max_year = int(anime['Origin_year'].max())
min_pop = int(anime['Popularity_adjusted'].min())
max_pop = int(anime['Popularity_adjusted'].max())

# All filters start at their widest setting; the selections are collected into user_filters below.
genres_preferred = st.sidebar.multiselect("Must include all of these genres:", all_genres)
type_preferred = st.sidebar.multiselect("Must be one of these types:", type_options, default=type_options)
min_anime_rating = st.sidebar.slider("Minimum average user rating:", 0.0, 10.0, 0.0, 0.1)
origin_year_range = st.sidebar.slider("Origin Year:", min_year, max_year, (min_year, max_year))
popularity_range = st.sidebar.slider("Popularity Rank (1 = Most Popular):", min_pop, max_pop, (min_pop, max_pop))

search_button = st.sidebar.button("Get Recommendations", type="primary")


# --- SECTION 2: DISCOVER ---
st.sidebar.markdown("---")
st.sidebar.header("Discover")

# Main action button
discover_button = st.sidebar.button("Show 'Divided Opinion' Anime", type="primary")

# Small info button on the next line
with st.sidebar.popover("‚ÑπÔ∏è Info"):
    st.markdown("**Divided Opinion Anime:**")
    # The wording mirrors how the pool is built during data preparation:
    # "good" counts ratings 8-10 and "bad" counts ratings 1-5 (5 included).
    st.write("""
    These are polarizing anime. A similar number of users loved them (rating 8+) as disliked them (rating 5 or below).

    We'll show you a random selection from this pool for you to discover!
    """)

top_n_discover = st.sidebar.slider("Number of Animes:", 5, 30, 10, key="discover_slider")


# --- 5. Store Filters ---
# Collect the sidebar selections under the keyword names the recommender expects;
# an empty multiselect is passed as None.
user_filters = dict(
    Genres_preferred=genres_preferred or None,
    Type_preferred=type_preferred or None,
    min_anime_rating=min_anime_rating,
    Origin_year_range=origin_year_range,
    popularity_range=popularity_range,
)


# --- 6. Main Page Display Logic ---
def _poster_style(image_url):
    """Return the inline CSS ``background-image`` rule for a poster cell."""
    import html  # local import: the file-level import block is not visible from here

    # A missing poster can show up as None, "", "NOT_FOUND" or NaN. NaN is a
    # truthy float, so a plain truthiness test would emit url('nan').
    if isinstance(image_url, str) and image_url and image_url != "NOT_FOUND":
        return f"background-image: url('{html.escape(image_url, quote=True)}')"
    return "background-image: url('https://via.placeholder.com/150x210.png?text=No+Poster')"


def _render_anime_body(row):
    """Draw the poster column and the details column for one anime row."""
    col1, col2 = st.columns([1, 4])
    with col1:
        image_style = _poster_style(row['image_url'])
        st.markdown(f'<div class="poster-box" style="{image_style}"></div>', unsafe_allow_html=True)
    with col2:
        st.write(f"**Type:** {row['Type']}  |  **Year:** {row['Origin_year']}  |  **Avg. Rating:** {row['anime_avg_rating']:.2f} | **Popularity Rank:** {row['Popularity_adjusted']}")
        st.write(f"**Genres:** {row['Genres_edited'].replace('|', ', ')}")
        if pd.notna(row['synopsis']):
            with st.expander("Show Synopsis", expanded=False):
                st.write(row['synopsis'])


def display_recommendations(recommendations_df, is_input_anime=False):
    """Render anime rows as bordered cards on the main page.

    Args:
        recommendations_df: DataFrame whose rows provide the columns ``Name``,
            ``image_url``, ``Type``, ``Origin_year``, ``anime_avg_rating``,
            ``Popularity_adjusted``, ``Genres_edited`` and ``synopsis``.
        is_input_anime: When True, only the first row is shown, under a
            "Details for Input Anime" heading. When False, every row is shown
            as a numbered card starting at 1.

    Returns:
        None. Output is written directly to the Streamlit page. Nothing is
        rendered when ``recommendations_df`` is None or has no rows.
    """
    import html  # local import: the file-level import block is not visible from here

    # Guard against an empty result so ``iloc[0]`` below cannot raise.
    if recommendations_df is None or recommendations_df.empty:
        return

    if is_input_anime:
        row = recommendations_df.iloc[0]
        # Titles are injected into raw HTML, so they must be escaped.
        title = html.escape(str(row["Name"]))
        st.markdown(f'<h3 class="rec-title">Details for Input Anime: {title}</h3>', unsafe_allow_html=True)
        with st.container(border=True):
            _render_anime_body(row)
        return

    # Reset the index so the displayed numbering is 1..N regardless of the
    # index labels carried by the incoming frame.
    for i, row in recommendations_df.reset_index(drop=True).iterrows():
        with st.container(border=True):
            title = html.escape(str(row["Name"]))
            st.markdown(f'<h3 class="rec-title">{i + 1}. {title}</h3>', unsafe_allow_html=True)
            _render_anime_body(row)

# --- Updated logic for which button was pressed ---
# Exactly one branch runs per script execution: search, discover, or the
# initial hint shown when neither button was pressed.
if search_button:
    # A missing or whitespace-only name counts as "nothing entered".
    if anime_name and anime_name.strip():
        anime_details = get_anime_details(anime_name, anime, anime_agg)

        if anime_details is None:
            st.error("Anime is not found, please check the name and try again")
        else:
            # Optionally show the looked-up anime itself above the results.
            if show_input_details:
                display_recommendations(anime_details, is_input_anime=True)
                st.markdown("---")

            with st.spinner('Searching for the best recommendations...'):
                recommendations = get_recommendations_by_name(
                    anime_name,
                    rec_type,
                    top_n_search,
                    genre_threshold=genre_threshold,
                    remove_related=remove_related,
                    **user_filters
                )

                if recommendations is not None and not recommendations.empty:
                    st.success(f"Here are the top {len(recommendations)} recommendations for '{anime_name}':")
                    display_recommendations(recommendations, is_input_anime=False)
                else:
                    st.error(f"No recommendations found for '{anime_name}' with the selected filters. Try broadening your search!")
    else:
        st.warning("Please enter an anime name.")

elif discover_button:
    with st.spinner("Finding controversial anime..."):
        divided_animes = get_discover_animes(top_n=top_n_discover)
        # Same None-or-empty guard as the search branch, so a missing result
        # shows the error message instead of raising AttributeError.
        if divided_animes is not None and not divided_animes.empty:
            st.success(f"ü§î Here are {len(divided_animes)} 'Divided Opinion' animes for you:")
            display_recommendations(divided_animes, is_input_anime=False)
        else:
            st.error("No 'Divided Opinion' animes found.")

else:
    st.info("Choose an option from the sidebar to get started!")

Overwriting streamlit_app/app.py
