In [1]:
# Import Libraries
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

In [3]:
anime_df = pd.read_csv(r'D:\#Great Learning\MY\Project\Anime_Recommendation_System\anime_dataset\anime.csv')
rating_df = pd.read_csv(r'D:\#Great Learning\MY\Project\Anime_Recommendation_System\anime_dataset\rating.csv')


   ## Handles recommendations for new users and new anime

    Strategies:
    1. New User: Popularity-based + preference elicitation
    2. New Anime: Content-based similarity to existing items
    3. Hybrid: Combine multiple signals
    

In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

class ColdStartHandler:
    """
    Handles recommendations for new users and new anime
    
    Strategies:
    1. New User: Popularity-based + preference elicitation
    2. New Anime: Content-based similarity to existing items
    3. Hybrid: Combine multiple signals
    """
    
    def __init__(self, anime_df, rating_df):
        self.anime_df = anime_df
        self.rating_df = rating_df
        self.popularity_scores = self._calculate_popularity()
        self.genre_profiles = self._build_genre_profiles()
        
    def _calculate_popularity(self):
        """
        Calculate multiple popularity metrics
        
        Metrics:
        - Raw rating count
        - Weighted score (rating * log(count))
        - Trending score (recent popularity)
        """
        anime_stats = self.rating_df.groupby('anime_id').agg({
            'rating': ['mean', 'count', 'std']
        }).reset_index()
        anime_stats.columns = ['anime_id', 'avg_rating', 'num_ratings', 'rating_std']
        
        # Bayesian average (handles anime with few ratings)
        C = anime_stats['num_ratings'].mean()  # Average number of ratings
        m = anime_stats['avg_rating'].mean()   # Mean rating across all anime
        
        anime_stats['bayesian_avg'] = (
            (anime_stats['num_ratings'] * anime_stats['avg_rating'] + C * m) /
            (anime_stats['num_ratings'] + C)
        )
        
        # Weighted popularity score
        anime_stats['popularity_score'] = (
            anime_stats['bayesian_avg'] * np.log1p(anime_stats['num_ratings'])
        )
        
        return anime_stats.sort_values('popularity_score', ascending=False)
    
    def _build_genre_profiles(self):
        """Build genre preference profiles for clustering users"""
        # Create genre matrix
        genres = self.anime_df['genre'].str.get_dummies(sep=', ')
        genre_matrix = pd.concat([self.anime_df[['anime_id']], genres], axis=1)
        
        return genre_matrix
    
    def recommend_for_new_user(
    self,
    n_recommendations=10,
    preferred_genres=None,
    preferred_type=None,
    min_rating=7.0):
      recommendations = self.popularity_scores.copy()

      recommendations = recommendations.merge(
      self.anime_df[['anime_id', 'name', 'genre', 'type']],
        on='anime_id',
        how='left'
      )

      # ---------- SAFE CLEANING ----------
      recommendations['genre'] = recommendations['genre'].fillna('')
      recommendations['type'] = recommendations['type'].fillna('')

      # ---------- FILTERS ----------
      if preferred_genres:
         genre_pattern = '|'.join(preferred_genres)
         recommendations = recommendations[
            recommendations['genre'].str.contains(
                genre_pattern, case=False, regex=True
            )
         ]

      if preferred_type:
         recommendations = recommendations[
            recommendations['type'] == preferred_type
         ]

      if min_rating:
        recommendations = recommendations[
            recommendations['bayesian_avg'] >= min_rating
        ]

      # ---------- DIVERSITY SELECTION ----------
      selected_ids = []
      seen_genres = set()

      for _, row in recommendations.iterrows():
        genres = set(row['genre'].split(', ')) if row['genre'] else set()

        if not genres.intersection(seen_genres) or row['popularity_score'] > 100:
            selected_ids.append(row['anime_id'])
            seen_genres.update(genres)

        if len(selected_ids) >= n_recommendations:
            break

      # ---------- FINAL SAFE SELECTION ----------
      result = recommendations[
         recommendations['anime_id'].isin(selected_ids)
     ]

      # Safety fallback (VERY IMPORTANT)
      if result.empty:
        result = recommendations.head(n_recommendations)

      return result[[
        'anime_id',
        'name',
        'genre',
        'type',
        'bayesian_avg',
        'num_ratings',
        'popularity_score'
      ]].head(n_recommendations)


    
    def recommend_for_new_anime(self, new_anime_id, n_recommendations=10):
        """
        Recommend similar anime for a new anime (content-based)
        
        Uses: Genre similarity, type, and metadata
        """
        if new_anime_id not in self.anime_df['anime_id'].values:
            raise ValueError(f"Anime ID {new_anime_id} not found")
        
        # Get new anime metadata
        new_anime = self.anime_df[self.anime_df['anime_id'] == new_anime_id].iloc[0]
        
        # Find similar anime by genre
        new_genres = set(new_anime['genre'].split(', '))
        
        # Calculate genre overlap
        def genre_similarity(genres_str):
            if pd.isna(genres_str):
                return 0
            genres = set(genres_str.split(', '))
            intersection = len(new_genres.intersection(genres))
            union = len(new_genres.union(genres))
            return intersection / union if union > 0 else 0
        
        similar_anime = self.anime_df[
            self.anime_df['anime_id'] != new_anime_id
        ].copy()
        
        similar_anime['genre_similarity'] = similar_anime['genre'].apply(
            genre_similarity
        )
        
        # Boost score if same type (TV, Movie, etc.)
        similar_anime['type_match'] = (
            similar_anime['type'] == new_anime['type']
        ).astype(int)
        
        # Combined similarity score
        similar_anime['similarity_score'] = (
            0.7 * similar_anime['genre_similarity'] +
            0.3 * similar_anime['type_match']
        )
        
        # Merge with popularity
        similar_anime = similar_anime.merge(
            self.popularity_scores[['anime_id', 'bayesian_avg', 'popularity_score']],
            on='anime_id', how='left'
        )
        
        # Final score: Similarity + Popularity
        similar_anime['final_score'] = (
            0.6 * similar_anime['similarity_score'] +
            0.4 * (similar_anime['popularity_score'] / similar_anime['popularity_score'].max())
        )
        
        result = similar_anime.sort_values('final_score', ascending=False)
        return result[['anime_id', 'name', 'genre', 'type', 'genre_similarity',
                      'final_score']].head(n_recommendations)
    
    def quick_preference_elicitation(self, user_ratings, n_recommendations=10):
        """
        For users who have rated just a few anime (< 5 ratings)
        
        Strategy: Use their ratings to build a quick profile and recommend
        similar popular anime
        
        Args:
            user_ratings: Dict of {anime_id: rating}
        """
        if not user_ratings:
            return self.recommend_for_new_user(n_recommendations)
        
        # Get rated anime metadata
        rated_anime_ids = list(user_ratings.keys())
        rated_anime = self.anime_df[
            self.anime_df['anime_id'].isin(rated_anime_ids)
        ]
        
        # Build user genre profile
        user_genre_preference = {}
        for _, anime in rated_anime.iterrows():
            rating = user_ratings[anime['anime_id']]
            genres = anime['genre'].split(', ')
            for genre in genres:
                user_genre_preference[genre] = user_genre_preference.get(genre, 0) + rating
        
        # Normalize preferences
        total_pref = sum(user_genre_preference.values())
        user_genre_preference = {
            k: v/total_pref for k, v in user_genre_preference.items()
        }
        
        # Find anime matching user's genre preferences
        def match_score(genres_str):
            if pd.isna(genres_str):
                return 0
            genres = genres_str.split(', ')
            return sum(user_genre_preference.get(g, 0) for g in genres) / len(genres)
        
        candidates = self.anime_df[
            ~self.anime_df['anime_id'].isin(rated_anime_ids)
        ].copy()
        
        candidates['genre_match_score'] = candidates['genre'].apply(match_score)
        
        # Merge with popularity
        candidates = candidates.merge(
            self.popularity_scores[['anime_id', 'bayesian_avg', 'popularity_score']],
            on='anime_id', how='left'
        )
        
        # Combined score
        candidates['final_score'] = (
            0.5 * candidates['genre_match_score'] +
            0.5 * (candidates['popularity_score'] / candidates['popularity_score'].max())
        )
        
        result = candidates.sort_values('final_score', ascending=False)
        return result[['anime_id', 'name', 'genre', 'bayesian_avg', 
                      'final_score']].head(n_recommendations)


# Usage Example
cold_start = ColdStartHandler(anime_df, rating_df)

# # New user with no history
recs = cold_start.recommend_for_new_user(
    preferred_genres=['Action', 'Fantasy'],
    preferred_type='TV',
    min_rating=8.0
)

# # New user with 3 ratings
user_ratings = {1: 9, 5114: 8, 9253: 10}  # anime_id: rating
recs = cold_start.quick_preference_elicitation(user_ratings)

# # New anime recommendations
similar = cold_start.recommend_for_new_anime(new_anime_id=32281)

In [8]:
similar

Unnamed: 0,anime_id,name,genre,type,genre_similarity,final_score
59,10408,Hotarubi no Mori e,"Drama, Romance, Shoujo, Supernatural",Movie,0.6,0.740089
14,199,Sen to Chihiro no Kamikakushi,"Adventure, Drama, Supernatural",Movie,0.4,0.715025
1958,713,Air Movie,"Drama, Romance, Supernatural",Movie,0.75,0.705545
207,28725,Kokoro ga Sakebitagatterunda.,"Drama, Romance, School",Movie,0.75,0.69889
1110,14669,Aura: Maryuuin Kouga Saigo no Tatakai,"Comedy, Drama, Romance, School, Supernatural",Movie,0.8,0.691944
24,7311,Suzumiya Haruhi no Shoushitsu,"Comedy, Mystery, Romance, School, Sci-Fi, Supe...",Movie,0.428571,0.691562
1493,20903,Harmonie,"Drama, School, Supernatural",Movie,0.75,0.681486
2102,1723,Clannad Movie,"Drama, Fantasy, Romance, School",Movie,0.6,0.677102
410,1689,Byousoku 5 Centimeter,"Drama, Romance, Slice of Life",Movie,0.4,0.676848
34,431,Howl no Ugoku Shiro,"Adventure, Drama, Fantasy, Romance",Movie,0.333333,0.667018


In [9]:
class AnimeRecommenderSystem:

    def __init__(self, anime_df, rating_df, cold_start_handler):
        self.anime_df = anime_df
        self.rating_df = rating_df
        self.cold_start = cold_start_handler

    def recommend(
        self,
        user_id=None,
        user_ratings=None,
        n_recommendations=10,
        preferred_genres=None,
        preferred_type=None
    ):
        """
        Main recommendation entry point
        """

        # ---------- Case 1: Anonymous / Brand New User ----------
        if user_id is None and not user_ratings:
            return self.cold_start.recommend_for_new_user(
                n_recommendations=n_recommendations,
                preferred_genres=preferred_genres,
                preferred_type=preferred_type
            )

        # ---------- Case 2: Few Ratings (Cold User) ----------
        if user_ratings and len(user_ratings) < 5:
            return self.cold_start.quick_preference_elicitation(
                user_ratings=user_ratings,
                n_recommendations=n_recommendations
            )

        # ---------- Case 3: Existing User ----------
        if user_id is not None:
            user_history = self.rating_df[
                self.rating_df['user_id'] == user_id
            ]

            if len(user_history) < 5:
                return self.cold_start.recommend_for_new_user(
                    n_recommendations=n_recommendations
                )

            # Placeholder for collaborative filtering
            return self._recommend_from_cf(user_id, n_recommendations)

        raise ValueError("Invalid recommendation request")

    def _recommend_from_cf(self, user_id, n_recommendations):
        """
        Collaborative filtering fallback
        (to be replaced by ALS / LightFM / Neural CF)
        """
        watched = set(
            self.rating_df[self.rating_df['user_id'] == user_id]['anime_id']
        )

        candidates = self.anime_df[
            ~self.anime_df['anime_id'].isin(watched)
        ]

        return candidates.sort_values(
            'rating', ascending=False
        ).head(n_recommendations)[
            ['anime_id', 'name', 'genre', 'rating']
        ]


In [11]:
cold_start = ColdStartHandler(anime_df, rating_df)

recommender = AnimeRecommenderSystem(
    anime_df=anime_df,
    rating_df=rating_df,
    cold_start_handler=cold_start
)

# Anonymous user
recommender.recommend(
    preferred_genres=['Action', 'Fantasy'],
    preferred_type='TV'
)



Unnamed: 0,anime_id,name,genre,type,bayesian_avg,num_ratings,popularity_score
0,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,7.916509,24574,80.03183


In [None]:
#New user with few ratings
recommender.recommend(
    user_ratings={1: 9, 5114: 8, 9253: 10}
)


Unnamed: 0,anime_id,name,genre,bayesian_avg,final_score
11,2904,Code Geass: Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",7.65938,0.526787
26,2001,Tengen Toppa Gurren Lagann,"Action, Adventure, Comedy, Mecha, Sci-Fi",7.578882,0.519447
17,1575,Code Geass: Hangyaku no Lelouch,"Action, Mecha, Military, School, Sci-Fi, Super...",7.550777,0.515356
83,16498,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",7.236734,0.508325
37,1535,Death Note,"Mystery, Police, Psychological, Supernatural, ...",7.493453,0.507692
13,199,Sen to Chihiro no Kamikakushi,"Adventure, Drama, Supernatural",7.312598,0.506717
156,6547,Angel Beats!,"Action, Comedy, Drama, School, Supernatural",7.195537,0.496424
8,4181,Clannad: After Story,"Drama, Fantasy, Romance, Slice of Life, Supern...",7.68941,0.493996
197,121,Fullmetal Alchemist,"Action, Adventure, Comedy, Drama, Fantasy, Mag...",6.972005,0.489559
21,164,Mononoke Hime,"Action, Adventure, Fantasy",7.094332,0.487804


In [13]:
# Existing user
recommender.recommend(
    user_id=12345
)

Unnamed: 0,anime_id,name,genre,rating
10464,33662,Taka no Tsume 8: Yoshida-kun no X-Files,"Comedy, Parody",10.0
10400,30120,Spoon-hime no Swing Kitchen,"Adventure, Kids",9.6
9595,23005,Mogura no Motoro,Slice of Life,9.5
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",9.37
9078,33607,Kahei no Umi,Historical,9.33
10786,26313,Yakusoku: Africa Mizu to Midori,"Drama, Kids",9.25
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",9.25
3,9253,Steins;Gate,"Sci-Fi, Thriller",9.17
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",9.16
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",9.15
