In [1]:
# Import the required modules
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import pickle as pkl
import random

# Pre-Processing
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Models
from sklearn.neighbors import NearestNeighbors

# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv("anime_ml.csv")
print(df.shape)
df.head()

(11903, 49)


Unnamed: 0,anime_id,name,episodes,rating,members,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Fantasy,Game,Historical,Horror,Josei,Kids,Magic,MartialArts,Mecha,Military,Music,Mystery,Parody,Police,Psychological,Romance,Samurai,School,Sci-Fi,Seinen,Shoujo,ShoujoAi,Shounen,ShounenAi,SliceofLife,Space,Sports,SuperPower,Supernatural,Thriller,Vampire,Movie,Music.1,ONA,OVA,Special,TV
0,32281,Kimi no Na wa.,1,9.37,200630,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
1,5114,Fullmetal Alchemist: Brotherhood,64,9.26,793665,1,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,28977,Gintama,51,9.25,114262,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,9253,Steins Gate,24,9.17,673572,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
4,9969,Gintama',51,9.16,151266,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [None]:
df.describe()

In [4]:
def recommend(self, anime_name=None, genres=None, type_filter=None, rating_min=None, episodes_min=None):
    # Filter DataFrame based on user input
    filtered_df = self.df.copy()

    # Filter by anime name
    if anime_name:
        anime_match = filtered_df[filtered_df['name'] == anime_name]
        if anime_match.empty:
            raise ValueError(f"Anime '{anime_name}' not found.")
        anime_id = anime_match['anime_id'].values[0]
    else:
        # Randomly select an anime if no name is provided
        if filtered_df.empty:
            raise ValueError("No anime data available.")
        anime_id = random.choice(filtered_df['anime_id'].values)

    # Apply genre filter if specified
    if genres:
        genre_filter = filtered_df[genres].sum(axis=1) > 0  # Keep rows where at least one genre is present
        filtered_df = filtered_df[genre_filter]

    # Apply type filter if specified
    if type_filter:
        type_conditions = [filtered_df[type] == True for type in type_filter if type in filtered_df.columns]
        if type_conditions:
            combined_condition = np.logical_or.reduce(type_conditions)
            filtered_df = filtered_df[combined_condition]

    # Apply minimum rating filter if specified
    if rating_min is not None:
        filtered_df = filtered_df[filtered_df['rating'] >= rating_min]

    # Apply minimum episodes filter if specified
    if episodes_min is not None:
        filtered_df = filtered_df[filtered_df['episodes'] >= episodes_min]

    # If no anime matches the filters, raise an error
    if filtered_df.empty:
        raise ValueError("No anime found for the specified filters.")

    # Prepare the feature matrix for the Nearest Neighbors model
    feature_cols = ['episodes', 'rating', 'members']  # Add more as needed
    X = filtered_df[feature_cols]
    X_preprocessed = self.preprocessor.transform(X)

    # Extract features of the selected anime
    anime_features = self.df.loc[self.df['anime_id'] == anime_id, feature_cols]

    # Check if the selected anime exists in the original DataFrame after filtering
    if anime_features.empty:
        raise ValueError(f"Selected anime ID '{anime_id}' is not present in the original DataFrame after filtering.")

    anime_features_preprocessed = self.preprocessor.transform(anime_features)

    # Find the nearest neighbors
    distances, indices = self.model.kneighbors(anime_features_preprocessed)

    # Retrieve the metadata of the recommended animes
    recommendations = filtered_df.iloc[indices[0]].copy()
    recommendations["distance"] = distances[0]  # Add distances to the DataFrame

    # Sort by distance and return
    recommendations = recommendations.sort_values(by="distance")
    return recommendations.to_dict(orient="records")


In [5]:
def make_fit(anime_length): 
    # Load the dataset
    df = pd.read_csv("anime_ml.csv")

    # Remove any rows with missing values and reset the index
    df = df.dropna(how="any").reset_index(drop=True)

    # Update the column name if necessary
    df.rename(columns={'Music.1': 'Music'}, inplace=True)

    # Remove duplicate anime based on name
    df = df.drop_duplicates(subset=["name"]).reset_index(drop=True)

    # Define the columns for metadata and features
    meta_cols = ["anime_id", "name"]
    feature_cols = ['episodes', 'rating', 'members'] + (genres if genres else [])  # Include genres if provided

    # Preprocessing for numeric features
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    
    # Combine all preprocessing steps
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, ['episodes', 'rating', 'members'])
        ]
    )

    # Determine which anime to base recommendations on
    if anime_name:
        anime_match = df.loc[df.name == anime_name]
        if anime_match.empty:
            raise ValueError(f"Anime '{anime_name}' not found.")
        anime_id = anime_match.sort_values(by="rating", ascending=False).anime_id.values[0]
    else:
        # Randomly select an anime if no name is provided
        anime_id = random.choice(df['anime_id'].values)

    # Apply genre filter if specified
    filtered_df = df
    if genres is not None and genres:
        genre_filter = filtered_df[genres].sum(axis=1) > 0  # Keep rows where at least one genre is present
        filtered_df = filtered_df[genre_filter]

    # Apply type filter if specified
    if type_filter is not None and type_filter:
        type_conditions = [filtered_df[type] == True for type in type_filter if type in filtered_df.columns]
        if type_conditions:
            combined_condition = np.logical_or.reduce(type_conditions)
            filtered_df = filtered_df[combined_condition]

    # Apply minimum rating filter if specified
    if rating_min is not None:
        filtered_df = filtered_df[filtered_df['rating'] >= rating_min]

    # Apply minimum episodes filter if specified
    if episodes_min is not None:
        filtered_df = filtered_df[filtered_df['episodes'] >= episodes_min]

    # If no anime matches the filters, raise an error
    if filtered_df.empty:
        raise ValueError("No anime found for the specified filters.")

    # Prepare the feature matrix for the Nearest Neighbors model
    X = filtered_df[feature_cols]  # Select the feature columns
    X_preprocessed = preprocessor.fit_transform(X)  # Fit and transform the feature matrix

    # Initialize and fit the Nearest Neighbors model
    k = anime_length
    model1 = NearestNeighbors(n_neighbors=k, metric="cosine")
    model1.fit(X_preprocessed)

    with open('anime_length_name.pkl', 'wb') as model_file:
        pkl.dump(model1, model_file)

    


def make_recommendation(anime_model, anime_name=None, genres=None, type_filter=None, rating_min=None, episodes_min=None): 
    # Load the dataset
    df = pd.read_csv("anime_ml.csv")


    # Load Pickle file
    filename = anime_model
    model1 = pkl.load(open(filename, 'rb')) 




    # Define the columns for metadata and features
    meta_cols = ["anime_id", "name"]
    feature_cols = ['episodes', 'rating', 'members'] + (genres if genres else [])  # Include genres if provided

    # Preprocessing for numeric features
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    
    # Combine all preprocessing steps
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, ['episodes', 'rating', 'members'])
        ]
    )

     # Determine which anime to base recommendations on
    if anime_name:
        anime_match = df.loc[df.name == anime_name]
        if anime_match.empty:
            raise ValueError(f"Anime '{anime_name}' not found.")
        anime_id = anime_match.sort_values(by="rating", ascending=False).anime_id.values[0]
    else:
        # Randomly select an anime if no name is provided
        anime_id = random.choice(df['anime_id'].values)

    # Apply genre filter if specified
    filtered_df = df
    if genres is not None and genres:
        genre_filter = filtered_df[genres].sum(axis=1) > 0  # Keep rows where at least one genre is present
        filtered_df = filtered_df[genre_filter]

    # Apply type filter if specified
    if type_filter is not None and type_filter:
        type_conditions = [filtered_df[type] == True for type in type_filter if type in filtered_df.columns]
        if type_conditions:
            combined_condition = np.logical_or.reduce(type_conditions)
            filtered_df = filtered_df[combined_condition]

    # Apply minimum rating filter if specified
    if rating_min is not None:
        filtered_df = filtered_df[filtered_df['rating'] >= rating_min]

    # Apply minimum episodes filter if specified
    if episodes_min is not None:
        filtered_df = filtered_df[filtered_df['episodes'] >= episodes_min]

    # If no anime matches the filters, raise an error
    if filtered_df.empty:
        raise ValueError("No anime found for the specified filters.")


    # Extract features of the selected anime
    anime_features = df.loc[df.anime_id == anime_id, feature_cols]
    anime_features_preprocessed = preprocessor.transform(anime_features)

    # Find the nearest neighbors
    distances, indices = model1.kneighbors(anime_features_preprocessed)

    # Retrieve the metadata of the recommended animes
    animes = filtered_df.iloc[indices[0]]
    animes["distance"] = distances[0]  # Add distances to the DataFrame

    # Filter the columns for the final output
    animes = animes.sort_values(by="distance")  # Sort by distance

    # Return the recommended animes as a list of dictionaries
    return animes.to_dict(orient="records")

In [6]:
def make_fit(anime_length, genres=None): 
    # Load the dataset
    df = pd.read_csv("anime_ml.csv")

    # Remove any rows with missing values and reset the index
    df = df.dropna(how="any").reset_index(drop=True)

    # Update the column name if necessary
    df.rename(columns={'Music.1': 'Music'}, inplace=True)

    # Remove duplicate anime based on name
    df = df.drop_duplicates(subset=["name"]).reset_index(drop=True)

    # Define the columns for metadata and features
    feature_cols = ['episodes', 'rating', 'members'] + (genres if genres else [])

    # Preprocessing for numeric features
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    
    # Combine all preprocessing steps
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, ['episodes', 'rating', 'members'])
        ]
    )

    # Prepare the feature matrix
    X = df[feature_cols]
    X_preprocessed = preprocessor.fit_transform(X)  # Fit and transform the feature matrix

    # Initialize and fit the Nearest Neighbors model
    model1 = NearestNeighbors(n_neighbors=anime_length, metric="cosine")
    model1.fit(X_preprocessed)

    # Save the model and preprocessor
    with open('anime_model.pkl', 'wb') as model_file:
        pkl.dump(model1, model_file)
    with open('preprocessor.pkl', 'wb') as preprocessor_file:
        pkl.dump(preprocessor, preprocessor_file)

def make_recommendation(anime_model='anime_model.pkl', preprocessor_model='preprocessor.pkl', anime_name=None, genres=None, type_filter=None, rating_min=None, episodes_min=None): 
    # Load the dataset
    df = pd.read_csv("anime_ml.csv")

    # Load Pickle files
    model1 = pkl.load(open(anime_model, 'rb')) 
    preprocessor = pkl.load(open(preprocessor_model, 'rb'))

    # Define the columns for metadata and features
    feature_cols = ['episodes', 'rating', 'members'] + (genres if genres else [])

    # Determine which anime to base recommendations on
    if anime_name:
        anime_match = df.loc[df.name == anime_name]
        if anime_match.empty:
            raise ValueError(f"Anime '{anime_name}' not found.")
        anime_id = anime_match.sort_values(by="rating", ascending=False).anime_id.values[0]
    else:
        # Randomly select an anime if no name is provided
        anime_id = random.choice(df['anime_id'].values)

    # Apply genre filter if specified
    filtered_df = df
    if genres is not None and genres:
        genre_filter = filtered_df[genres].any(axis=1)  # Keep rows where at least one genre is True
        filtered_df = filtered_df[genre_filter]

    # Apply type filter if specified
    if type_filter is not None and type_filter:
        type_conditions = [filtered_df[type] for type in type_filter if type in filtered_df.columns]
        if type_conditions:
            combined_condition = np.logical_or.reduce(type_conditions)
            filtered_df = filtered_df[combined_condition]

    # Apply minimum rating filter if specified
    if rating_min is not None:
        filtered_df = filtered_df[filtered_df['rating'] >= rating_min]

    # Apply minimum episodes filter if specified
    if episodes_min is not None:
        filtered_df = filtered_df[filtered_df['episodes'] >= episodes_min]

    # If no anime matches the filters, raise an error
    if filtered_df.empty:
        raise ValueError("No anime found for the specified filters.")

    # Extract features of the selected anime
    anime_features = df.loc[df.anime_id == anime_id, feature_cols]
    anime_features_preprocessed = preprocessor.transform(anime_features)

    # Find the nearest neighbors
    distances, indices = model1.kneighbors(anime_features_preprocessed)

    print(indices[0])
   # Retrieve the metadata of the recommended animes
    animes = filtered_df.iloc[indices[0]]
    animes["distance"] = distances[0]  # Add distances to the DataFrame

    # Filter the columns for the final output
    animes = animes.sort_values(by="distance")  # Sort by distance

    # Return the recommended animes as a list of dictionaries
    return animes.to_dict(orient="records")

In [4]:
make_fit(20)

In [7]:
# Example usage

anime_name = None  # Can be None if no specific anime is needed
genres = ["Comedy"]  # Specify the genres you want to filter by (can be None)
type_filter = None  # Specify the types to filter by (can be None)
rating_min = None  # Minimum rating (can be None if no minimum is needed)
episodes_min = 100  # Minimum episodes (can be None if no minimum is needed)

response = make_recommendation(anime_name=anime_name, genres=genres, type_filter=type_filter, rating_min=rating_min, episodes_min=episodes_min)


[ 5524  5533  5530  9145  5503  5477  9912 10535  5471  8453  5577 10401
  5431  5482  5505  8736 10602 10452 10355  5664]


IndexError: positional indexers are out-of-bounds

In [8]:
# To test
pd.DataFrame(response)

NameError: name 'response' is not defined