# In [None]:
import pandas as pd
import numpy as np

# Load the anime catalogue and clean it in one method chain:
#   * missing "genre" / "type" entries become the placeholder "Unknown";
#   * missing "rating" values are imputed with the column mean;
#   * "episodes" is coerced to numeric (unparseable entries become NaN) and
#     the gaps are then filled with the median of the parsed values;
#   * "members_log" is added as log(1 + members).
df = (
    pd.read_csv("anime.csv")
    .fillna({"genre": "Unknown", "type": "Unknown"})
    .assign(
        rating=lambda frame: frame["rating"].fillna(frame["rating"].mean()),
        episodes=lambda frame: pd.to_numeric(frame["episodes"], errors="coerce"),
    )
    # Second pass: the median must be taken over the already-coerced column.
    .assign(
        episodes=lambda frame: frame["episodes"].fillna(frame["episodes"].median()),
        members_log=lambda frame: np.log1p(frame["members"]),
    )
)



from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer

# Feature pipeline that turns each row of the DataFrame into one vector:
#   * "genre" -> TF-IDF weights over genre labels,
#   * "type"  -> one-hot encoding (categories unseen at fit time are encoded
#                as all zeros instead of raising),
#   * "rating", "episodes", "members_log" -> standardised numeric features.
#
# The genre vectoriser treats every comma-delimited label as ONE token
# (after lower-casing), with surrounding whitespace trimmed. The default word
# tokeniser combined with English stop words would split "Sci-Fi" into
# "sci"/"fi", drop the "of" in "Slice of Life", and make "Shounen Ai" share a
# token with "Shounen", which distorts genre similarity.
# NOTE(review): assumes "genre" cells are comma-separated label lists --
# confirm against anime.csv.
# The pattern is a plain string (not a callable tokenizer) so the fitted
# preprocessor stays picklable.
preprocessor = ColumnTransformer(
    transformers=[
        (
            "genre",
            TfidfVectorizer(
                token_pattern=r"[^,\s][^,]*[^,\s]|[^,\s]",
                max_features=5000,
            ),
            "genre",
        ),
        ("type", OneHotEncoder(handle_unknown="ignore"), ["type"]),
        ("num", StandardScaler(), ["rating", "episodes", "members_log"]),
    ]
)




from sklearn.metrics.pairwise import cosine_similarity
import joblib

# Fit the feature pipeline on the full table and build the feature matrix,
# then compute the pairwise cosine similarity between all rows.
X = preprocessor.fit_transform(df)
cosine_sim = cosine_similarity(X)

# Persist the fitted pipeline, the similarity matrix and the cleaned table.
for _artifact, _path in (
    (preprocessor, "preprocessor.pkl"),
    (cosine_sim, "cosine_sim.pkl"),
    (df, "anime_df.pkl"),
):
    joblib.dump(_artifact, _path)

