In [1]:

# 📦 Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import pickle

# ⚖️ Per-feature importance weights.
# They are applied to the *standardised* features, not the raw ones:
# StandardScaler rescales every column by its own standard deviation, so a
# constant factor applied to a raw column beforehand is cancelled out and
# would have no influence on the cosine distances used by the KNN model.
FEATURE_WEIGHTS = {
    'directors_count': 3,
    'actor1_count': 2,
    'actor2_count': 2,
    'rating': 2,
}

# 🧼 Columns dropped before training (not used as model features)
NON_FEATURE_COLUMNS = ['tconst', 'primaryTitle', 'startYear']


def apply_feature_weights(features, weights=None):
    """Return a copy of *features* with weighted columns multiplied by their weight.

    Args:
        features: DataFrame of (already standardised) numeric features.
        weights: Mapping of column name -> multiplicative weight. Defaults to
            ``FEATURE_WEIGHTS``. Columns named here but missing from
            *features* are skipped, mirroring the original "only if the
            column exists" behaviour.

    Returns:
        A new DataFrame; the input frame is left unmodified.
    """
    if weights is None:
        weights = FEATURE_WEIGHTS

    weighted = features.copy()
    for col, weight in weights.items():
        if col in weighted.columns:
            weighted[col] = weighted[col] * weight
    return weighted


# The guard is true both when run as a script and inside a notebook cell, and
# names assigned here remain module-level (available to later cells).
if __name__ == "__main__":
    # 📂 Load the dataset
    df = pd.read_csv("movieDataset.csv")

    # 🧼 Drop irrelevant columns for training
    df_train = df.drop(columns=NON_FEATURE_COLUMNS, errors='ignore')

    # ⚖️ Scale features, then apply the importance weights so they survive
    # standardisation and actually shape the neighbour search.
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df_train)
    df_scaled = pd.DataFrame(scaled_features, columns=df_train.columns)
    df_scaled = apply_feature_weights(df_scaled)

    # 🤖 Train KNN model
    knn_model = NearestNeighbors(n_neighbors=6, metric='cosine')
    knn_model.fit(df_scaled)

    # 💾 Save model and data
    # NOTE: `df` is no longer multiplied in place, so movies.csv keeps the
    # dataset's original rating / count values instead of inflated ones.
    df.to_csv("movies.csv", index=False)
    df_scaled.to_csv("scaled_features.csv", index=False)

    with open("knn_model.pkl", "wb") as f:
        pickle.dump(knn_model, f)

    print("✅ Model, data, and scaled features saved for Flask API.")


✅ Model, data, and scaled features saved for Flask API.
