In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import joblib
import numpy as np
import os

In [2]:
# --- CONFIGURATION ---
# Every path the notebook reads from or writes to is declared here so it can
# be changed in one place. Both are relative paths, so they resolve against
# the directory the notebook is run from.

# Input: CSV of bins (the training step reads its category, latitude and
# longitude columns).
CSV_PATH = "dataset/bins_knn_dataset_500.csv"

# Output: file the per-category model registry is serialized to.
MODEL_SAVE_PATH = "trained_models/knn_recommender.pkl"

In [3]:
# --- 1. LOAD DATA ---
# Reads the bin dataset into `df`, stopping with an exception if the file is
# missing or lacks a column the training step depends on.
#
# A real exception is raised instead of calling exit(): exit() is a helper
# intended for interactive shells, so it does not behave like a normal error
# (in a notebook it acts on the kernel, and in a plain script it ends the
# process with a success status even though loading failed).
if not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"❌ Error: {CSV_PATH} not found.")

df = pd.read_csv(CSV_PATH)

# The training step indexes these three columns; checking them here reports a
# malformed CSV at load time rather than as a KeyError further down.
required_columns = ("category", "latitude", "longitude")
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(f"{CSV_PATH} is missing required columns: {missing_columns}")

print(f"Loaded {len(df)} bins from database.")

Loaded 500 bins from database.


In [4]:
# --- 2. TRAIN SEPARATE MODELS BY CATEGORY ---
# Builds one nearest-neighbour index per bin category, so a lookup against a
# category's index can only ever return bins of that category.
#
# Registry layout:
#   { "<category>": {"model": <fitted NearestNeighbors>,
#                    "data":  <DataFrame of that category's bins>} }
# "data" is re-indexed from 0, so row i of "data" is the i-th point that was
# fitted into "model" (i.e. the indices returned by kneighbors() can be used
# with .iloc on "data").
model_registry = {}

unique_categories = df['category'].unique()

print(f"Training KNN models for categories: {unique_categories}")

for category in unique_categories:
    # Filter data for this specific category
    category_rows = df[df['category'] == category]

    # A bin without a latitude or longitude cannot be placed in the index;
    # leaving it in would make fit() raise on the NaN and abort training for
    # every category. Drop such rows and say so instead of failing.
    category_df = (
        category_rows
        .dropna(subset=['latitude', 'longitude'])
        .reset_index(drop=True)
    )
    skipped = len(category_rows) - len(category_df)
    if skipped:
        print(f"⚠️ {category}: skipped {skipped} bins with missing coordinates.")

    # We need at least 1 bin to train. This also skips a NaN category label,
    # for which the equality filter above matches no rows.
    if len(category_df) == 0:
        continue

    # Prepare coordinates as an (n_bins, 2) array in (lat, long) order, the
    # order the haversine metric expects.
    coordinates = category_df[['latitude', 'longitude']].to_numpy(dtype=float)

    # Initialize KNN (k=1 because we want the SINGLE nearest bin)
    knn = NearestNeighbors(n_neighbors=1, algorithm='ball_tree', metric='haversine')

    # Train (fit) the model.
    # Note: haversine expects radians, so we convert degrees -> radians here.
    # Query points must be converted the same way, and the distances that
    # come back are in radians as well (multiply by the Earth's radius to get
    # a length).
    knn.fit(np.radians(coordinates))

    # Store both the trained model AND the data (so we can retrieve the address later)
    model_registry[category] = {
        "model": knn,
        "data": category_df
    }

Training KNN models for categories: ['Screens_Monitors' 'Small_IT_General' 'Large_Equipment' 'Hazardous_Lamps']


In [5]:
# --- 3. SAVE ---
# Serializes the whole registry (fitted models plus their DataFrames) to
# MODEL_SAVE_PATH.
#
# The output directory is derived from MODEL_SAVE_PATH rather than being
# hard-coded, so changing the configured path cannot leave the target
# directory uncreated. dirname is empty when the path has no directory part,
# in which case there is nothing to create.
model_dir = os.path.dirname(MODEL_SAVE_PATH)
if model_dir:
    os.makedirs(model_dir, exist_ok=True)

# NOTE: joblib files are pickle-based; only load this file from a trusted
# location, ideally with the same library versions that wrote it.
joblib.dump(model_registry, MODEL_SAVE_PATH)
print(f"✅ Recommendation System Saved to {MODEL_SAVE_PATH}")

✅ Recommendation System Saved to trained_models/knn_recommender.pkl
