In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import joblib

# Read the merged vehicle dataset from disk
df = pd.read_csv("vehicles_merged.csv")

# Columns treated as continuous features
numeric_cols = ['Seating Capacity', 'EFF (km/l)/(km/kwh)', 'Ground Clearance (range)']

# Impute gaps in each numeric column with that column's own mean
numeric_means = df[numeric_cols].mean()
df[numeric_cols] = df[numeric_cols].fillna(numeric_means)

# Standardize the numeric columns; `scaled_numeric` is reused when the
# feature matrix is assembled in a later cell
scaler = StandardScaler()
scaled_numeric = scaler.fit_transform(df[numeric_cols])

# Persist the fitted scaler so the same transform can be re-applied later
joblib.dump(scaler, 'scaler.joblib')


['scaler.joblib']

In [2]:
body_cols = ['Body Type']  # or map to Body_sedan, Body_hatchback etc.
fuel_cols = ['Fuel Type']

# Feature layout: the numeric columns first, then one name per distinct
# lower-cased body type, then one name per distinct lower-cased fuel type.
# Series.unique() keeps NaN, so missing values are dropped first; otherwise a
# float `nan` would be stored in the list as if it were a feature name.
body_categories = df['Body Type'].str.lower().dropna().unique()
fuel_categories = df['Fuel Type'].str.lower().dropna().unique()

feature_cols = numeric_cols + list(body_categories) + list(fuel_categories)
joblib.dump(feature_cols, 'feature_cols.joblib')


['feature_cols.joblib']

In [3]:
# Build the full feature matrix: scaled numeric columns followed by one
# indicator column per categorical name stored in `feature_cols`.
import numpy as np

# Everything in feature_cols after the numeric columns is a lower-cased
# body-type or fuel-type value.
categorical_names = feature_cols[len(numeric_cols):]

# Lower-case the raw values so they compare equal to the stored names.
body_lower = df['Body Type'].str.lower()
fuel_lower = df['Fuel Type'].str.lower()

# Fill the categorical block with real 0/1 indicators. Leaving it as all
# zeros (as before) means body type and fuel type have no effect on the
# neighbour distances.
# NOTE(review): assumes body-type and fuel-type names do not collide, and
# that query vectors are encoded the same way at lookup time - confirm.
one_hot = np.zeros((df.shape[0], len(categorical_names)))
for col_idx, name in enumerate(categorical_names):
    matches = (body_lower == name) | (fuel_lower == name)
    one_hot[:, col_idx] = matches.to_numpy(dtype=float)

X = np.hstack([scaled_numeric, one_hot])
assert X.shape == (df.shape[0], len(feature_cols)), "feature matrix / feature_cols mismatch"

knn = NearestNeighbors(n_neighbors=10, metric='euclidean')
knn.fit(X)

joblib.dump(knn, 'knn_model.joblib')


['knn_model.joblib']

In [4]:
# Uniform per-feature weights, one entry per name in feature_cols
# (replace with custom values to weight features differently)
n_features = len(feature_cols)
weights = np.ones((n_features,))
joblib.dump(weights, 'weights.joblib')

# Persist the vehicle DataFrame itself for recommendation mapping
joblib.dump(df, 'vehicle_metadata.joblib')


['vehicle_metadata.joblib']