**Importing Libraries and Dataset**

In [57]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors


# Load the merged vehicle dataset. The path is relative to the notebook's
# working directory, so the kernel must be started from the project root.
df = pd.read_csv("./model_dataset/vehicles_merged.csv")


**Constants**


In [None]:
# Body-type vocabulary. Each entry later becomes one `Body_<name>` one-hot
# column, so the order here fixes the order of those features.
body_types = [
    "sedan",
    "hatchback",
    "suv",
    "mpv",
    "pickup",
    "coupe",
    "convertible",
    "wagon",
    "van",
    "crossover",
    "kei / microvan",
    "roadster",
    "other",
    "liftback",
    "mpv / minivan",
    "minivan",
    "microvan",
]

# Dataset columns describing road suitability, used directly as features.
road_cols = [
    'City/Urban',
    "Suburban/Normal",
    "Mid Off-Road",
    "Off-Road/Hilly Terrain",
]

# Distinct lower-cased fuel types, in order of first appearance in the data.
fuel_types = (
    df['Fuel Type']
    .str.lower()
    .unique()
    .tolist()
)


**One-Hot Encode Categorical Columns and Define the Feature List**

In [None]:
# One-hot encode body types and fuel types as extra columns on `df`.

def _split_labels(raw):
    """Split a label on ',' or '/' into a set of stripped, lower-cased tokens.

    Non-string input (e.g. NaN) is converted with ``str`` first, so a missing
    value yields ``{'nan'}`` and matches no real body type.
    """
    return {tok.strip().lower() for tok in str(raw).replace('/', ',').split(',')}


# Tokenise every row once instead of once per body type.
body_tokens = df['Body Type'].apply(_split_labels)

for bt in body_types:
    # Tokenise the vocabulary entry the same way as the data. For single-word
    # types this is the plain membership test; for compound entries such as
    # "kei / microvan" it requires every component to be present. Comparing
    # the raw compound string against the split tokens could never match,
    # which left those columns permanently 0.
    wanted = _split_labels(bt)
    df[f'Body_{bt}'] = body_tokens.apply(lambda toks, wanted=wanted: int(wanted <= toks))

# Lower-case the fuel column once; the comparison is vectorised per fuel type.
# Missing fuel values compare unequal to everything and therefore encode as 0.
fuel_lower = df['Fuel Type'].str.lower()
for f in fuel_types:
    df[f'Fuel_{f}'] = (fuel_lower == f).astype(int)

# Features for the model. The order here defines the layout of every feature
# vector, so any vector built for a query must follow it.
feature_cols = (
    [f'Body_{bt}' for bt in body_types] +
    ['Seating Capacity'] +
    road_cols +
    ['Eff (km/l)', 'Ground Clearance (range)'] +
    [f'Fuel_{f}' for f in fuel_types]
)


**Handling User Inputs**


In [None]:
# Free-text road aliases a user may type, mapped to the dataset's road column.
road_mapping = {
    alias: column
    for column, aliases in (
        ("City/Urban", ("town", "urban", "city")),
        ("Suburban/Normal", ("suburban",)),
        ("Mid Off-Road", ("mid off-road",)),
        ("Off-Road/Hilly Terrain", ("off-road", "hilly")),
    )
    for alias in aliases
}


def _tokenize(raw):
    """Split a comma-separated answer into stripped, lower-cased entries."""
    return [piece.strip().lower() for piece in raw.split(',')]


# Collect the raw answers first (prompt order matters to the user) ...
user_body = input("Enter body type(s), comma separated: ")
user_seating = int(input("Enter seating capacity: "))
user_road = input("Enter road types, comma separated: ")
user_traffic = input("Enter traffic condition(s), comma separated (high/mid/low/mixed): ")
user_fuel = input("Enter fuel type(s), comma separated: ")

# ... then normalise every multi-value answer into a list of tokens.
user_body_list = _tokenize(user_body)
user_road_list = _tokenize(user_road)
user_fuel_list = _tokenize(user_fuel)
user_traffic_list = _tokenize(user_traffic)

In [None]:
# Recommend vehicles: filter by fuel and body type, rank the survivors by
# Euclidean distance to a synthetic "ideal vehicle" built from the user's
# answers, then order by fuel priority and distance.

# Resolve the traffic profile once. Precedence when several are given is
# high > mixed > mid; anything else (including unrecognised input) is
# treated as low traffic.
traffic_level = next(
    (level for level in ("high", "mixed", "mid") if level in user_traffic_list),
    "low",
)

# Fuel ranking per traffic profile (earlier entries are preferred).
fuels_by_traffic = {
    "high": ["electric", "hybrid", "petrol", "diesel"],   # stop-and-go: electrified first
    "mixed": ["hybrid", "petrol", "electric", "diesel"],  # mix of city and highway
    "mid": ["hybrid", "petrol", "diesel"],
    "low": ["diesel", "petrol", "hybrid"],
}
preferred_fuels = fuels_by_traffic[traffic_level]

# Merge system preference + user input (order-preserving de-duplication).
# NOTE(review): because the preferred fuels are merged in, the fuel filter
# below also admits fuels the user did not ask for - confirm this is intended.
user_fuel_list = list(dict.fromkeys(preferred_fuels + user_fuel_list))

# ---------------------------
# Filter dataset by fuel type (hard constraint)
# ---------------------------
filtered_df = df[df['Fuel Type'].str.lower().isin(user_fuel_list)].copy()

# Filter by body type: keep rows flagged for at least one requested body type.
# Requests that do not correspond to a known Body_* column are ignored.
known_body_cols = {f'Body_{bt}' for bt in body_types}
wanted_body_cols = [
    col
    for col in dict.fromkeys(f'Body_{bt}' for bt in user_body_list)
    if col in known_body_cols
]
if wanted_body_cols:
    body_mask = filtered_df[wanted_body_cols].eq(1).any(axis=1)
else:
    # No recognised body type requested: nothing can match.
    body_mask = pd.Series(False, index=filtered_df.index)
filtered_df = filtered_df.loc[body_mask]

if filtered_df.empty:
    print("No vehicles match your selected body type and fuel type.")
else:
    # ---------------------------
    # Fill missing feature values
    # ---------------------------
    X_filtered = filtered_df[feature_cols].fillna(0)

    # ---------------------------
    # Encode user input (same layout as feature_cols)
    # ---------------------------
    # Body types
    body_vec = [int(bt.lower() in user_body_list) for bt in body_types]

    # Road types: translate the user's aliases once, then flag matching columns.
    requested_road_cols = {road_mapping.get(r, "") for r in user_road_list}
    road_vec = [int(rc in requested_road_cols) for rc in road_cols]

    # Target fuel efficiency by traffic, derived from the full dataset.
    efficiency = df['Eff (km/l)']
    if traffic_level == "high":
        fuel_efficiency = efficiency.max() * 0.95   # push for highest efficiency
    elif traffic_level == "mixed":
        fuel_efficiency = efficiency.mean() * 1.05  # slightly above average
    elif traffic_level == "mid":
        fuel_efficiency = efficiency.mean()
    else:  # low
        fuel_efficiency = efficiency.mean() * 0.85

    # Target ground clearance: maximum for any off-road style request,
    # otherwise the dataset average.
    clearance = df['Ground Clearance (range)']
    wants_clearance = any(r in ("off-road", "mid off-road", "hilly") for r in user_road_list)
    ground_clearance = clearance.max() if wants_clearance else clearance.mean()

    # Fuel type one-hot
    fuel_vec = [int(f in user_fuel_list) for f in fuel_types]

    # Combine all into one vector for KNN
    user_vector = body_vec + [user_seating] + road_vec + [fuel_efficiency, ground_clearance] + fuel_vec

    # Query with a DataFrame carrying the same column names the model is fitted
    # on; passing a bare list makes scikit-learn warn about missing feature names.
    user_query = pd.DataFrame([user_vector], columns=feature_cols)

    # NOTE(review): features are used unscaled, so large-valued columns such as
    # ground clearance presumably dominate the Euclidean distance over the 0/1
    # flags - consider standardising before fitting.
    n_all = len(X_filtered)  # ask for every row so the whole filtered set is ranked
    knn = NearestNeighbors(n_neighbors=n_all, metric='euclidean')
    knn.fit(X_filtered)

    # ---------------------------
    # Find neighbors and rank
    # ---------------------------
    distances, indices = knn.kneighbors(user_query)
    recommended_all = filtered_df.iloc[indices[0]].copy()
    recommended_all['Distance'] = distances[0]

    # Sort by fuel type priority, then distance. Fuels outside the preferred
    # list rank after all preferred ones.
    fuel_rank = {fuel: pos for pos, fuel in enumerate(preferred_fuels)}
    recommended_all['Fuel_Priority'] = recommended_all['Fuel Type'].str.lower().map(
        lambda fuel: fuel_rank.get(fuel, len(preferred_fuels))
    )
    recommended_all = recommended_all.sort_values(by=['Fuel_Priority', 'Distance'])

    # ---------------------------
    # Display recommendations
    # ---------------------------
    display_cols = [
        'Manufacturer', 'Model', 'Body Type', 'Seating Capacity',
        'Fuel Type', 'Eff (km/l)', 'Ground Clearance (range)',
    ]

    print("\nRecommended Vehicles Ranked by Similarity and Fuel Priority:\n")
    # Scope the wide/unlimited display options to this print only, so they do
    # not leak into the rest of the session.
    with pd.option_context(
        'display.max_rows', None,
        'display.max_columns', None,
        'display.width', 200,
        'display.max_colwidth', None,
    ):
        print(recommended_all[display_cols])



Recommended Vehicles Ranked by Similarity and Fuel Priority:

       Manufacturer                                  Model                      Body Type  Seating Capacity Fuel Type  Eff (km/l)  Ground Clearance (range)
157        Daihatsu                             mira la350                      hatchback                 4    Hybrid       22.50                       155
305         Hyundai                                  ioniq                      hatchback                 5    Hybrid       24.00                       142
242           Honda           fit shuttle gp2 navi premium                      hatchback                 5    Hybrid       25.50                       141
244           Honda           fit shuttle gp7 navi premium                      hatchback                 5    Hybrid       25.50                       141
245           Honda              fit shuttle gp7 z sensing                      hatchback                 5    Hybrid       25.50                       141
2

