In [None]:
import json
from datetime import datetime, timezone
from pathlib import Path
import re

# In-memory stores
memory_stores = {
    "unstructured": {},
    "structured": {},
    "dislike_unstructured": {},
    "dislike_structured": {}
}

# File paths
file_paths = {
    "unstructured": "unstructured.json",
    "structured": "structured.json",
    "dislike_unstructured": "dislike_unstructured.json",
    "dislike_structured": "dislike_structured.json"
}

def contains_number(value: str):
    """Check if value contains at least one digit."""
    return bool(re.search(r'\d', value))

def append_preference(user_id: str, preference_string: str):
    """Append preferences to memory with proper dislike and unstructured handling."""
    now = datetime.now(timezone.utc).isoformat()
    
    for item in preference_string.split(','):
        if ':' not in item:
            continue

        field, value = item.split(':', 1)
        field = field.strip()
        value = value.lstrip()

        # Detect dislike
        dislike = False
        if value.startswith('!='):
            dislike = True
            value = value[2:].lstrip()
        elif value.startswith('!'):
            dislike = True
            value = value[1:].lstrip()

        # Remove leading '=' if present
        if value.startswith('='):
            value = value[1:].lstrip()

        # Determine structured/unstructured
        if contains_number(value):
            store_name = "dislike_structured" if dislike else "structured"
        else:
            store_name = "dislike_unstructured" if dislike else "unstructured"

        # Save entry
        entry = {"field": field, "value": value, "timestamp": now}
        memory_stores.setdefault(store_name, {}).setdefault(user_id, []).append(entry)

def save_all_to_file():
    for store_name, data in memory_stores.items():
        path = file_paths[store_name]
        existing_data = {}
        if Path(path).exists():
            with open(path, "r") as f:
                try:
                    existing_data = json.load(f)
                except json.JSONDecodeError:
                    existing_data = {}

        for user_id, entries in data.items():
            existing_data.setdefault(user_id, []).extend(entries)

        with open(path, "w") as f:
            json.dump(existing_data, f, indent=2)

def get_user_preferences(store_name: str, user_id: str):
    return memory_stores.get(store_name, {}).get(user_id, [])

# ===== Example usage =====
user_id = "user123"

append_preference(user_id, "make: =Ford, price: < 40000, color: Red")
append_preference(user_id, "dislike_field: !=Some text")
append_preference(user_id, "!dislike_number: 123")
append_preference(user_id, "lease_term: =36")
append_preference(user_id, "dislike_number: !123")

save_all_to_file()

print("Unstructured:", get_user_preferences("unstructured", user_id))
print("Structured:", get_user_preferences("structured", user_id))
print("Dislike unstructured:", get_user_preferences("dislike_unstructured", user_id))
print("Dislike structured:", get_user_preferences("dislike_structured", user_id))


Unstructured: [{'field': 'make', 'value': 'Ford', 'timestamp': '2025-08-30T02:24:35.106183+00:00'}, {'field': 'color', 'value': 'Red', 'timestamp': '2025-08-30T02:24:35.106183+00:00'}]
Structured: [{'field': 'price', 'value': '< 40000', 'timestamp': '2025-08-30T02:24:35.106183+00:00'}, {'field': '!dislike_number', 'value': '123', 'timestamp': '2025-08-30T02:24:35.106183+00:00'}, {'field': 'lease_term', 'value': '36', 'timestamp': '2025-08-30T02:24:35.106183+00:00'}]
Dislike unstructured: [{'field': 'dislike_field', 'value': 'Some text', 'timestamp': '2025-08-30T02:24:35.106183+00:00'}]
Dislike structured: [{'field': 'dislike_number', 'value': '123', 'timestamp': '2025-08-30T02:24:35.106183+00:00'}]


In [207]:
import pandas as pd
from typing import Optional, List, Dict, Any
from scipy import sparse
from dataclasses import dataclass
import math

In [208]:
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from datetime import datetime

In [209]:
vehicle_df = pd.read_csv("vehicle_data.csv")
contract_df = pd.read_csv("contract_data.csv")
product_df = pd.read_csv("leasing_data.csv")


In [210]:
import numpy as np
from faker import Faker
fake = Faker()
num_quotes = 20
quotes = pd.DataFrame({
    "Vehicle ID": np.random.choice(vehicle_df["Vehicle ID"], num_quotes),
    "Product ID": np.random.choice(product_df["Product ID"], num_quotes),
    "User ID": np.random.choice(contract_df["Customer ID"], num_quotes),
    "Discount": np.random.choice(["Yes", "No"], num_quotes, p=[0.3, 0.7]),
    "Quote Value": np.random.randint(10000, 50000, num_quotes),
    "Created Date": [fake.date_time_between(start_date="-1y", end_date="now") for _ in range(num_quotes)]

})

print(quotes.head(10))


  Vehicle ID Product ID  User ID Discount  Quote Value        Created Date
0      V1027      P1024     1006      Yes        41541 2025-05-20 03:18:05
1      V1038      P1027     1001       No        38055 2025-03-11 03:15:20
2      V1063      P1036     1002       No        32310 2025-08-19 18:46:36
3      V1048      P1030     1016      Yes        20550 2025-05-07 05:54:07
4      V1068      P1008     1019       No        39591 2024-09-22 02:01:05
5      V1065      P1002     1008      Yes        41005 2025-02-15 09:46:05
6      V1072      P1010     1010       No        42755 2025-06-03 21:21:54
7      V1055      P1025     1012       No        18958 2025-07-10 14:56:52
8      V1003      P1015     1018       No        30268 2025-06-23 09:53:49
9      V1068      P1023     1017      Yes        13631 2024-12-10 08:53:13


In [211]:
def _time_decay_weight(days: np.ndarray, half_life: float) -> np.ndarray:
    # w = 0.5 ** (days / half_life)
    return np.power(0.5, days / np.maximum(half_life, 1e-6))


In [212]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.preprocessing import OneHotEncoder, RobustScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from datetime import datetime

def _time_decay_weight(days_ago: np.ndarray, half_life: float = 30.0) -> np.ndarray:
    """Exponential decay weight: weight = 0.5 ** (days_ago / half_life)"""
    return 0.5 ** (days_ago / half_life)

def _normalize_scores(scores: np.ndarray) -> np.ndarray:
    if scores.max() - scores.min() > 0:
        return (scores - scores.min()) / (scores.max() - scores.min())
    return scores

class HybridConfig:
    k_final: int = 5
    k_similar_users: int = 5
    w_content: float = 0.4
    w_cf: float = 0.4
    w_simuser: float = 0.2
    history_alpha: float = 0.7
    recent_days_boost_half_life: float = 30.0
    normalize_component_scores: bool = True

class HybridRecommenderSystemWithClicks:
    def __init__(self, vehicles: pd.DataFrame, plans: pd.DataFrame, contracts: pd.DataFrame,
                 quotes: pd.DataFrame, clicks: Optional[pd.DataFrame] = None,
                 config: Optional[HybridConfig] = None):
        self.vehicles = vehicles.copy()
        self.plans = plans.copy()
        self.contracts = contracts.copy()
        self.quotes = quotes.copy()
        self.cfg = config or HybridConfig()
        self.today = datetime.now()
        self.contracts.rename(columns={"Customer ID":"User ID"},inplace=True)
        self.clicks = clicks.copy() if clicks is not None else pd.DataFrame(
            columns=["User ID", "Vehicle ID", "Product ID", "Clicked Date", "Rec Type"]
        )
        self._prepare_indices()
        self._build_content_encoders()
        self._build_behavior_matrices()
        self._fit_similar_user_index()

    def _prepare_indices(self):
        # Ensure IDs are strings
        self.vehicles["Vehicle ID"] = self.vehicles["Vehicle ID"].astype(str)
        self.plans["Product ID"] = self.plans["Product ID"].astype(str)
        self.contracts["User ID"] = self.contracts["User ID"].astype(str)
        self.contracts["Vehicle ID"] = self.contracts["Vehicle ID"].astype(str)
        self.contracts["Product ID"] = self.contracts["Product ID"].astype(str)
        self.quotes["User ID"] = self.quotes["User ID"].astype(str)
        self.quotes["Vehicle ID"] = self.quotes["Vehicle ID"].astype(str)
        self.quotes["Product ID"] = self.quotes["Product ID"].astype(str)
        self.vehicle_index = {vid: i for i, vid in enumerate(self.vehicles["Vehicle ID"].tolist())}
        self.plan_index = {pid: i for i, pid in enumerate(self.plans["Product ID"].tolist())}

    def _build_content_encoders(self):
        # Vehicles
        veh_struct_cols = [c for c in ["Year", "Price", "Mileage", "Horsepower"] if c in self.vehicles]
        veh_cat_cols = [c for c in ["Country", "Make", "Model", "Gear Type", "Fuel", "Preowned", "Currency"] if c in self.vehicles]
        self.veh_scaler = RobustScaler() if veh_struct_cols else None
        self.veh_ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=True) if veh_cat_cols else None
        parts = []
        if self.veh_scaler:
            parts.append(self.veh_scaler.fit_transform(self.vehicles[veh_struct_cols].fillna(0.0)))
        if self.veh_ohe:
            parts.append(self.veh_ohe.fit_transform(self.vehicles[veh_cat_cols].astype(str)))
        self.veh_content_matrix = sparse.hstack(parts).tocsr() if parts else sparse.csr_matrix((len(self.vehicles), 0))

        # Plans
        plan_struct_cols = [c for c in ["Lease Term"] if c in self.plans]
        plan_cat_cols = [c for c in ["Product Name","Flexi Lease","Tax Saving Plan","Renewal Cycle","Maintenance Type"] if c in self.plans]
        plan_text_col = "Short Description" if "Short Description" in self.plans else None
        self.plan_scaler = RobustScaler() if plan_struct_cols else None
        self.plan_ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=True) if plan_cat_cols else None
        self.plan_tfidf = TfidfVectorizer(max_features=20000, ngram_range=(1,2)) if plan_text_col else None
        parts = []
        if self.plan_scaler:
            parts.append(self.plan_scaler.fit_transform(self.plans[plan_struct_cols].fillna(0.0)))
        if self.plan_ohe:
            parts.append(self.plan_ohe.fit_transform(self.plans[plan_cat_cols].astype(str)))
        if self.plan_tfidf:
            parts.append(self.plan_tfidf.fit_transform(self.plans[plan_text_col].fillna("")))
        self.plan_content_matrix = sparse.hstack(parts).tocsr() if parts else sparse.csr_matrix((len(self.plans), 0))

    def _build_behavior_matrices(self):
        # Build userâ†”vehicle feedback from quotes/contracts
        q = self.quotes.copy()
        q["Days Ago"] = (self.today - pd.to_datetime(q["Created Date"])).dt.days.clip(lower=0)
        q["Decay"] = _time_decay_weight(q["Days Ago"].to_numpy(), self.cfg.recent_days_boost_half_life)
        q["Signal"] = 4 * q["Decay"]
        c = self.contracts.copy()
        c["Signal"] = 8.0
        fb = pd.concat([q[["User ID","Vehicle ID","Product ID","Signal"]], c[["User ID","Vehicle ID","Product ID","Signal"]]], ignore_index=True)
        self.feed=fb
        fb["User ID"] = fb["User ID"].astype(str)
        self.user_ids = sorted(fb["User ID"].unique().tolist())
        self.user_index = {uid:i for i, uid in enumerate(self.user_ids)}
        rows_u = fb["User ID"].map(self.user_index).to_numpy()
        cols_v = fb["Vehicle ID"].map(self.vehicle_index)
        cols_p = fb["Product ID"].map(self.plan_index)
        vals = fb["Signal"].to_numpy(dtype=float)
        valid_v = cols_v.notna()
        valid_p = cols_p.notna()
        self.user_vehicle_matrix = sparse.coo_matrix((vals[valid_v], (rows_u[valid_v], cols_v[valid_v].astype(int))),
                                                     shape=(len(self.user_ids), len(self.vehicles))).tocsr()
        self.user_plan_matrix = sparse.coo_matrix((vals[valid_p], (rows_u[valid_p], cols_p[valid_p].astype(int))),
                                                  shape=(len(self.user_ids), len(self.plans))).tocsr()
        self.user_vehicle_centroids = self._compute_user_centroids(self.user_vehicle_matrix, self.veh_content_matrix)
        self.user_plan_centroids = self._compute_user_centroids(self.user_plan_matrix, self.plan_content_matrix)

    @staticmethod
    def _compute_user_centroids(user_item: sparse.csr_matrix, item_content: sparse.csr_matrix) -> sparse.csr_matrix:
        row_sums = np.array(user_item.sum(axis=1)).ravel()
        row_sums[row_sums==0] = 1.0
        return sparse.diags(1.0/row_sums) @ user_item @ item_content

    def _fit_similar_user_index(self):
        if not self.user_ids:
            self.user_knn = None
            return
        veh_dense = self.user_vehicle_matrix.toarray()
        plan_dense = self.user_plan_matrix.toarray()
        user_vecs = np.hstack([veh_dense, plan_dense])
        self.user_knn = NearestNeighbors(n_neighbors=min(self.cfg.k_similar_users, max(1, user_vecs.shape[0]-1)), metric="cosine")
        self.user_knn.fit(user_vecs)
        self._user_vecs_cache = user_vecs

    def _cf_scores(self, user_id: str, item_matrix: sparse.csr_matrix, interactions: sparse.csr_matrix) -> np.ndarray:
        if self.user_knn is None or user_id not in self.user_index:
            return np.zeros(item_matrix.shape[0])
        uid = self.user_index[user_id]
        dist, idx = self.user_knn.kneighbors(self._user_vecs_cache[uid:uid+1], return_distance=True)
        sims = 1.0 - dist.ravel()
        neighbor_rows = interactions[idx.ravel(), :]
        scores = sims @ neighbor_rows.toarray()
        return scores

    def _content_scores(self, user_id: str, item_content: sparse.csr_matrix, user_centroid: sparse.csr_matrix) -> np.ndarray:
        if item_content.shape[1]==0:
            return np.zeros(item_content.shape[0])
        target = user_centroid
        A = item_content
        A_norm = np.sqrt(A.multiply(A).sum(axis=1)).A1 + 1e-9
        t_norm = math.sqrt((target.multiply(target)).sum()) + 1e-9
        dots = (A @ target.T).toarray().ravel()
        sims = dots / (A_norm * t_norm)
        return sims

    def _similar_user_popularity(self, user_id: str, item_type: str) -> np.ndarray:
        if self.user_knn is None or user_id not in self.user_index:
            return np.zeros(len(self.vehicles) if item_type=="Vehicle" else len(self.plans))
        uid = self.user_index[user_id]
        _, idx = self.user_knn.kneighbors(self._user_vecs_cache[uid:uid+1], return_distance=True)
        idx = idx.ravel()
        neigh_mat = self.user_vehicle_matrix[idx,:] if item_type=="Vehicle" else self.user_plan_matrix[idx,:]
        pop = neigh_mat.toarray().sum(axis=0)
        return _normalize_scores(pop)

    def recommend_vehicles_separate(self, user_id: str):
        """Return separate lists for content/cf/popularity scores, sorted by score descending."""
        user_row = self.user_index.get(str(user_id), None)
        user_centroid = self.user_vehicle_centroids[user_row:user_row+1] if user_row is not None else sparse.csr_matrix((1, self.veh_content_matrix.shape[1]))
        
        # Compute scores
        content = self._content_scores(user_id, self.veh_content_matrix, user_centroid)
        cf = self._cf_scores(user_id, self.veh_content_matrix, self.user_vehicle_matrix)
        pop = self._similar_user_popularity(user_id, "Vehicle")
        
        # Build sorted lists
        content_based = sorted(
            [{"Vehicle ID": vid, "score": float(score)} for vid, score in zip(self.vehicles["Vehicle ID"], content)],
            key=lambda x: -x["score"]
        )
        cf_based = sorted(
            [{"Vehicle ID": vid, "score": float(score)} for vid, score in zip(self.vehicles["Vehicle ID"], cf)],
            key=lambda x: -x["score"]
        )
        popularity_based = sorted(
            [{"Vehicle ID": vid, "score": float(score)} for vid, score in zip(self.vehicles["Vehicle ID"], pop)],
            key=lambda x: -x["score"]
        )
        
        return {
            "content_based": content_based,
            "cf_based": cf_based,
            "popularity_based": popularity_based,
        }

    def register_click(self, user_id: str, vehicle_id: Optional[str] = None, rec_type: str = "content"):
        """Log a user click."""
        self.clicks = pd.concat([
            self.clicks,
            pd.DataFrame([{
                "User ID": user_id,
                "Vehicle ID": vehicle_id if vehicle_id else -1,
                "Product ID": -1,
                "Clicked Date": pd.Timestamp.now(),
                "Rec Type": rec_type
            }])
        ], ignore_index=True)

    def recommend_vehicles_by_preference(self, user_id: str):
        """Return recommendations ordered by click-inferred user preference with time decay."""
        recs = self.recommend_vehicles_separate(user_id)
        # Compute decayed clicks
        user_clicks = self.clicks[self.clicks["User ID"]==user_id].copy()
        if not user_clicks.empty:
            days_ago = (pd.Timestamp.now() - user_clicks["Clicked Date"]).dt.total_seconds()/86400
            user_clicks["decay_weight"] = 0.5 ** (days_ago / self.cfg.recent_days_boost_half_life)
            pref_scores = user_clicks.groupby("rec_type")["decay_weight"].sum().to_dict()
            rec_order = sorted(["content_based","cf_based","popularity_based"], key=lambda x: -pref_scores.get(x.split("_")[0],0))
        else:
            rec_order = ["content_based","cf_based","popularity_based"]
        sorted_recs = []
        for rtype in rec_order:
            sorted_recs.extend(sorted(recs[rtype], key=lambda x: -x["score"]))
        return sorted_recs

In [213]:
hybrid_rec_sys=HybridRecommenderSystemWithClicks(vehicles=vehicle_df,
                        contracts=contract_df,
                        plans=product_df,
                        quotes=quotes)


In [None]:
def get_most_popular(quotes_df,contract_df,vehicles_df,products_df,request_type,request_data,
                     ):

    if ((request_data=="Vehicle") and (request_type=="quote")):
        top_quoted_vehicles = (
        quotes_df['Vehicle ID']
        .value_counts()
        .head(5)
        .reset_index()
        .merge(vehicles_df, left_on='index', right_on='Vehicle ID')
        )
        return top_quoted_vehicles
    elif ((request_data=="Product") and (request_type=="quote")):
        top_quoted_products = (
        quotes_df['Product ID']
        .value_counts()
        .head(5)
        .reset_index()
        .merge(products_df, left_on='index', right_on='Product ID')
        )
        return top_quoted_products
    elif ((request_data=="Vehicle") and (request_type=="contract")):
        top_contract_vehicles = (
        contract_df['Vehicle ID']
        .value_counts()
        .head(5)
        .reset_index()
        .merge(vehicles_df, left_on='index', right_on='Vehicle ID')
        )
        return top_contract_vehicles
    elif ((request_data=="Product") and (request_type=="contract")):
        top_contract_products = (
        contract_df['Product ID']
        .value_counts()
        .head(5)
        .reset_index()
        .merge(products_df, left_on='index', right_on='Product ID')
        )
        return top_contract_products

In [None]:
veh

In [None]:
def convert_to_customer(user_id,country_id,request_type,request_data,vehicle_index,vehicle_matrix,product_index,product_matrix,vehicle_df, product_df, contract_df,quote_df):
        # 1. Get last quoted item for the user
        user_quotes = quote_df[quote_df['User ID'] == user_id]
        last_quote = user_quotes.sort_values('Created Date', ascending=False).iloc[0]
        last_vehicle_id = last_quote['Vehicle ID']
        last_product_id = last_quote['Product ID']
        last_vehicle = vehicle_df[vehicle_df["Vehicle ID"]==last_vehicle_id]
        last_product = product_df[product_df["Product ID"]==last_product_id]
        last_price = last_vehicle["Price"]  # last quoted price
        last_country = last_vehicle["Country"]
        last_lease_term = last_product["Lease Term"]  # last quoted price
        vehicle_id_list = contract_df["Vehicle ID"]
        product_id_list = contract_df["Product ID"]
        target_vehicle_embedding = vehicle_matrix[vehicle_index[last_vehicle]]
        target_product_embedding = vehicle_matrix[vehicle_index[last_product]]

        if ((request_data=="Vehicle") and (request_type=="quote")):
                candidates = vehicle_df[(vehicle_df["Country"]==last_country)&(vehicle_df["Price"]<=last_price)]["Vehicle ID"].tolist()
                candidates_id =[]
                for id in candidates:
                        candidates_id.append(vehicle_index[id])
                candidates_embedding = vehicle_matrix[candidates]
                recommender_knn = NearestNeighbors(n_neighbors=2, metric="cosine")        
                recommender_knn.fit(candidates_embedding)
                idx = recommender_knn.kneighbors(target_vehicle_embedding )
                return vehicle_df[vehicle_df["Vehicle ID"].isin(idx)]
                
        elif ((request_data=="Product") and (request_type=="quote")):
                candidates = product_df[product_df["Lease Term"]==last_lease_term].tolist()
                candidates_id =[]
                for id in candidates:
                        candidates_id.append(product_index[id])
                candidates_embedding = product_matrix[candidates]
                recommender_knn = NearestNeighbors(n_neighbors=2, metric="cosine")        
                recommender_knn.fit(candidates_embedding)
                idx = recommender_knn.kneighbors(target_product_embedding)
                return product_df[product_df["Product ID"].isin(idx)]


        elif ((request_data=="Vehicle") and (request_type=="contract")):
                candidates= vehicle_df[(vehicle_df["Vehicle ID"].isin(vehicle_id_list))&(vehicle_df["Country"]==last_country)&(vehicle_df["Price"]<=last_price)].tolist()
                candidates_id =[]
                for id in candidates:
                        candidates_id.append(vehicle_index[id])
                candidates_embedding = vehicle_matrix[candidates]
                recommender_knn = NearestNeighbors(n_neighbors=2, metric="cosine")        
                recommender_knn.fit(candidates_embedding)
                idx = recommender_knn.kneighbors(target_vehicle_embedding )
                return vehicle_df[vehicle_df["Vehicle ID"].isin(idx)]
                
                
        elif ((request_data=="Product") and (request_type=="contract")):
                candidates = product_df[(product_df["Product ID"].isin(product_id_list))&product_df["Lease Term"]==last_lease_term].tolist()
                candidates_id =[]
                for id in candidates:
                        candidates_id.append(product_index[id])
                candidates_embedding = product_matrix[candidates]
                recommender_knn = NearestNeighbors(n_neighbors=2, metric="cosine")        
                recommender_knn.fit(candidates_embedding)
                idx = recommender_knn.kneighbors(target_product_embedding )
                return product_df[product_df["Product ID"].isin(idx)]


    

In [193]:
hybrid_rec_sys=HybridRecommenderSystemWithClicks(vehicles=vehicle_df,
                        contracts=contract_df,
                        plans=product_df,
                        quotes=quotes)


Unnamed: 0_level_0,Contract ID,Customer ID,Existing Customer,Product ID,Monthly EMI,Lease Start Date,Lease Expiry Date,Road Assistance,Maintenance,Discount Applied,Preferred Customer,Summary
Vehicle ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
V1004,1,1,1,1,1,1,1,1,1,1,1,1
V1014,1,1,1,1,1,1,1,1,1,1,1,1
V1021,1,1,1,1,1,1,1,1,1,1,1,1
V1025,1,1,1,1,1,1,1,1,1,1,1,1
V1026,1,1,1,1,1,1,1,1,1,1,1,1
V1027,1,1,1,1,1,1,1,1,1,1,1,1
V1028,1,1,1,1,1,1,1,1,1,1,1,1
V1031,1,1,1,1,1,1,1,1,1,1,1,1
V1032,1,1,1,1,1,1,1,1,1,1,1,1
V1034,1,1,1,1,1,1,1,1,1,1,1,1


In [205]:
vehicle_df[vehicle_df['Vehicle ID']=='V1003']

Unnamed: 0,Vehicle ID,Country,Make,Model,Year,Mileage,Fuel,Gear Type,Horsepower,Price,Currency,Preowned,Inserted Date,Summary
3,V1003,Tokelau,Audi,A4,2018,7.5,EV,Manual,204,46611,EUR,Yes,2024-05-17,"Country: Tokelau, Make: Audi, Model: A4, Year:..."


In [203]:
vehicle_df[vehicle_df['Vehicle ID']=='V1040']

Unnamed: 0,Vehicle ID,Country,Make,Model,Year,Mileage,Fuel,Gear Type,Horsepower,Price,Currency,Preowned,Inserted Date,Summary
40,V1040,Tokelau,Ford,F-150 Lightning (EV),2006,5.9,EV,Automatic,143,44228,INR,Yes,2024-07-30,"Country: Tokelau, Make: Ford, Model: F-150 Lig..."


In [194]:
hybrid_rec_sys.recommend_vehicles_separate(1009)

{'content_based': [{'Vehicle ID': 'V1040', 'score': 0.5836737868058364},
  {'Vehicle ID': 'V1055', 'score': 0.5832512211404888},
  {'Vehicle ID': 'V1085', 'score': 0.5823091216812781},
  {'Vehicle ID': 'V1082', 'score': 0.5523296113165355},
  {'Vehicle ID': 'V1003', 'score': 0.5483336731554196},
  {'Vehicle ID': 'V1039', 'score': 0.5473593207370223},
  {'Vehicle ID': 'V1093', 'score': 0.5463740956725327},
  {'Vehicle ID': 'V1036', 'score': 0.5446013982597117},
  {'Vehicle ID': 'V1073', 'score': 0.5405393443922164},
  {'Vehicle ID': 'V1070', 'score': 0.5350511049550761},
  {'Vehicle ID': 'V1069', 'score': 0.5339858084387035},
  {'Vehicle ID': 'V1072', 'score': 0.532631764619689},
  {'Vehicle ID': 'V1028', 'score': 0.5234434715840728},
  {'Vehicle ID': 'V1002', 'score': 0.5197416317347422},
  {'Vehicle ID': 'V1015', 'score': 0.5139975331802071},
  {'Vehicle ID': 'V1046', 'score': 0.509503517242877},
  {'Vehicle ID': 'V1080', 'score': 0.5027962466711192},
  {'Vehicle ID': 'V1029', 'score'