In [1]:
import sys
sys.path.append("..")
import pandas as pd
import numpy as np
from src.transformers import *
from src.user_item_funcs import *
from src.item_item_funcs import *
from sklearn.pipeline import Pipeline
from sklearn.metrics.pairwise import cosine_similarity
%autosave 60

Autosaving every 60 seconds


### Loading data

In [2]:
train = pd.read_csv("../data/processed/train.csv", sep=",")
test = pd.read_csv("../data/processed/test.csv", sep=",")
print(train.shape)

(16728, 55)


## User-Item Similarity

### Get User Preferences

In [3]:
#Make UI here
# if you want to add more preferences. Go to user_item_funcs.py.
#For the numerical ones add the ranges and bins and add them to pref_dict_num dictionary with column name as key.
#For the categorical ones add the categories/ranges and it to pref_dict_cat with column name as key.
user_pref_dict = {'price':'', 'depreciation':'$11k to $12k /yr',
                  'type_of_vehicle':'luxury sedan'}

### Preparing User Row and Dataframe for similarities

In [4]:
user_pref_cols = get_user_pref_cols(user_pref_dict)

df_train_user_pref_cols = train.loc[:, ['listing_id']+user_pref_cols]


df_prepared_num = prepare_df_num(df_train_user_pref_cols, user_pref_dict, pref_dict_num)


user_row_dict = prepare_user_row_dict(df_prepared_num, user_pref_dict, pref_dict_num, pref_dict_cat)

df_prepared_normalized = normalize_df(df_prepared_num, user_pref_cols, pref_dict_num)
df_prepared_normalized.head(1)

ValueError: '' is not in list

In [None]:
df_prepared = prepare_df_cat(df_prepared_num, user_pref_dict, pref_dict_cat)
df_prepared.head(1)
df_prepared_normalized = prepare_df_cat(df_prepared_normalized, user_pref_dict, pref_dict_cat)
df_prepared_normalized.head(1)

In [None]:
df_user_row, cols_to_be_dropped = get_user_row(user_row_dict, pref_dict_num, pref_dict_cat, df_prepared_normalized)
df_user_row

#### Dropping range columns

In [None]:
df_prepared_final = df_prepared_normalized.drop(cols_to_be_dropped, axis=1)
df_user_row_final = df_user_row.drop(cols_to_be_dropped, axis=1)
df_prepared_final.head(1)

In [None]:
df_user_row_final.head(1)

### Finding similarity between custom user's item and other items

In [None]:
x_user = df_user_row_final['listing_id'].to_numpy()
y_user = df_user_row_final.iloc[:,1:].to_numpy()
print(x_user.shape,y_user.shape)
x_items = df_prepared_final['listing_id'].to_numpy()
y_items = df_prepared_final.iloc[:,1:].to_numpy()
print(x_items.shape, y_items.shape)
similarity_user_item = cosine_similarity(y_user, y_items)[0]

In [None]:
ind_ordered = np.argsort(similarity_user_item)[::-1]

### Top k recommendations as per User-Item similarity

In [None]:
df_top_user_item = get_top_recommendations_user_item(similarity_user_item, ind_ordered, x_items, y_items, 10, train)
print('user_pref_dict: ', user_pref_dict)
df_top_user_item.loc[:, ['listing_id', 'make', 'model', 'depreciation', 'price', 'type_of_vehicle']]

### Choose a Car from the recommended list

In [None]:
#Suppose the user chooses listing_id=1023716 from the above list. 
#We apply Item-item similarity on the chosen car and all other cars.
#Make UI here

## Item-Item Similarity

### Prepare DataFrame for item-item similarity

In [None]:
df_recommend_ii = train.loc[:, ['listing_id','make', 'vehicle_age', 'type_of_vehicle', 'depreciation',
                   'dereg_value', 'mileage', 'price', 'engine_cap',  'fuel_type_diesel',
                   'fuel_type_petrol-electric', 'fuel_type_petrol', 'fuel_type_electric','transmission_auto',
                   'transmission_manual', 'brand_rank']]

# cols_to_be_normalized = ['vehicle_age', 'depreciation', 'dereg_value', 'mileage', 'engine_cap', 'price']
cols_to_be_normalized = ['vehicle_age', 'depreciation', 'dereg_value', 'mileage', 'price']
df_normalized_ii = get_normalized_cols_item_item(df_recommend_ii, cols_to_be_normalized)
df_normalized_ii.head(2)

In [None]:
# df_recommend_ii.loc[:, ['vehicle_age', 'depreciation', 'dereg_value', 'mileage', 'engine_cap', 'price']] = df_normalized_ii
df_recommend_ii.loc[:, ['vehicle_age', 'depreciation', 'dereg_value', 'mileage', 'price']] = df_normalized_ii
df_recommend_ii.head(1)

### Convert Categorical Columns to One Hot Encoding

In [None]:
df_transformed_ii = pd.get_dummies(df_recommend_ii, columns = ['make', 'type_of_vehicle', 'brand_rank'])
df_transformed_ii.head(2)

### Computing Item-Item similarities

In [None]:
x_items_ii = df_transformed_ii['listing_id'].to_numpy()
y_items_ii = df_transformed_ii.iloc[:,1:].to_numpy()
#Suppose the user chooses listing_id=1023716 from the above list.
listing_id_chosen = 1023716
x_chosen = listing_id_chosen
y_chosen = df_transformed_ii[df_transformed_ii['listing_id'] == 1023716].iloc[:,1:].to_numpy()

In [None]:
similarity_item_item = cosine_similarity(y_chosen, y_items_ii)[0]
indices_ii_ordered = np.argsort(similarity_item_item)[::-1]

In [None]:
print('similarity_item_item: ', similarity_item_item)
print('indices_ii_ordered: ', indices_ii_ordered)
print(similarity_item_item[indices_ii_ordered])

In [None]:
# k=11 as the result includes the given listing_id row too.
df_top_ii = get_top_recommendations_ii(similarity_item_item, indices_ii_ordered, x_items_ii, y_items_ii, 11, train)

In [None]:
train[train.listing_id == 1023716].loc[:, ['listing_id','make', 'model', 'vehicle_age', 'type_of_vehicle', 'depreciation',
                   'dereg_value', 'mileage', 'price', 'engine_cap',  'fuel_type','transmission', 'brand_rank']]

In [None]:
# Remove the given listing_id from recommendations 
df_top_ii.loc[:, ['listing_id','make', 'model', 'vehicle_age', 'type_of_vehicle', 'depreciation',
                   'dereg_value', 'mileage', 'price', 'engine_cap',  'fuel_type','transmission', 'brand_rank']]