# Task 2: Recommendation Engine

## Setting up the Notebook

In [30]:
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import Series
from src.recommendation import normalize_numerical_features, encode_categorical_features, get_user_profile, calculate_cos_similar_for_all_items
%matplotlib inline

## Load the Data

In [2]:
# Load the preprocessed listings that the recommendation engine works on.
car_resale_simplified_dataset = pd.read_csv(
    filepath_or_buffer='./data/preprocessed_recommendation_data.csv'
)

# Preview the first five rows as the cell output.
car_resale_simplified_dataset.head(n=5)


Unnamed: 0,listing_id,make,manufactured,reg_date,type_of_vehicle,transmission,power,engine_cap,no_of_owners,depreciation,coe,road_tax,dereg_value,mileage,omv,arf,price
0,1030324,bmw,2013.0,2013.0,luxury sedan,auto,135.0,1997.0,1.0,17700.0,77100.0,1210.0,47514.0,73000.0,45330.0,50462.0,71300.0
1,1026909,mercedes-benz,2016.0,2016.0,luxury sedan,auto,90.0,1595.0,1.0,15070.0,53694.0,740.0,44517.0,80000.0,27886.0,26041.0,95500.0
2,1019371,mercedes-benz,2019.0,2020.0,luxury sedan,auto,115.0,1497.0,1.0,16400.0,40690.0,684.0,80301.0,9800.0,46412.0,56977.0,197900.0
3,1031014,honda,2019.0,2019.0,mid-sized sedan,auto,92.0,1597.0,1.0,10450.0,26667.0,742.0,36453.0,40000.0,20072.0,20101.0,103200.0
4,1012998,volvo,2015.0,2015.0,hatchback,auto,90.0,1498.0,3.0,11020.0,56001.0,684.0,37311.0,77777.0,22809.0,18933.0,62500.0


## Content-based Recommendation

Once we obtain **filtered data** and **scores**, we are able to compute the recommendation list. The **process of recommendation** is shown below:

1. **Create user profile**
  - First we normalize the numerical features and one-hot encode the categorical features. Then we use the processed dataframe, together with the scores the user assigned to listings, to create the user profile.
  
2. **Compute cosine similarity**
  - Given the user profile, we calculate the cosine similarity between the user profile and every item in the filtered data.
  
3. **Select top k items**
  - We rank the items by cosine similarity and return the k most similar ones.

## Get top-k recommendations

In [9]:
def get_top_recommendations(k, **kwargs):
    filtered_data = kwargs.get("filtered_data")
    listingid_score_dict = kwargs.get("listingid_score_dict")
    
    # filtered data processing: normalize and one-hot encode
    processed_df = normalize_numerical_features(filtered_data)
    processed_df = encode_categorical_features(processed_df)
    
    # get the number of features
    non_zero_column_num = filtered_data.shape[1] - 1
    
    # get user profile
    user_profile = get_user_profile(listingid_score_dict, processed_df, non_zero_column_num)
    
    # compute similarity for all items in filtered data
    similarity_list = calculate_cos_similar_for_all_items(user_profile=user_profile, user_item_index=list(listingid_score_dict.keys()), df=processed_df)
    sorted_list = sorted(similarity_list,key=lambda t:t[0], reverse=True)
    
    # return top k items
    top_k = sorted_list[:k]
    id_list = []
    for x in top_k:
        id_list.append(x[1])
    recommend_items = filtered_data.set_index('listing_id').loc[id_list].reset_index(inplace=False)
    return recommend_items

## Testing the Recommendation Engine

This will be the main part of your notebook to allow for testing your solutions. Most basically, for a given listing (defined by the row id in your input dataframe), we would like to see the recommendations you make. So however you set up your notebook, it should have at least a comparable section that will allow us to run your solution for different inputs.

### Please input some filtering conditions:
You can filter on any of the following columns: 'make', 'manufactured', 'reg_date', 'type_of_vehicle', 'transmission', 'power', 'engine_cap', 'no_of_owners', 'depreciation', 'coe', 'road_tax', 'dereg_value', 'mileage', 'omv', 'arf', 'price'

In [3]:
# Apply the filter conditions: listings whose make is "bmw" and whose price is below 80000.
user_select_data = car_resale_simplified_dataset.loc[
    lambda listings: (listings.price < 80000) & (listings.make == "bmw")
]
user_select_data.head()

Unnamed: 0,listing_id,make,manufactured,reg_date,type_of_vehicle,transmission,power,engine_cap,no_of_owners,depreciation,coe,road_tax,dereg_value,mileage,omv,arf,price
0,1030324,bmw,2013.0,2013.0,luxury sedan,auto,135.0,1997.0,1.0,17700.0,77100.0,1210.0,47514.0,73000.0,45330.0,50462.0,71300.0
31,1023935,bmw,2012.0,2012.0,hatchback,auto,100.0,1598.0,2.0,27010.0,56501.0,742.0,17432.0,67000.0,27217.0,27217.0,35100.0
54,1024395,bmw,2013.0,2013.0,luxury sedan,auto,100.0,1598.0,2.0,15130.0,79223.0,742.0,31297.0,80000.0,31108.0,25552.0,48900.0
77,991635,bmw,2015.0,2016.0,luxury sedan,auto,100.0,1499.0,2.0,12710.0,44001.0,684.0,34770.0,116000.0,26979.0,24771.0,76800.0
144,1008086,bmw,2016.0,2016.0,hatchback,auto,85.0,1496.0,2.0,11800.0,49501.0,1082.0,29244.0,79000.0,23325.0,9655.0,67000.0


### Please give some scores according to the format

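You are free to generate scores randomly or manually. Scores are stored in `listingid_score_dict`, a dictionary that maps each `listing_id` to an integer score (the random generator below draws scores from 1 to 10).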

In [44]:
# randomly generate scores
n = 10
user_select_data = car_resale_simplified_dataset[(car_resale_simplified_dataset.price < 80000) 
                                                 & (car_resale_simplified_dataset.make == "bmw")]
user_item = user_select_data.sample(n=n)
user_item_index = user_item['listing_id'].tolist()
scores = [random.randint(1, 10) for _ in range(10)]

listingid_score_dict = {user_item_index[i]: scores[i] for i in range(n)}

# manually generate scores, e.g., 
# listingid_score_dict = {1022168: 7, 1027881: 7, 1022232: 10, 1024104: 10, 1012269: 5, 
#                         1030310: 5, 1026706: 6, 1003806: 9, 1017825: 7, 1022889: 6}

print(f"Your score list is: {listingid_score_dict}")
car_resale_simplified_dataset[car_resale_simplified_dataset['listing_id'].isin(listingid_score_dict.keys())]

Your score list is: {1000510: 2, 1028657: 3, 1030801: 3, 1010238: 1, 968690: 4, 881524: 9, 1021289: 6, 1025670: 9, 1014502: 9, 1024892: 7}


Unnamed: 0,listing_id,make,manufactured,reg_date,type_of_vehicle,transmission,power,engine_cap,no_of_owners,depreciation,coe,road_tax,dereg_value,mileage,omv,arf,price
693,1014502,bmw,2016.0,2016.0,hatchback,auto,85.0,1496.0,2.0,11740.0,56889.0,1082.0,36240.0,53456.0,25133.0,12187.0,74700.0
917,881524,bmw,2011.0,2012.0,luxury sedan,auto,135.0,1997.0,1.0,57390.0,84590.0,1210.0,26839.0,33000.0,42326.0,42326.0,66000.0
934,968690,bmw,2013.0,2014.0,luxury sedan,auto,135.0,1997.0,2.0,16120.0,65301.0,1210.0,49952.0,112000.0,46697.0,52376.0,79100.0
2388,1030801,bmw,2013.0,2013.0,luxury sedan,auto,100.0,1598.0,2.0,14150.0,62497.0,742.0,26080.0,75000.0,33029.0,28241.0,41800.0
3594,1025670,bmw,2006.0,2008.0,sports car,auto,130.0,2496.0,6.0,9000.0,40881.0,2512.0,27217.0,118000.0,23120.0,23120.0,65900.0
4448,1010238,bmw,2015.0,2015.0,luxury sedan,auto,100.0,1499.0,6.0,13750.0,69001.0,684.0,44339.0,84000.0,29417.0,28184.0,75700.0
4723,1024892,bmw,2015.0,2015.0,hatchback,auto,100.0,1499.0,3.0,11400.0,71509.0,684.0,39555.0,94128.0,26097.0,18536.0,58400.0
8353,1021289,bmw,2012.0,2012.0,luxury sedan,auto,135.0,1997.0,2.0,18100.0,89990.0,1210.0,32935.0,149834.0,40954.0,40954.0,46000.0
8844,1000510,bmw,2008.0,2008.0,sports car,auto,160.0,2497.0,5.0,8950.0,32121.0,2335.0,23321.0,170000.0,48714.0,48714.0,71500.0
9701,1028657,bmw,2015.0,2015.0,luxury sedan,auto,100.0,1598.0,1.0,14300.0,78001.0,742.0,40405.0,73900.0,26591.0,19228.0,69100.0


In [36]:
# Show the 10 best-matching listings for the filtered data and scores defined above.
get_top_recommendations(
    k=10,
    filtered_data=user_select_data,
    listingid_score_dict=listingid_score_dict,
)

Unnamed: 0,listing_id,make,manufactured,reg_date,type_of_vehicle,transmission,power,engine_cap,no_of_owners,depreciation,coe,road_tax,dereg_value,mileage,omv,arf,price
0,1029653.0,bmw,2012.0,2012.0,hatchback,auto,100.0,1598.0,3.0,26950.0,56501.0,742.0,16677.0,115000.0,27348.0,27348.0,30800.0
1,1024481.0,bmw,2012.0,2012.0,hatchback,auto,125.0,1598.0,5.0,20830.0,59003.0,742.0,22586.0,128000.0,29780.0,29780.0,40500.0
2,1013555.0,bmw,2012.0,2013.0,hatchback,auto,100.0,1598.0,4.0,19750.0,64209.0,742.0,20777.0,110000.0,26590.0,19226.0,45100.0
3,1024792.0,bmw,2012.0,2012.0,hatchback,auto,100.0,1598.0,2.0,18430.0,59003.0,742.0,18396.0,108888.0,26995.0,26995.0,31700.0
4,1023345.0,bmw,2012.0,2012.0,hatchback,auto,100.0,1598.0,2.0,19910.0,59004.0,742.0,18152.0,67780.0,26410.0,26410.0,32900.0
5,1026748.0,bmw,2012.0,2012.0,hatchback,auto,125.0,1598.0,4.0,16580.0,71001.0,742.0,24490.0,141411.0,29814.0,29814.0,37200.0
6,1016124.0,bmw,2012.0,2013.0,hatchback,auto,100.0,1598.0,3.0,17340.0,84097.0,742.0,22210.0,118124.0,27342.0,17342.0,38300.0
7,995480.0,bmw,2012.0,2012.0,hatchback,auto,100.0,1598.0,1.0,18850.0,71001.0,742.0,22735.0,62500.0,26517.0,26517.0,38400.0
8,1022930.0,bmw,2013.0,2013.0,hatchback,auto,100.0,1598.0,2.0,14080.0,77989.0,742.0,27415.0,94000.0,27415.0,20381.0,42400.0
9,1030367.0,bmw,2015.0,2015.0,hatchback,auto,85.0,1496.0,3.0,14930.0,56209.0,2352.0,29054.0,100000.0,24156.0,10819.0,70300.0
