## Setting up the Notebook

In [8]:
import pandas as pd
from sklearn.metrics import pairwise 

In [10]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. cos_similarity.py) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Project-local helper that computes the recommendations used further below.
from cos_similarity import similarity

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the Data
Load preprocessed data

In [11]:
# Read the preprocessed listings into a DataFrame and preview the first rows.
df_sample = pd.read_csv(filepath_or_buffer='data/clean_data.csv')
df_sample.head()

Unnamed: 0,property_type,tenure,built_year,num_beds,num_baths,size_sqft,lat,lng,price
0,0,0,1988.0,3.0,2.0,1115,1.414399,103.837196,514500.0
1,0,0,1992.0,4.0,2.0,1575,1.372597,103.875625,995400.0
2,2,2,2022.0,4.0,6.0,3070,1.298773,103.895798,8485000.0
3,2,2,2023.0,3.0,2.0,958,1.312364,103.803271,2626000.0
4,2,0,2026.0,2.0,1.0,732,1.273959,103.843635,1764000.0


## Computing the Top Recommendations

In [12]:
def get_top_recommendations(df_data, row_id, **kwargs) -> pd.DataFrame:
    """Return the recommendations for one row of ``df_data``.

    This is a thin wrapper: it picks the supported options out of
    ``**kwargs`` and delegates the actual work to ``similarity``.

    Parameters
    ----------
    df_data
        The dataset, forwarded unchanged to ``similarity``.
    row_id
        Identifier of the query row, forwarded unchanged to ``similarity``.
    **kwargs
        Optional settings. Only ``k`` (the number of recommendations
        requested) is used; it is ``None`` when omitted. Every other
        keyword is silently ignored.

    Returns
    -------
    The value produced by ``similarity(row_id, df_data, k)`` (annotated as a
    ``pd.DataFrame``).
    """
    # Only `k` is recognised; a missing `k` is forwarded as None.
    num_recommendations = kwargs.get('k')

    # The ranking itself lives in the external `similarity` helper.
    return similarity(row_id, df_data, num_recommendations)


## Testing the Recommendation Engine
### Pick a Sample Listing as Input

In [13]:
# Row id of the listing used as the query (other ids such as 20, 30, 40 or 50
# can be substituted here).
row_id = 10

# Positional lookup of that row; an out-of-range row id raises an IndexError.
row = df_sample.iloc[row_id]

# Wrap the single row in a DataFrame purely so that it is displayed nicely.
pd.DataFrame(data=[row])

Unnamed: 0,property_type,tenure,built_year,num_beds,num_baths,size_sqft,lat,lng,price
10,2.0,2.0,2023.0,2.0,1.0,646.0,1.329703,103.905683,1365000.0


In [14]:
# Number of recommendations to request and to display.
k = 3

# Ask the engine for the recommendations belonging to the chosen row.
df_recommendations = get_top_recommendations(df_data=df_sample, row_id=row_id, k=k)

# Show the first k rows of the result.
df_recommendations.head(n=k)

Unnamed: 0,property_type,tenure,built_year,num_beds,num_baths,size_sqft,lat,lng,price
13440,2,2,2023.0,2.0,1.0,646,1.329703,103.905683,1361400.0
5742,2,2,2023.0,2.0,1.0,646,1.329703,103.905683,1360800.0
3772,2,2,2023.0,2.0,1.0,646,1.329703,103.905683,1374900.0


In [15]:
# Evaluate the recommendation list by its intra-list distance: the mean cosine
# distance over all ordered pairs of distinct recommendations. Because this is
# a distance, values close to 0 mean the recommended rows are nearly identical.
intra_list = pairwise.pairwise_distances(df_recommendations, metric='cosine')

# Take the list length from the distance matrix instead of hard-coding 3, so
# the average stays correct when k (or the number of returned rows) changes.
# Only the n*(n-1) off-diagonal entries are counted; the self-distances on the
# diagonal are excluded. Fewer than two rows have no pairs, so report 0.0
# rather than dividing by zero.
n_items = intra_list.shape[0]
intra_list.sum() / (n_items * (n_items - 1)) if n_items > 1 else 0.0

8.206261596181245e-11