In [143]:
import numpy as np
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [144]:
df = pd.read_csv('appartments.csv').drop(22)

In [145]:
df.head()

Unnamed: 0,PropertyName,PropertySubName,NearbyLocations,LocationAdvantages,Link,PriceDetails,TopFacilities
0,Smartworld One DXP,"2, 3, 4 BHK Apartment in Sector 113, Gurgaon","['Bajghera Road', 'Palam Vihar Halt', 'DPSG Pa...","{'Bajghera Road': '800 Meter', 'Palam Vihar Ha...",https://www.99acres.com/smartworld-one-dxp-sec...,"{'2 BHK': {'building_type': 'Apartment', 'area...","['Swimming Pool', 'Salon', 'Restaurant', 'Spa'..."
1,M3M Crown,"3, 4 BHK Apartment in Sector 111, Gurgaon","['DPSG Palam Vihar Gurugram', 'The NorthCap Un...","{'DPSG Palam Vihar Gurugram': '1.4 Km', 'The N...",https://www.99acres.com/m3m-crown-sector-111-g...,"{'3 BHK': {'building_type': 'Apartment', 'area...","['Bowling Alley', 'Mini Theatre', 'Manicured G..."
2,Adani Brahma Samsara Vilasa,"Land, 3, 4 BHK Independent Floor in Sector 63,...","['AIPL Business Club Sector 62', 'Heritage Xpe...","{'AIPL Business Club Sector 62': '2.7 Km', 'He...",https://www.99acres.com/adani-brahma-samsara-v...,{'3 BHK': {'building_type': 'Independent Floor...,"['Terrace Garden', 'Gazebo', 'Fountain', 'Amph..."
3,Sobha City,"2, 3, 4 BHK Apartment in Sector 108, Gurgaon","['The Shikshiyan School', 'WTC Plaza', 'Luxus ...","{'The Shikshiyan School': '2.9 KM', 'WTC Plaza...",https://www.99acres.com/sobha-city-sector-108-...,"{'2 BHK': {'building_type': 'Apartment', 'area...","['Swimming Pool', 'Volley Ball Court', 'Aerobi..."
4,Signature Global City 93,"2, 3 BHK Independent Floor in Sector 93 Gurgaon","['Pranavananda Int. School', 'DLF Site central...","{'Pranavananda Int. School': '450 m', 'DLF Sit...",https://www.99acres.com/signature-global-city-...,{'2 BHK': {'building_type': 'Independent Floor...,"['Mini Theatre', 'Doctor on Call', 'Concierge ..."


In [146]:
df.iloc[16].NearbyLocations

"['NH8', 'KMP Expressway', 'Dwarka Expressway', 'Karma Lakelands', 'Jungle Safari & Trails']"

In [147]:
df.iloc[16].LocationAdvantages

"{'NH8': '5 Min', 'KMP Expressway': '10 Min', 'Dwarka Expressway': '15 Min', 'Karma Lakelands': '5 Min', 'Jungle Safari & Trails': '15 Min', 'DPS Manesar': '15 Min', 'Medanta Hospital': '15 Min'}"

In [148]:
df.iloc[16].PriceDetails

"{'2 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '1,420 sq.ft.', 'price-range': '₹ 1.49 - 1.5 Cr'}, '3 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '1,532 - 2,155 sq.ft.', 'price-range': '₹ 1.61 - 3.49 Cr'}, '4 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '2,630 - 2,677 sq.ft.', 'price-range': '₹ 2.78 - 4.34 Cr'}}"

In [149]:
df.iloc[16].TopFacilities

"['Salon', 'Spa', 'Cricket Pitch', 'Lawn Tennis Court', 'Amphitheatre', 'Basketball Court', 'Badminton Court', 'Entrance Lobby', 'Yoga/Meditation Area']"

In [150]:
df[['PropertyName','TopFacilities']]

Unnamed: 0,PropertyName,TopFacilities
0,Smartworld One DXP,"['Swimming Pool', 'Salon', 'Restaurant', 'Spa'..."
1,M3M Crown,"['Bowling Alley', 'Mini Theatre', 'Manicured G..."
2,Adani Brahma Samsara Vilasa,"['Terrace Garden', 'Gazebo', 'Fountain', 'Amph..."
3,Sobha City,"['Swimming Pool', 'Volley Ball Court', 'Aerobi..."
4,Signature Global City 93,"['Mini Theatre', 'Doctor on Call', 'Concierge ..."
...,...,...
242,DLF Princeton Estate,"['Swimming Pool', 'Medical Centre', 'Laundry',..."
243,Pyramid Urban Homes 2,"['Shopping Centre', 'Community Hall', '24x7 Se..."
244,Satya The Hermitage,"['Bus Shelter', 'Swimming Pool', 'Business Lou..."
245,BPTP Spacio,"['Swimming Pool', 'Card Room', 'Piped Gas', 'P..."


In [151]:
df[['PropertyName','TopFacilities']]['TopFacilities'][1]

"['Bowling Alley', 'Mini Theatre', 'Manicured Garden', 'Swimming Pool', 'Flower Garden', 'Reading Lounge', 'Golf Course', 'Barbecue', 'Sauna']"

In [152]:
def extract(s):
    return re.findall(r"'(.*?)'",s)
df['TopFacilities'] = df['TopFacilities'].apply(extract)

In [153]:
df[['PropertyName','TopFacilities']]['TopFacilities'][1]

['Bowling Alley',
 'Mini Theatre',
 'Manicured Garden',
 'Swimming Pool',
 'Flower Garden',
 'Reading Lounge',
 'Golf Course',
 'Barbecue',
 'Sauna']

In [154]:
df['FacilitiesStr'] = df['TopFacilities'].apply(' '.join)

In [155]:
df['FacilitiesStr'][1]

'Bowling Alley Mini Theatre Manicured Garden Swimming Pool Flower Garden Reading Lounge Golf Course Barbecue Sauna'

In [156]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))

In [157]:
tfidf_matrix = tfidf_vectorizer.fit_transform(df['FacilitiesStr'])

In [158]:
tfidf_matrix.toarray().shape

(246, 953)

In [159]:
cosine_sim1 = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [160]:
cosine_sim1

array([[1.        , 0.01095159, 0.        , ..., 0.01183329, 0.08656385,
        0.0110727 ],
       [0.01095159, 1.        , 0.01982121, ..., 0.11904241, 0.01555534,
        0.00963852],
       [0.        , 0.01982121, 1.        , ..., 0.07020502, 0.03820314,
        0.01962826],
       ...,
       [0.01183329, 0.11904241, 0.07020502, ..., 1.        , 0.09825738,
        0.03255851],
       [0.08656385, 0.01555534, 0.03820314, ..., 0.09825738, 1.        ,
        0.06257614],
       [0.0110727 , 0.00963852, 0.01962826, ..., 0.03255851, 0.06257614,
        1.        ]])

In [161]:
cosine_sim1.shape

(246, 246)

In [162]:
def recommend_properties(property_name, cosine_sim=cosine_sim1):   
    idx = df.index[df['PropertyName'] == property_name].tolist()[0]
    sim_scores = list(enumerate(cosine_sim1[idx]))

    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)  
    sim_scores = sim_scores[1:6]
    property_indices = [i[0] for i in sim_scores]
    
    recommendations_df = pd.DataFrame({
        'PropertyName': df['PropertyName'].iloc[property_indices],
        'SimilarityScore': sim_scores
    })    
    return recommendations_df

In [163]:
recommend_properties("Adani Brahma Samsara Vilasa")

Unnamed: 0,PropertyName,SimilarityScore
133,ROF Insignia Park 2,"(132, 0.3400196693935143)"
54,Birla Navya Avik,"(53, 0.33086963113786777)"
152,Ashiana Amarah,"(151, 0.30867773207979776)"
153,JMS Prime Land,"(152, 0.3026217239287584)"
16,M3M Golf Hills,"(16, 0.2992316563707712)"


In [164]:
df[['PropertyName','PriceDetails']]['PriceDetails'][16]

"{'2 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '1,420 sq.ft.', 'price-range': '₹ 1.49 - 1.5 Cr'}, '3 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '1,532 - 2,155 sq.ft.', 'price-range': '₹ 1.61 - 3.49 Cr'}, '4 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '2,630 - 2,677 sq.ft.', 'price-range': '₹ 2.78 - 4.34 Cr'}}"

In [165]:
import json

df_appartments = pd.read_csv('appartments.csv').drop(22)

def refined_parse_modified_v2(detail_str):
    try:
        details = json.loads(detail_str.replace("'", "\""))
    except:
        return {}

    extracted = {}
    for bhk, detail in details.items():
        extracted[f'building type_{bhk}'] = detail.get('building_type')

        area = detail.get('area', '')
        area_parts = area.split('-')
        if len(area_parts) == 1:
            try:
                value = float(area_parts[0].replace(',', '').replace(' sq.ft.', '').strip())
                extracted[f'area low {bhk}'] = value
                extracted[f'area high {bhk}'] = value
            except:
                extracted[f'area low {bhk}'] = None
                extracted[f'area high {bhk}'] = None
        elif len(area_parts) == 2:
            try:
                extracted[f'area low {bhk}'] = float(area_parts[0].replace(',', '').replace(' sq.ft.', '').strip())
                extracted[f'area high {bhk}'] = float(area_parts[1].replace(',', '').replace(' sq.ft.', '').strip())
            except:
                extracted[f'area low {bhk}'] = None
                extracted[f'area high {bhk}'] = None
        price_range = detail.get('price-range', '')
        price_parts = price_range.split('-')
        if len(price_parts) == 2:
            try:
                extracted[f'price low {bhk}'] = float(price_parts[0].replace('₹', '').replace(' Cr', '').replace(' L', '').strip())
                extracted[f'price high {bhk}'] = float(price_parts[1].replace('₹', '').replace(' Cr', '').replace(' L', '').strip())
                if 'L' in price_parts[0]:
                    extracted[f'price low {bhk}'] /= 100
                if 'L' in price_parts[1]:
                    extracted[f'price high {bhk}'] /= 100
            except:
                extracted[f'price low {bhk}'] = None
                extracted[f'price high {bhk}'] = None

    return extracted
data_refined = []

for _, row in df_appartments.iterrows():
    features = refined_parse_modified_v2(row['PriceDetails'])
    
    new_row = {'PropertyName': row['PropertyName']}
    
    for config in ['1 BHK', '2 BHK', '3 BHK', '4 BHK', '5 BHK', '6 BHK', '1 RK', 'Land']:
        new_row[f'building type_{config}'] = features.get(f'building type_{config}')
        new_row[f'area low {config}'] = features.get(f'area low {config}')
        new_row[f'area high {config}'] = features.get(f'area high {config}')
        new_row[f'price low {config}'] = features.get(f'price low {config}')
        new_row[f'price high {config}'] = features.get(f'price high {config}')
    
    data_refined.append(new_row)

df_final_refined_v2 = pd.DataFrame(data_refined).set_index('PropertyName')

In [166]:
df_final_refined_v2['building type_Land'] = df_final_refined_v2['building type_Land'].replace({'':'Land'})

In [167]:
df_final_refined_v2

Unnamed: 0_level_0,building type_1 BHK,area low 1 BHK,area high 1 BHK,price low 1 BHK,price high 1 BHK,building type_2 BHK,area low 2 BHK,area high 2 BHK,price low 2 BHK,price high 2 BHK,...,building type_1 RK,area low 1 RK,area high 1 RK,price low 1 RK,price high 1 RK,building type_Land,area low Land,area high Land,price low Land,price high Land
PropertyName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Smartworld One DXP,,,,,,Apartment,1370.0,1370.0,2.0000,2.40,...,,,,,,,,,,
M3M Crown,,,,,,,,,,,...,,,,,,,,,,
Adani Brahma Samsara Vilasa,,,,,,,,,,,...,,,,,,Land,500.0,4329.0,2.05,41.13
Sobha City,,,,,,Apartment,1381.0,1692.0,1.5500,3.21,...,,,,,,,,,,
Signature Global City 93,,,,,,Independent Floor,981.0,1118.0,0.9301,1.06,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DLF Princeton Estate,,,,,,Apartment,964.0,964.0,,,...,,,,,,,,,,
Pyramid Urban Homes 2,Apartment,335.0,398.0,23.45,0.2786,Apartment,500.0,625.0,,,...,,,,,,,,,,
Satya The Hermitage,,,,,,Apartment,1450.0,1450.0,,,...,,,,,,,,,,
BPTP Spacio,,,,,,Apartment,1000.0,1079.0,,,...,,,,,,,,,,


In [168]:
df['PriceDetails'][16]

"{'2 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '1,420 sq.ft.', 'price-range': '₹ 1.49 - 1.5 Cr'}, '3 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '1,532 - 2,155 sq.ft.', 'price-range': '₹ 1.61 - 3.49 Cr'}, '4 BHK': {'building_type': 'Apartment', 'area_type': 'Super Built-up Area', 'area': '2,630 - 2,677 sq.ft.', 'price-range': '₹ 2.78 - 4.34 Cr'}}"

In [169]:
df_final_refined_v2.iloc[16]

building type_1 BHK         None
area low 1 BHK               NaN
area high 1 BHK              NaN
price low 1 BHK              NaN
price high 1 BHK             NaN
building type_2 BHK    Apartment
area low 2 BHK            1420.0
area high 2 BHK           1420.0
price low 2 BHK             1.49
price high 2 BHK             1.5
building type_3 BHK    Apartment
area low 3 BHK            1532.0
area high 3 BHK           2155.0
price low 3 BHK             1.61
price high 3 BHK            3.49
building type_4 BHK    Apartment
area low 4 BHK            2630.0
area high 4 BHK           2677.0
price low 4 BHK             2.78
price high 4 BHK            4.34
building type_5 BHK         None
area low 5 BHK               NaN
area high 5 BHK              NaN
price low 5 BHK              NaN
price high 5 BHK             NaN
building type_6 BHK         None
area low 6 BHK               NaN
area high 6 BHK              NaN
price low 6 BHK              NaN
price high 6 BHK             NaN
building t

In [170]:
categorical_columns = df_final_refined_v2.select_dtypes(include=['object']).columns.tolist()

In [171]:
categorical_columns

['building type_1 BHK',
 'building type_2 BHK',
 'building type_3 BHK',
 'building type_4 BHK',
 'building type_5 BHK',
 'building type_6 BHK',
 'building type_1 RK',
 'building type_Land']

In [172]:
ohe_df = pd.get_dummies(df_final_refined_v2, columns=categorical_columns, drop_first=True)

In [173]:
ohe_df.fillna(0,inplace=True)

In [174]:
ohe_df

Unnamed: 0_level_0,area low 1 BHK,area high 1 BHK,price low 1 BHK,price high 1 BHK,area low 2 BHK,area high 2 BHK,price low 2 BHK,price high 2 BHK,area low 3 BHK,area high 3 BHK,...,building type_2 BHK_Independent Floor,building type_2 BHK_Service Apartment,building type_3 BHK_Independent Floor,building type_3 BHK_Service Apartment,building type_3 BHK_Villa,building type_4 BHK_Independent Floor,building type_4 BHK_Villa,building type_5 BHK_Independent Floor,building type_5 BHK_Villa,building type_6 BHK_Villa
PropertyName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Smartworld One DXP,0.0,0.0,0.00,0.0000,1370.0,1370.0,2.0000,2.40,1850.0,2050.0,...,False,False,False,False,False,False,False,False,False,False
M3M Crown,0.0,0.0,0.00,0.0000,0.0,0.0,0.0000,0.00,1605.0,2170.0,...,False,False,False,False,False,False,False,False,False,False
Adani Brahma Samsara Vilasa,0.0,0.0,0.00,0.0000,0.0,0.0,0.0000,0.00,1800.0,3150.0,...,False,False,True,False,False,True,False,False,False,False
Sobha City,0.0,0.0,0.00,0.0000,1381.0,1692.0,1.5500,3.21,1711.0,2343.0,...,False,False,False,False,False,False,False,False,False,False
Signature Global City 93,0.0,0.0,0.00,0.0000,981.0,1118.0,0.9301,1.06,1235.0,1530.0,...,True,False,True,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DLF Princeton Estate,0.0,0.0,0.00,0.0000,964.0,964.0,0.0000,0.00,1127.0,1127.0,...,False,False,False,False,False,False,False,False,False,False
Pyramid Urban Homes 2,335.0,398.0,23.45,0.2786,500.0,625.0,0.0000,0.00,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
Satya The Hermitage,0.0,0.0,0.00,0.0000,1450.0,1450.0,0.0000,0.00,1991.0,1991.0,...,False,False,False,False,False,False,False,False,False,False
BPTP Spacio,0.0,0.0,0.00,0.0000,1000.0,1079.0,0.0000,0.00,1225.0,1865.0,...,False,False,False,False,False,False,False,False,False,False


In [175]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

ohe_df_normalized = pd.DataFrame(scaler.fit_transform(ohe_df), columns=ohe_df.columns, index=ohe_df.index)

In [176]:
ohe_df_normalized

Unnamed: 0_level_0,area low 1 BHK,area high 1 BHK,price low 1 BHK,price high 1 BHK,area low 2 BHK,area high 2 BHK,price low 2 BHK,price high 2 BHK,area low 3 BHK,area high 3 BHK,...,building type_2 BHK_Independent Floor,building type_2 BHK_Service Apartment,building type_3 BHK_Independent Floor,building type_3 BHK_Service Apartment,building type_3 BHK_Villa,building type_4 BHK_Independent Floor,building type_4 BHK_Villa,building type_5 BHK_Independent Floor,building type_5 BHK_Villa,building type_6 BHK_Villa
PropertyName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Smartworld One DXP,-0.252266,-0.169584,-0.105197,-0.082332,1.223499,1.020101,-0.173712,1.158423,0.553787,0.370864,...,-0.289310,-0.063888,-0.372678,-0.063888,-0.171139,-0.254824,-0.236208,-0.111111,-0.216353,-0.063888
M3M Crown,-0.252266,-0.169584,-0.105197,-0.082332,-0.893541,-0.896660,-0.283546,-0.387986,0.293086,0.472749,...,-0.289310,-0.063888,-0.372678,-0.063888,-0.171139,-0.254824,-0.236208,-0.111111,-0.216353,-0.063888
Adani Brahma Samsara Vilasa,-0.252266,-0.169584,-0.105197,-0.082332,-0.893541,-0.896660,-0.283546,-0.387986,0.500583,1.304803,...,-0.289310,-0.063888,2.683282,-0.063888,-0.171139,3.924283,-0.236208,-0.111111,-0.216353,-0.063888
Sobha City,-0.252266,-0.169584,-0.105197,-0.082332,1.240497,1.470610,-0.198425,1.680336,0.405879,0.619632,...,-0.289310,-0.063888,-0.372678,-0.063888,-0.171139,-0.254824,-0.236208,-0.111111,-0.216353,-0.063888
Signature Global City 93,-0.252266,-0.169584,-0.105197,-0.082332,0.622383,0.667529,-0.232468,0.295011,-0.100626,-0.070634,...,3.456497,-0.063888,2.683282,-0.063888,-0.171139,-0.254824,-0.236208,-0.111111,-0.216353,-0.063888
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DLF Princeton Estate,-0.252266,-0.169584,-0.105197,-0.082332,0.596113,0.452068,-0.283546,-0.387986,-0.215547,-0.412795,...,-0.289310,-0.063888,-0.372678,-0.063888,-0.171139,-0.254824,-0.236208,-0.111111,-0.216353,-0.063888
Pyramid Urban Homes 2,1.565039,0.704171,9.593528,0.224987,-0.120899,-0.022226,-0.283546,-0.387986,-1.414772,-1.369658,...,-0.289310,-0.063888,-0.372678,-0.063888,-0.171139,-0.254824,-0.236208,-0.111111,-0.216353,-0.063888
Satya The Hermitage,-0.252266,-0.169584,-0.105197,-0.082332,1.347122,1.132029,-0.283546,-0.387986,0.703823,0.320771,...,-0.289310,-0.063888,-0.372678,-0.063888,-0.171139,-0.254824,-0.236208,-0.111111,-0.216353,-0.063888
BPTP Spacio,-0.252266,-0.169584,-0.105197,-0.082332,0.651744,0.612964,-0.283546,-0.387986,-0.111267,0.213793,...,-0.289310,-0.063888,-0.372678,-0.063888,-0.171139,-0.254824,-0.236208,-0.111111,-0.216353,-0.063888


In [177]:
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim2 = cosine_similarity(ohe_df_normalized)

In [178]:
cosine_sim2.shape

(246, 246)

In [179]:
def recommend_properties_with_scores(property_name, top_n=247):
    
    
    sim_scores = list(enumerate(cosine_sim2[ohe_df_normalized.index.get_loc(property_name)]))
    sorted_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    top_indices = [i[0] for i in sorted_scores[1:top_n+1]]
    top_scores = [i[1] for i in sorted_scores[1:top_n+1]]

    top_properties = ohe_df_normalized.index[top_indices].tolist()

    recommendations_df = pd.DataFrame({
        'PropertyName': top_properties,
        'SimilarityScore': top_scores
    })
    
    return recommendations_df

recommend_properties_with_scores('M3M Golf Hills')

Unnamed: 0,PropertyName,SimilarityScore
0,AIPL The Peaceful Homes,0.955462
1,Smartworld One DXP,0.954670
2,Unitech Escape,0.953092
3,M3M Capital,0.951156
4,BPTP Terra,0.943128
...,...,...
240,Golden Park,-0.522391
241,Satya Merano Greens,-0.523660
242,ROF Normanton Park,-0.525129
243,BPTP Green Oaks,-0.525286


In [180]:
df[['PropertyName','LocationAdvantages']]['LocationAdvantages'][1]

"{'DPSG Palam Vihar Gurugram': '1.4 Km', 'The NorthCap University': '4.4 Km', 'Park Hospital, Palam Vihar': '1.4 Km', 'Pacific D21 Mall': '8.2 Km', 'Palam Vihar Halt Railway Station': '1.2 Km', 'Dwarka Sector 21 Metro Station': '8.1 Km', 'Dwarka Expressway': '450 m', 'Fun N Food Water Park': '8.1 Km', 'Indira Gandhi International Airport': '14.1 Km', 'Tau DeviLal Sports Complex': '11.2 Km', 'Hamoni Golf Camp': '5 Km', 'Hyatt Place': '6.1 Km', 'Altrade Business Centre': '11.2 Km'}"

In [181]:
def distance_to_meters(distance_str):
    try:
        if 'Km' in distance_str or 'KM' in distance_str:
            return float(distance_str.split()[0]) * 1000
        elif 'Meter' in distance_str or 'meter' in distance_str:
            return float(distance_str.split()[0])
        else:
            return None
    except:
        return None

In [182]:
import ast
location_matrix = {}
for index, row in df.iterrows():
    distances = {}
    for location, distance in ast.literal_eval(row['LocationAdvantages']).items():
        distances[location] = distance_to_meters(distance)
    location_matrix[index] = distances

location_df = pd.DataFrame.from_dict(location_matrix, orient='index')

location_df.head()

Unnamed: 0,Bajghera Road,Palam Vihar Halt,DPSG Palam Vihar,Park Hospital,Gurgaon Railway Station,The NorthCap University,Dwarka Expy,Hyatt Place Gurgaon Udyog Vihar,"Dwarka Sector 21, Metro Station",Pacific D21 Mall,...,MCC Cricket Ground Dhankot,The Shri Ram School Aravali,Taj City Centre Gurugram,Minda Industries Corporate Office,"Rampura Flyover, Naurangpur Rd",Manesar toll plaza - Kherki Daula,"Imt Manesar, Gurugram",Holiday Inn,Sector 84 Road,Skyview Corporate Park
0,800.0,2500.0,3100.0,3100.0,4900.0,5400.0,1200.0,7700.0,7200.0,7400.0,...,,,,,,,,,,
25,550.0,,,,,6700.0,3800.0,,,7500.0,...,,,,,,,,,,
37,5300.0,,,,2500.0,8800.0,,,,,...,,,,,,,,,,
69,1500.0,,,,6500.0,6700.0,5100.0,,,8200.0,...,,,,,,,,,,
9,,,,5500.0,,,,,,,...,,,,,,,,,,


In [183]:
location_df.fillna(100000,inplace=True)

In [184]:
location_df.index = df.PropertyName

In [185]:
location_df

Unnamed: 0_level_0,Bajghera Road,Palam Vihar Halt,DPSG Palam Vihar,Park Hospital,Gurgaon Railway Station,The NorthCap University,Dwarka Expy,Hyatt Place Gurgaon Udyog Vihar,"Dwarka Sector 21, Metro Station",Pacific D21 Mall,...,MCC Cricket Ground Dhankot,The Shri Ram School Aravali,Taj City Centre Gurugram,Minda Industries Corporate Office,"Rampura Flyover, Naurangpur Rd",Manesar toll plaza - Kherki Daula,"Imt Manesar, Gurugram",Holiday Inn,Sector 84 Road,Skyview Corporate Park
PropertyName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Smartworld One DXP,800.0,2500.0,3100.0,3100.0,4900.0,5400.0,1200.0,7700.0,7200.0,7400.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
M3M Crown,550.0,100000.0,100000.0,100000.0,100000.0,6700.0,3800.0,100000.0,100000.0,7500.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
Adani Brahma Samsara Vilasa,5300.0,100000.0,100000.0,100000.0,2500.0,8800.0,100000.0,100000.0,100000.0,100000.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
Sobha City,1500.0,100000.0,100000.0,100000.0,6500.0,6700.0,5100.0,100000.0,100000.0,8200.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
Signature Global City 93,100000.0,100000.0,100000.0,5500.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DLF Princeton Estate,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
Pyramid Urban Homes 2,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
Satya The Hermitage,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
BPTP Spacio,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,...,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0


In [186]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

location_df_normalized = pd.DataFrame(scaler.fit_transform(location_df), columns=location_df.columns, index=location_df.index)

In [187]:
location_df_normalized

Unnamed: 0_level_0,Bajghera Road,Palam Vihar Halt,DPSG Palam Vihar,Park Hospital,Gurgaon Railway Station,The NorthCap University,Dwarka Expy,Hyatt Place Gurgaon Udyog Vihar,"Dwarka Sector 21, Metro Station",Pacific D21 Mall,...,MCC Cricket Ground Dhankot,The Shri Ram School Aravali,Taj City Centre Gurugram,Minda Industries Corporate Office,"Rampura Flyover, Naurangpur Rd",Manesar toll plaza - Kherki Daula,"Imt Manesar, Gurugram",Holiday Inn,Sector 84 Road,Skyview Corporate Park
PropertyName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Smartworld One DXP,-7.876525,-15.652476,-15.652476,-3.098598,-2.830230,-3.061862,-3.647080,-10.628470,-15.652476,-5.933039,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
M3M Crown,-7.896699,0.063888,0.063888,0.328659,0.371763,-3.015166,-3.543711,0.090474,0.063888,-5.926447,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Adani Brahma Samsara Vilasa,-7.513392,0.063888,0.063888,0.328659,-2.911037,-2.939734,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Sobha City,-7.820038,0.063888,0.063888,0.328659,-2.776358,-3.015166,-3.492026,0.090474,0.063888,-5.880303,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Signature Global City 93,0.128540,0.063888,0.063888,-3.013713,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DLF Princeton Estate,0.128540,0.063888,0.063888,0.328659,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Pyramid Urban Homes 2,0.128540,0.063888,0.063888,0.328659,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Satya The Hermitage,0.128540,0.063888,0.063888,0.328659,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
BPTP Spacio,0.128540,0.063888,0.063888,0.328659,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888


In [188]:
cosine_sim3 = cosine_similarity(location_df_normalized)

In [189]:
cosine_sim3.shape

(246, 246)

In [201]:
def recommend_properties_with_scores(property_name, top_n=10):
    
    cosine_sim_matrix = 30*cosine_sim1 + 20*cosine_sim2 + 8*cosine_sim3
  
    sim_scores = list(enumerate(cosine_sim_matrix[location_df_normalized.index.get_loc(property_name)]))
   
    sorted_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
   
    top_indices = [i[0] for i in sorted_scores[1:top_n+1]]
    top_scores = [i[1] for i in sorted_scores[1:top_n+1]]
    
    top_properties = location_df_normalized.index[top_indices].tolist()
    
    recommendations_df = pd.DataFrame({
        'PropertyName': top_properties,
        'SimilarityScore': top_scores
    })
    
    return recommendations_df

recommend_properties_with_scores('Ireo Victory Valley')


Unnamed: 0,PropertyName,SimilarityScore
0,Pioneer Urban Presidia,28.0347
1,Ambience Creacions,27.793752
2,DLF The Crest,24.204307
3,Pioneer Araya,23.426506
4,SS The Leaf,21.028151
5,Silverglades The Melia,21.012934
6,AIPL The Peaceful Homes,20.921644
7,Bestech Park View Grand Spa,20.193408
8,Experion Windchants,19.794665
9,Indiabulls Centrum Park,19.70399


In [191]:
cosine_sim_matrix = 30*cosine_sim1 + 20*cosine_sim2 + 8*cosine_sim3
cosine_sim_matrix

array([[ 5.80000000e+01,  3.85883731e+00,  5.28455165e-02, ...,
         5.05976208e+00,  4.01524661e+00,  7.71645390e+00],
       [ 3.85883731e+00,  5.80000000e+01,  3.28279387e+00, ...,
        -2.12337475e+00, -2.73082727e+00, -2.61415998e+00],
       [ 5.28455165e-02,  3.28279387e+00,  5.80000000e+01, ...,
        -2.62123525e+00, -7.29918459e+00, -6.06115892e+00],
       ...,
       [ 5.05976208e+00, -2.12337475e+00, -2.62123525e+00, ...,
         5.80000000e+01,  5.47599587e+00,  5.06368734e+00],
       [ 4.01524661e+00, -2.73082727e+00, -7.29918459e+00, ...,
         5.47599587e+00,  5.80000000e+01,  2.76174878e+01],
       [ 7.71645390e+00, -2.61415998e+00, -6.06115892e+00, ...,
         5.06368734e+00,  2.76174878e+01,  5.80000000e+01]])

In [192]:
location_df_normalized

Unnamed: 0_level_0,Bajghera Road,Palam Vihar Halt,DPSG Palam Vihar,Park Hospital,Gurgaon Railway Station,The NorthCap University,Dwarka Expy,Hyatt Place Gurgaon Udyog Vihar,"Dwarka Sector 21, Metro Station",Pacific D21 Mall,...,MCC Cricket Ground Dhankot,The Shri Ram School Aravali,Taj City Centre Gurugram,Minda Industries Corporate Office,"Rampura Flyover, Naurangpur Rd",Manesar toll plaza - Kherki Daula,"Imt Manesar, Gurugram",Holiday Inn,Sector 84 Road,Skyview Corporate Park
PropertyName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Smartworld One DXP,-7.876525,-15.652476,-15.652476,-3.098598,-2.830230,-3.061862,-3.647080,-10.628470,-15.652476,-5.933039,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
M3M Crown,-7.896699,0.063888,0.063888,0.328659,0.371763,-3.015166,-3.543711,0.090474,0.063888,-5.926447,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Adani Brahma Samsara Vilasa,-7.513392,0.063888,0.063888,0.328659,-2.911037,-2.939734,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Sobha City,-7.820038,0.063888,0.063888,0.328659,-2.776358,-3.015166,-3.492026,0.090474,0.063888,-5.880303,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Signature Global City 93,0.128540,0.063888,0.063888,-3.013713,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DLF Princeton Estate,0.128540,0.063888,0.063888,0.328659,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Pyramid Urban Homes 2,0.128540,0.063888,0.063888,0.328659,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
Satya The Hermitage,0.128540,0.063888,0.063888,0.328659,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888
BPTP Spacio,0.128540,0.063888,0.063888,0.328659,0.371763,0.336174,0.280952,0.090474,0.063888,0.171123,...,0.0,0.063888,0.063888,0.063888,0.063888,0.063888,0.063888,0.0,0.063888,0.063888


In [193]:
# index_value = location_df_normalized.index.get_loc('Ireo Victory Valley')  
# cosine_sim_matrix[index_value]
print(location_df_normalized.index)


Index(['Smartworld One DXP', 'M3M Crown', 'Adani Brahma Samsara Vilasa',
       'Sobha City', 'Signature Global City 93', 'Whiteland The Aspen',
       'Bestech Altura', 'Elan The Presidential', 'Signature Global City 92',
       'Emaar Digihomes',
       ...
       'BPTP Freedom Park Life', 'DLF New Town Heights', 'La Lagune',
       'M3M My Den', 'Suncity Avenue 102', 'DLF Princeton Estate',
       'Pyramid Urban Homes 2', 'Satya The Hermitage', 'BPTP Spacio',
       'SS The Coralwood'],
      dtype='object', name='PropertyName', length=246)


In [194]:
print(location_df_normalized.columns)


Index(['Bajghera Road', 'Palam Vihar Halt', 'DPSG Palam Vihar',
       'Park Hospital', 'Gurgaon Railway Station', 'The NorthCap University',
       'Dwarka Expy', 'Hyatt Place Gurgaon Udyog Vihar',
       'Dwarka Sector 21, Metro Station', 'Pacific D21 Mall',
       ...
       'MCC Cricket Ground Dhankot', 'The Shri Ram School Aravali',
       'Taj City Centre Gurugram', 'Minda Industries  Corporate Office',
       'Rampura Flyover, Naurangpur Rd', 'Manesar toll plaza - Kherki Daula',
       'Imt Manesar, Gurugram', 'Holiday Inn', 'Sector 84 Road',
       'Skyview Corporate Park'],
      dtype='object', length=1070)


In [195]:
print(location_df_normalized.dtypes)  


Bajghera Road                        float64
Palam Vihar Halt                     float64
DPSG Palam Vihar                     float64
Park Hospital                        float64
Gurgaon Railway Station              float64
                                      ...   
Manesar toll plaza - Kherki Daula    float64
Imt Manesar, Gurugram                float64
Holiday Inn                          float64
Sector 84 Road                       float64
Skyview Corporate Park               float64
Length: 1070, dtype: object


In [196]:
print(location_df_normalized.index)


Index(['Smartworld One DXP', 'M3M Crown', 'Adani Brahma Samsara Vilasa',
       'Sobha City', 'Signature Global City 93', 'Whiteland The Aspen',
       'Bestech Altura', 'Elan The Presidential', 'Signature Global City 92',
       'Emaar Digihomes',
       ...
       'BPTP Freedom Park Life', 'DLF New Town Heights', 'La Lagune',
       'M3M My Den', 'Suncity Avenue 102', 'DLF Princeton Estate',
       'Pyramid Urban Homes 2', 'Satya The Hermitage', 'BPTP Spacio',
       'SS The Coralwood'],
      dtype='object', name='PropertyName', length=246)


In [197]:
print(location_df.head())


                             Bajghera Road  Palam Vihar Halt  \
PropertyName                                                   
Smartworld One DXP                   800.0            2500.0   
M3M Crown                            550.0          100000.0   
Adani Brahma Samsara Vilasa         5300.0          100000.0   
Sobha City                          1500.0          100000.0   
Signature Global City 93          100000.0          100000.0   

                             DPSG Palam Vihar  Park Hospital  \
PropertyName                                                   
Smartworld One DXP                     3100.0         3100.0   
M3M Crown                            100000.0       100000.0   
Adani Brahma Samsara Vilasa          100000.0       100000.0   
Sobha City                           100000.0       100000.0   
Signature Global City 93             100000.0         5500.0   

                             Gurgaon Railway Station  The NorthCap University  \
PropertyName         

In [198]:
print('Gurgaon Railway Station' in location_df_normalized.columns)


True


In [199]:
print('Gurgaon Railway Station' in location_df.columns)
print('Gurgaon Railway Station' in location_df.index)


True
False


In [200]:
col_index = location_df_normalized.columns.get_loc('Gurgaon Railway Station')
cosine_sim_matrix[col_index]


array([ 1.02162818, -0.38390297, -0.55199493,  0.6213664 , 58.        ,
       -4.67730051, -1.79137665, -4.56517758, 23.1552328 ,  7.05480549,
       22.84264657, -4.65757403, 11.02805521, 16.1765285 , -1.18562803,
       -4.02235241,  1.0480676 , 19.28879355,  2.20838953,  4.44696411,
        2.24635509,  8.0336221 ,  4.4270441 , 14.55677834,  7.03685809,
       16.50229542, 22.78569956,  6.48723653, 18.4595562 ,  4.71090453,
       11.04648012,  9.97995091,  7.89728745,  0.43850403,  5.25692521,
       -3.95702127, -3.81590695,  4.12332314, -3.67980499, -0.3103675 ,
        2.07325492,  9.97950423,  2.45395648, -1.35757563,  3.21008785,
       19.93880821, -3.32228939, -1.33750594, -4.40305814,  1.2336768 ,
       -0.8340529 , 10.55900657,  1.06955621, -3.54637752, -2.53789001,
        5.43558608,  0.83215643, -0.28593898,  1.95348515,  2.15236392,
        0.66517644, 15.25216671,  0.21215873, 12.84484123,  7.00822028,
        1.47475228, 19.41818344,  3.13309361, -0.27863299, -2.56