In [12]:
import warnings
import re
import ast
import numpy as np
import pandas as pd
from wordcloud import WordCloud
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [13]:
# Plot styling: seaborn's dark grid first, then matplotlib defaults for font
# size, figure size, and a fully transparent figure background (alpha = 00).
sns.set_style('darkgrid')
matplotlib.rcParams.update({
    'font.size': 12,
    'figure.figsize': (9, 6),
    'figure.facecolor': '#00000000',
})

In [14]:
# Read the preprocessed apartment listings, and keep an independent copy of
# the freshly loaded frame under `dff`.
df = pd.read_csv(filepath_or_buffer='data/preprocessed_residential_apartment.csv')
dff = df.copy(deep=True)

In [15]:
# Peek at the first row to see which columns are available.
df.iloc[:1]

Unnamed: 0,DESCRIPTION,CITY,LOCALITY,BEDROOM_NUM,BATHROOM_NUM,BALCONY_NUM,FLOOR_CATEGORY,BUILDING_TYPE,AREA,PRICE,TOP_USPS,CORNER_PROPERTY,FURNISH_LABEL,PROP_DETAILS_URL
0,Best in class property available at south bopa...,Ahmedabad West,Ghuma,3.0,3.0,1.0,High Floors,Mid-rise buildings,1700.0,7971000.0,INFO NOT AVAILABLE,N,Unfurnished,3-bhk-bedroom-apartment-flat-for-sale-in-ghuma...


In [16]:
# Show only the text-like columns of every row.
df.loc[:, ['DESCRIPTION', 'CITY', 'LOCALITY', 'BUILDING_TYPE', 'PROP_DETAILS_URL']]

Unnamed: 0,DESCRIPTION,CITY,LOCALITY,BUILDING_TYPE,PROP_DETAILS_URL
0,Best in class property available at south bopa...,Ahmedabad West,Ghuma,Mid-rise buildings,3-bhk-bedroom-apartment-flat-for-sale-in-ghuma...
1,This is your chance to own a 3 bhk residential...,Ahmedabad West,Thaltej,Mid-rise buildings,3-bhk-bedroom-apartment-flat-for-sale-in-gala-...
2,We are the proud owners of this 3 bhk apartmen...,Ahmedabad West,Vejalpur,Low-rise buildings,3-bhk-bedroom-apartment-flat-for-sale-in-baker...
3,This is your chance to a 3 bhk apartment / fla...,Ahmedabad West,Panjara pol,Low-rise buildings,3-bhk-bedroom-apartment-flat-for-sale-in-slok-...
4,Residential apartment for sell.The property co...,Ahmedabad East,Vastral,Mid-rise buildings,3-bhk-bedroom-apartment-flat-for-sale-in-vastr...
...,...,...,...,...,...
7825,1 bhk flat in aagam 99 residential property,Ahmedabad West,Sachana,Mid-rise buildings,1-bhk-bedroom-apartment-flat-for-sale-in-aagam...
7826,Changodar & kerala industrial estate are well ...,Ahmedabad South,Bavla,Low-rise buildings,1-bhk-bedroom-apartment-flat-for-sale-in-bavla...
7827,We are the proud owners of this 2 bhk apartmen...,Gandhinagar,Sughad,Low-rise buildings,2-bhk-bedroom-apartment-flat-for-sale-in-parsh...
7828,Shivsai residency near malaya high jantanagar ...,Ahmedabad North,Chandkheda,Low-rise buildings,3-bhk-bedroom-apartment-flat-for-sale-in-chand...


In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [18]:
# Turn every listing description into a TF-IDF vector, dropping English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['DESCRIPTION'])

# Pairwise dot products between all description vectors.
# linear_kernel compares the matrix with itself when no second argument is given.
cosine_sim = linear_kernel(tfidf_matrix)

# Function to recommend similar apartments
def recommend_similar_apartments(prop_details_url, cosine_sim=cosine_sim, df=df, top_n=5):
    """Return the listings whose descriptions are most similar to a given listing.

    Parameters
    ----------
    prop_details_url : str
        Value of the ``PROP_DETAILS_URL`` column identifying the reference
        listing. If several rows share the value, the first one is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity scores; row ``i`` is read as the scores of the
        i-th row of ``df`` (by position) against every row. The default is
        bound when this function is defined, so reassigning the global
        ``cosine_sim`` afterwards does not change it.
    df : pandas.DataFrame, optional
        Listings table with a ``PROP_DETAILS_URL`` column. The default is
        bound at definition time in the same way as ``cosine_sim``.
    top_n : int, optional
        Maximum number of similar listings to return (default 5).

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``df`` ordered from most to least similar.
        The reference listing itself is never included.

    Raises
    ------
    IndexError
        If no row of ``df`` has the given ``PROP_DETAILS_URL``.
    """
    # Look the listing up by position, not by index label: cosine_sim rows
    # and df.iloc below are both positional.
    matches = np.flatnonzero((df['PROP_DETAILS_URL'] == prop_details_url).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no row with PROP_DETAILS_URL == {prop_details_url!r}")
    idx = int(matches[0])

    scores = np.asarray(cosine_sim[idx]).ravel()

    # A stable sort on the negated scores gives descending similarity with
    # ties kept in row order, matching sorted(..., reverse=True) on (index, score) pairs.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the reference row explicitly. Slicing off position 0 is not enough:
    # a listing with an identical description and a smaller row position ties
    # with the reference and sorts ahead of it.
    ranked = ranked[ranked != idx][:top_n]

    return df.iloc[ranked]

In [19]:
# Demo: use the first listing's URL as the query.
user_input_url = df['PROP_DETAILS_URL'].iat[0]
similar_apartments = recommend_similar_apartments(prop_details_url=user_input_url)

In [20]:
# Display the recommendations computed above for the first listing's URL.
similar_apartments

Unnamed: 0,DESCRIPTION,CITY,LOCALITY,BEDROOM_NUM,BATHROOM_NUM,BALCONY_NUM,FLOOR_CATEGORY,BUILDING_TYPE,AREA,PRICE,TOP_USPS,CORNER_PROPERTY,FURNISH_LABEL,PROP_DETAILS_URL
7643,Best in class property available at south bopa...,Ahmedabad West,South bopal,3.0,3.0,0.0,Low Floors,Low-rise buildings,1513.6,7000000.0,INFO NOT AVAILABLE,N,Furnished,3-bhk-bedroom-apartment-flat-for-sale-in-gold-...
7755,Best in class property available at south bopa...,Ahmedabad West,South bopal,3.0,3.0,0.0,High Floors,Low-rise buildings,1595.0,6500000.0,INFO NOT AVAILABLE,N,Semifurnished,3-bhk-bedroom-apartment-flat-for-sale-in-kavis...
7644,Best in class property available at shela loca...,Ahmedabad West,Shela,3.0,3.0,1.0,High Floors,Low-rise buildings,1661.0,6460000.0,INFO NOT AVAILABLE,N,Unfurnished,3-bhk-bedroom-apartment-flat-for-sale-in-shela...
7661,Best in class property available at shela loca...,Ahmedabad West,Shela,3.0,3.0,1.0,High Floors,Low-rise buildings,1661.0,6700000.0,INFO NOT AVAILABLE,N,Unfurnished,3-bhk-bedroom-apartment-flat-for-sale-in-shela...
7071,Best in class property available at gota locat...,Ahmedabad North,Gota,2.0,2.0,1.0,Low Floors,Mid-rise buildings,1291.2,6000000.0,INFO NOT AVAILABLE,N,Unfurnished,2-bhk-bedroom-apartment-flat-for-sale-in-gota-...


In [21]:
# Imported in this cell so it runs on a fresh kernel; the notebook's own
# `import pickle` only appears in a later cell.
import pickle

# Load the model from disk
# NOTE: unpickling executes code stored in the file, so only load a
# model.pkl that comes from a trusted source.
with open('model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# Now you can use the loaded model to make predictions
# NOTE(review): NumPy coerces this mixed str/float list to an all-string array,
# so the numeric features reach the model as text such as '2.0' - confirm that
# is what the model expects.
loaded_model.predict(np.array(['Ahmedabad East', 'Mid-rise buildings', 'Semifurnished', 2.0, 2.0, 3.0, 882.0]).reshape(1,-1))



array([4304438.71162715])

In [22]:
import pickle
import numpy as np

In [24]:
# URL of the first listing, used as the lookup key in the cells below.
prop_details_url = df['PROP_DETAILS_URL'].iat[0]

In [28]:
 # Index label of the first row whose URL equals prop_details_url.
 df.index[df['PROP_DETAILS_URL'].eq(prop_details_url)].tolist()[0]

0

In [29]:
# Build the absolute listing URL for the second row.
"https://www.99acres.com/" + df['PROP_DETAILS_URL'].iat[1]

'https://www.99acres.com/3-bhk-bedroom-apartment-flat-for-sale-in-gala-eternia-thaltej-ahmedabad-west-1700-sq-ft-spid-B73228757'

In [30]:
# Same prefixing applied to the whole column (radd computes `prefix + value` element-wise).
df['PROP_DETAILS_URL'].radd("https://www.99acres.com/")

0       https://www.99acres.com/3-bhk-bedroom-apartmen...
1       https://www.99acres.com/3-bhk-bedroom-apartmen...
2       https://www.99acres.com/3-bhk-bedroom-apartmen...
3       https://www.99acres.com/3-bhk-bedroom-apartmen...
4       https://www.99acres.com/3-bhk-bedroom-apartmen...
                              ...                        
7825    https://www.99acres.com/1-bhk-bedroom-apartmen...
7826    https://www.99acres.com/1-bhk-bedroom-apartmen...
7827    https://www.99acres.com/2-bhk-bedroom-apartmen...
7828    https://www.99acres.com/3-bhk-bedroom-apartmen...
7829    https://www.99acres.com/1-bhk-bedroom-apartmen...
Name: PROP_DETAILS_URL, Length: 7830, dtype: object

In [33]:
# Function to recommend similar apartments
def recommend_similar_apartments(prop_details_url):
    """Return the six listings whose descriptions best match a given listing.

    The listings CSV is re-read and the TF-IDF vectors are rebuilt on every
    call, so the function does not depend on notebook-level variables.

    Parameters
    ----------
    prop_details_url : str
        Absolute listing URL, i.e. ``'https://www.99acres.com/'`` followed by
        the ``PROP_DETAILS_URL`` value stored in the CSV. If several rows
        share the URL, the first one is used.

    Returns
    -------
    pandas.DataFrame
        The six rows with the highest description similarity to the
        reference listing, most similar first, ties in file order. The
        reference listing is not filtered out, so it normally appears in
        the result. ``PROP_DETAILS_URL`` holds absolute URLs.

    Raises
    ------
    IndexError
        If no listing has the given URL.
    """
    # Load data for recommender system
    df = pd.read_csv('data/preprocessed_residential_apartment.csv')
    df['PROP_DETAILS_URL'] = 'https://www.99acres.com/' + df['PROP_DETAILS_URL']

    # Resolve the reference row before the expensive vectorization so that
    # an unknown URL fails fast and with a readable message.
    matches = np.flatnonzero((df['PROP_DETAILS_URL'] == prop_details_url).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no listing with PROP_DETAILS_URL == {prop_details_url!r}")
    idx = int(matches[0])

    # Vectorize the descriptions
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['DESCRIPTION'])

    # Only the reference row's similarities are needed, so compute that one
    # row rather than the full n x n matrix. The slice keeps the operand 2-D.
    sim_row = linear_kernel(tfidf_matrix[idx:idx + 1], tfidf_matrix).ravel()

    # Descending similarity; the stable sort keeps ties in row order.
    top_positions = np.argsort(-sim_row, kind='stable')[:6]
    return df.iloc[top_positions]

In [34]:
# Query the recommender with the absolute URL of the second listing.
recommend_similar_apartments(prop_details_url="https://www.99acres.com/" + df['PROP_DETAILS_URL'].iat[1])

Unnamed: 0,DESCRIPTION,CITY,LOCALITY,BEDROOM_NUM,BATHROOM_NUM,BALCONY_NUM,FLOOR_CATEGORY,BUILDING_TYPE,AREA,PRICE,TOP_USPS,CORNER_PROPERTY,FURNISH_LABEL,PROP_DETAILS_URL
1,This is your chance to own a 3 bhk residential...,Ahmedabad West,Thaltej,3.0,3.0,4.0,High Floors,Mid-rise buildings,1700.0,18000000.0,"['Fitness Center/ Gym', 'Club/ Community Center']",N,Semifurnished,https://www.99acres.com/3-bhk-bedroom-apartmen...
3,This is your chance to a 3 bhk apartment / fla...,Ahmedabad West,Panjara pol,3.0,3.0,2.0,Low Floors,Low-rise buildings,1936.8,9500000.0,INFO NOT AVAILABLE,Y,Semifurnished,https://www.99acres.com/3-bhk-bedroom-apartmen...
1834,This is your chance to own a 2 bhk residential...,Ahmedabad West,Narmada vasahat,2.0,2.0,1.0,Low Floors,Mid-rise buildings,1215.0,4500000.0,INFO NOT AVAILABLE,N,Furnished,https://www.99acres.com/2-bhk-bedroom-apartmen...
30,Redefine luxury with a world of comfort at a 4...,Gandhinagar,Sughad,4.0,3.0,3.0,Low Floors,Low-rise buildings,2808.0,12000000.0,INFO NOT AVAILABLE,N,Furnished,https://www.99acres.com/4-bhk-bedroom-apartmen...
4428,"New construction , fully furnished , main road...",Ahmedabad West,Thaltej,3.0,3.0,1.0,High Floors,Mid-rise buildings,2394.0,29000000.0,"['Fitness Center/ Gym', 'Club/ Community Cente...",N,Semifurnished,https://www.99acres.com/3-bhk-bedroom-apartmen...
3258,2 bhk apartment available for sell in one of t...,Ahmedabad West,South bopal,2.0,2.0,1.0,High Floors,Mid-rise buildings,1250.0,6200000.0,"['East Facing', 'sarswati hospital within 1km'...",N,Unfurnished,https://www.99acres.com/2-bhk-bedroom-apartmen...


In [35]:
 # Make the stored URLs absolute. Values that already carry the prefix are
 # left alone, so re-running this cell does not stack
 # 'https://www.99acres.com/' twice. Missing values stay missing.
 df['PROP_DETAILS_URL'] = df['PROP_DETAILS_URL'].where(
     df['PROP_DETAILS_URL'].str.startswith('https://www.99acres.com/', na=False),
     'https://www.99acres.com/' + df['PROP_DETAILS_URL'],
 )


In [41]:
 # Draw one listing URL at random (unseeded, so the value changes between runs).
 df['PROP_DETAILS_URL'].sample(n=1).iat[0]

'https://www.99acres.com/1-bhk-bedroom-apartment-flat-for-sale-in-adani-pratham-tragad-ahmedabad-north-450-sq-ft-spid-A74209487'