# Price Estimate
## Property Market

In [1]:
# Adjust Notebook Display
# Inject a CSS override so that elements with class `container` are rendered at
# 90% width. This is purely presentational; nothing below depends on it.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
# Required Packages
import pandas as pd
import numpy as np
import statsmodels.api as sm
import math

  from pandas import Int64Index as NumericIndex


In [3]:
# Load DataFrame
# Read the property listings from a CSV file addressed by a relative path.
prop_market = pd.read_csv('prop_market.csv')
# Report (rows, columns) as a quick sanity check on the load.
print('DataFrame Size : ', prop_market.shape)
# Last expression of the cell: preview the first rows of the frame.
prop_market.head()

DataFrame Size :  (5385, 21)


Unnamed: 0,Links,Locality,Price,Beds,Property Type,Baths,Description,Garden,Yard,Terrace,...,Pool,Roof Access,Seafront,Views,Garage,Optional Garage,Airspace,Optional Airspace,Furnished,Finished
0,https://www.propertymarket.com.mt/view/2-bedro...,Attard,349000,2,Town House,2,"BIRKIRKARA – Located in a UCA area, yet in a q...",0,0,1,...,0,1,0,0,0,0,1,0,1,0
1,https://www.propertymarket.com.mt/view/2-bedro...,Marsascala,300000,2,Apartment,2,"A 2 Bedroom second floor Apartment, with permi...",0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,https://www.propertymarket.com.mt/view/2-bedro...,San Gwann,330000,2,Maisonette,2,Situated on the outskirts of San Gwann and few...,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,https://www.propertymarket.com.mt/view/3-bedro...,Balzan,425000,3,Apartment,2,A very well located modern three bedroom 180 S...,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,https://www.propertymarket.com.mt/view/2-bedro...,Zebbug,370000,2,Town House,2,A two bedroom townhouse just minutes walk away...,0,1,0,...,0,1,0,0,0,0,1,0,0,0


In [4]:
# Double Check Duplicates
# Use the 'Links' column as the uniqueness key: rows sharing a link are collapsed
# to a single row. The frame is reassigned, so later cells see the de-duplicated data.
prop_market = prop_market.drop_duplicates(subset=['Links'])
print('DataFrame Size : ', prop_market.shape) # Same shape as after loading, so no duplicates were present

DataFrame Size :  (5385, 21)


#### Roof Access Adjustment

In [5]:
# Display Available Property Types
# Lists the distinct values of 'Property Type'; this is used to decide which
# types should automatically be flagged as having roof access.
prop_market['Property Type'].unique()

array(['Town House', 'Apartment', 'Maisonette', 'Penthouse', 'Palazzo',
       'House of Character', 'Terraced House', 'Bungalow', 'Villa',
       'House', 'Semi-Detached House', 'Duplex', 'Semi-Detached Bungalow',
       'Farm House', 'Semi-Detached Villa', 'Detached Villa',
       'Village House', 'Site', 'Hotel', 'Detached Bungalow', 'Plot',
       'Guest House', 'Block of Apartments'], dtype=object)

In [6]:
# Original Roof Access Variable
# Frequency of the 0/1 'Roof Access' flag as loaded, before any adjustment,
# to serve as a baseline for the override applied afterwards.
prop_market['Roof Access'].value_counts()

0    4381
1    1004
Name: Roof Access, dtype: int64

In [7]:
# Assign Roof Access Dummy to Properties with Automatic Roof Access
# Property types that are treated as always having roof access, regardless of the
# value of the listing's own 'Roof Access' flag.
with_roof = ['Town House', 'Palazzo', 'House of Character', 'Terraced House', 'Bungalow', 'Villa', 'House', 'Semi-Detached House', 'Duplex',
             'Semi-Detached Bungalow', 'Farm House', 'Semi-Detached Villa', 'Detached Villa', 'Village House', 'Detached Bungalow', 'Guest House']
# One vectorised membership test instead of rewriting the whole column once per
# property type: rows whose type is in `with_roof` are forced to 1, every other
# row keeps its existing value. Re-running the cell yields the same column.
prop_market['Roof Access'] = np.where(prop_market['Property Type'].isin(with_roof), 1, prop_market['Roof Access'])

# Distribution of the flag after the override.
prop_market['Roof Access'].value_counts()

0    3942
1    1443
Name: Roof Access, dtype: int64

#### Beds and Baths Adjustment to Categories

In [8]:
# Categories for Beds
# 0/1 indicator columns for exactly 2, exactly 3, and 4-or-more bedrooms.
# Rows matching none of the three conditions get 0 in every indicator.
prop_market['Beds (2)'] = prop_market['Beds'].eq(2).astype(int)
prop_market['Beds (3)'] = prop_market['Beds'].eq(3).astype(int)
prop_market['Beds (4+)'] = prop_market['Beds'].ge(4).astype(int)

In [9]:
# Categories for Baths
# 0/1 indicator columns for exactly 2, exactly 3, and 4-or-more bathrooms.
# Rows matching none of the three conditions get 0 in every indicator.
prop_market['Baths (2)'] = prop_market['Baths'].eq(2).astype(int)
prop_market['Baths (3)'] = prop_market['Baths'].eq(3).astype(int)
prop_market['Baths (4+)'] = prop_market['Baths'].ge(4).astype(int)

## Regression Exercise

In [10]:
# Get Dependent Variable - Log Price
# Natural logarithm of the 'Price' column; this is the regression target.
# The result is derived from the 'Price' column, which is why the regression
# summary labels the dependent variable "Price" even though it is in logs.
# NOTE(review): assumes every Price is strictly positive and non-missing — confirm.
log_price = np.log(prop_market['Price'])

In [11]:
# Remove Columns that are not necessary for modelling
# 'Price' is the source of the dependent variable, and 'Beds'/'Baths' are already
# represented by the indicator columns built from them. 'Links' and 'Description'
# are likewise left out of the regressor matrix.
X = prop_market.drop(columns=['Links', 'Price', 'Description', 'Beds', 'Baths'])

In [12]:
# Get Dummies for Categorical Variables
# One-hot encode 'Property Type' and 'Locality'. drop_first=True omits the first
# level of each variable so that level acts as the baseline category.
# dtype=int keeps the dummies numeric: pandas >= 2.0 produces bool dummies by
# default, and a frame mixing bool and integer columns is cast to object when
# handed to statsmodels, which makes the OLS fit raise instead of running.
X = pd.get_dummies(data=X, columns=['Property Type', 'Locality'], drop_first=True, dtype=int)
print('Number of Regressors :', X.shape[1])

Number of Regressors : 109


In [13]:
# Estimate Simple Linear Regression
# Add an intercept column to the regressors (it appears as 'const' in the fitted
# parameters), then fit ordinary least squares of log price on all regressors.
X = sm.add_constant(X)
results = sm.OLS(log_price, X).fit()
# The fit uses the default covariance estimator (reported as "nonrobust" in the summary).
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                  Price   R-squared:                       0.776
Model:                            OLS   Adj. R-squared:                  0.771
Method:                 Least Squares   F-statistic:                     167.5
Date:                Sun, 23 Apr 2023   Prob (F-statistic):               0.00
Time:                        23:11:04   Log-Likelihood:                -1238.8
No. Observations:                5385   AIC:                             2698.
Df Residuals:                    5275   BIC:                             3423.
Df Model:                         109                                         
Covariance Type:            nonrobust                                         
                                           coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------


The regression is imperfect: several variables are statistically insignificant (p-value greater than 0.05), and the Jarque-Bera (JB) statistic is large (with P(JB) less than 0.05, indicating non-normal residuals). Nevertheless, figures such as the Adj. R-squared and the F-statistic indicate that the regression significantly explains most of the variation in log(Price). The regression is deemed adequate for a simple and quick property price estimate, although the model is admittedly imperfect. Heteroskedasticity-robust standard errors may be required, subject to tests for heteroskedasticity.

In [14]:
# Price Estimate for Property of Interest
# Predicted log price = intercept plus the coefficient of every indicator that is
# "on" for this property (3 beds, 2 baths, lift, pool, roof access, furnished,
# penthouse, Mosta); all other regressors are taken as 0. Exponentiating returns
# the estimate to the price scale.
coefs = results.params
log_estimate = (coefs['const'] + coefs['Beds (3)'] + coefs['Baths (2)'] + coefs['Lift'] +
                coefs['Pool'] + coefs['Roof Access'] + coefs['Furnished'] + coefs['Property Type_Penthouse'] +
                coefs['Locality_Mosta'])
EPrice = math.exp(log_estimate)
print('Expected Price for Property of Interest :', round(EPrice))

Expected Price for Property of Interest : 620649


In [15]:
# Price Estimate for Property of Interest - Excluding Pool
# Same property profile as the estimate with a pool, but the 'Pool' coefficient is
# left out of the sum, i.e. the pool indicator is treated as 0.
coefs = results.params
log_estimate = (coefs['const'] + coefs['Beds (3)'] + coefs['Baths (2)'] + coefs['Lift'] +
                coefs['Roof Access'] + coefs['Furnished'] + coefs['Property Type_Penthouse'] +
                coefs['Locality_Mosta'])
EPrice = math.exp(log_estimate)
print('Expected Price for Property of Interest, Excluding Pool:', round(EPrice))

Expected Price for Property of Interest, Excluding Pool: 483900
