
# Data Description

The dataset consists of data scraped from the Zameen.com website, Pakistan's leading platform for buying and selling property.
Geography: Pakistan
Unit of analysis: Real estate data analysis
Dataset: The dataset contains the detailed listing information available online on the zameen.com website. It contains propertyid, locationid, pageurl, propertytype, price, location, city, provincename, latitude, longitude, baths, area, purpose, bedrooms, dateadded, agency and agent.

**Please upvote if you find this notebook helpful! 😊 Thank you! I would also be very happy to receive feedback on my work.**

In [None]:
import re
import json
import math
import pprint
import requests
# import pandas_profiling
from scipy.stats import zscore
from collections import defaultdict
from urllib.request import urlopen, Request

import numpy as np
import pandas as pd
# plotting stuff
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
colorMap = sns.light_palette("blue", as_cmap=True)
import datatable as dt
# misc
import missingno as msno
# system
import warnings
warnings.filterwarnings('ignore')
# garbage collector to keep RAM in check
import gc  
import matplotlib.gridspec as gridspec


%matplotlib inline

import os
# Walk /kaggle/input recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



# Loading the Data

In [None]:
# Loading data: read the property listings CSV into a DataFrame.
filename = "/kaggle/input/zameencom-property-data-pakistan/Property_with_Feature_Engineering.csv"

# A top-level assignment in a notebook cell is already visible to every later
# cell, so no `global` statement is needed here.
data = pd.read_csv(filename, sep = ',')

# Preview the first five rows.
data.head(5)

## Analyzing the Outliers

In [None]:
# Detecting outliers based on price: take the absolute z-score of every price
# and record the row positions whose score exceeds the threshold.
z = np.abs(zscore(data['price']))
thresh = 3  # z-score cutoff; rows scoring above it are treated as outliers
outliers_indices = np.where(z > thresh)  # later cells read the positions from element [0]

In [None]:
# Printing Outliers: report how many rows were flagged, then keep those rows
# in a separate frame for inspection.
print("Number of Outliers:", len(outliers_indices[0]), "with Threshold:", thresh)

# np.where returns each matching position once, in ascending order, so the
# array can index the frame directly; routing it through a set is unnecessary
# and does not guarantee the resulting row order.
outliers_data = data.iloc[outliers_indices[0]]

In [None]:
# Removing the Outliers from Data.
# The positions found by np.where are used as index labels, as before; this
# assumes `data` still carries the default 0..n-1 index from read_csv.
# errors="ignore" makes the cell safe to re-run: labels that were already
# dropped are skipped instead of raising KeyError.
data = data.drop(index=outliers_indices[0], errors="ignore")

In [None]:
# (rows, columns) of the frame holding the rows flagged as outliers.
outliers_data.shape

# Rates of Change w.r.t Dates

In [None]:
def exp_to_float(num):
    """Expand a value whose text form uses positive-exponent notation.

    The value is rendered with ``str``. If that text contains ``"e+"``, the
    part before it is parsed as a float and multiplied by ten raised to the
    integer after it, and the product is returned. Any other value is
    returned unchanged, exactly as it was passed in.
    """
    text = str(num)

    # Nothing to expand: hand the input back untouched.
    if "e+" not in text:
        return num

    mantissa, exponent = text.split('e+')
    return float(mantissa) * (10 ** int(exponent))

In [None]:
def index_to_date(indices):
    """Turn a two-level pivot-table index into ``"<level0>-<level1>"`` labels.

    Used to build x-axis tick labels for the plots, e.g. a (year, month) index
    entry of ``(2018, 7)`` becomes ``"2018-7"``.

    Parameters
    ----------
    indices : pandas.MultiIndex
        Index with at least two levels; only the first two are used.

    Returns
    -------
    list of str
        One label per index entry, in index order.
    """
    # Read the actual value of each level. Deriving the second part from the
    # level's positional code (+1) only matches the real value when that level
    # happens to be exactly 1, 2, 3, ... with no gaps.
    first_level = indices.get_level_values(0)
    second_level = indices.get_level_values(1)
    return [f"{first}-{second}" for first, second in zip(first_level, second_level)]

In [None]:
def rates_of_change(pivot_table):
    """Add row-over-row percentage change columns to a pivot table and plot everything.

    For every column originally present, a ``rate_of_change_<column>`` column
    is inserted immediately to its right. It holds the percentage change from
    the previous row, rounded to two decimals; the first row is NaN because it
    has no predecessor. Each column of the resulting table is then drawn in
    its own subplot on a two-column grid.

    Parameters
    ----------
    pivot_table : pandas.DataFrame
        Table whose row index has at least two levels (it is passed to
        ``index_to_date`` to build the x tick labels). Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``pivot_table`` object, now including the added columns.
    """
    # Snapshot the column list first: columns are inserted while iterating.
    for column in list(pivot_table.columns):
        values = pivot_table[column].to_numpy(dtype=float)

        # Vectorised (next - current) / current * 100; slot 0 stays NaN.
        rates = np.full(len(values), np.nan)
        rates[1:] = np.round((values[1:] - values[:-1]) / values[:-1] * 100, 2)

        # f-string instead of '+' so non-string column labels also work.
        pivot_table.insert(
            pivot_table.columns.get_loc(column) + 1,
            f"rate_of_change_{column}",
            rates,
            allow_duplicates=True,
        )

    cols = list(pivot_table.columns)
    num_cols = len(cols)
    if num_cols == 0:
        # Nothing to plot; a zero-row subplot grid would raise.
        return pivot_table

    # Plotting the Results.
    # squeeze=False keeps `axs` two-dimensional even when there is a single
    # row of subplots (a table that started with one column).
    fig, axs = plt.subplots(num_cols // 2, 2, figsize=(15, int(num_cols * 2.5)), squeeze=False)
    x_ticks = index_to_date(pivot_table.index)
    plt.setp(axs, xticks=range(len(x_ticks)), xticklabels=x_ticks)

    # axs.flat walks the grid row by row, so each original column sits in the
    # left subplot with its rate of change to the right.
    for ax, col in zip(axs.flat, cols):
        ax.plot(pivot_table[col].to_numpy(), label=str(col))
        ax.legend(loc=0)
        ax.grid()
        ax.tick_params(labelrotation=45)

    return pivot_table

In [None]:
# For Purpose: mean price per (year, month), one column per purpose.
rate_of_change_wrt_purpose = data.pivot_table(
    values='price',
    index=['year', 'month'],
    columns=['purpose'],
    aggfunc='mean',
)

rates_of_change(rate_of_change_wrt_purpose)

In [None]:
# For Type: mean price per (year, month), one column per property type.
rate_of_change_wrt_type = data.pivot_table(
    values='price',
    index=['year', 'month'],
    columns=['property_type'],
    aggfunc='mean',
)
rates_of_change(rate_of_change_wrt_type)

In [None]:
# For City: mean price per (year, month), one column per city.
rate_of_change_wrt_city = data.pivot_table(
    values='price',
    index=['year', 'month'],
    columns=['city'],
    aggfunc='mean',
)
rates_of_change(rate_of_change_wrt_city)

# Moving Averages w.r.t Dates

In [None]:
def moving_averages(pivot_table, window_size = 3):
    """Add moving-average columns to a pivot table and plot everything.

    For every column originally present, a ``moving_average_<column>`` column
    is inserted immediately to its right. It holds the rolling mean over
    ``window_size`` rows, shifted up by one row. Each column of the resulting
    table is then drawn in its own subplot on a two-column grid.

    Parameters
    ----------
    pivot_table : pandas.DataFrame
        Table whose row index has at least two levels (it is passed to
        ``index_to_date`` to build the x tick labels). Modified in place.
    window_size : int, default 3
        Number of consecutive rows averaged together.

    Returns
    -------
    pandas.DataFrame
        The same ``pivot_table`` object, now including the added columns.
    """
    # Snapshot the column list first: columns are inserted while iterating.
    for column in list(pivot_table.columns):
        # The one-row upward shift centres the average on its row for the
        # default window of 3. NOTE(review): the shift is fixed at one row for
        # every window size; confirm whether centring was the intent.
        moving_avgs = pivot_table[column].rolling(window_size).mean().shift(-1)

        # f-string instead of '+' so non-string column labels also work.
        pivot_table.insert(
            pivot_table.columns.get_loc(column) + 1,
            f"moving_average_{column}",
            moving_avgs,
            allow_duplicates=True,
        )

    cols = list(pivot_table.columns)
    num_cols = len(cols)
    if num_cols == 0:
        # Nothing to plot; a zero-row subplot grid would raise.
        return pivot_table

    # Plotting the Results.
    # squeeze=False keeps `axs` two-dimensional even when there is a single
    # row of subplots (a table that started with one column).
    fig, axs = plt.subplots(num_cols // 2, 2, figsize=(15, int(num_cols * 2.5)), squeeze=False)
    x_ticks = index_to_date(pivot_table.index)
    plt.setp(axs, xticks=range(len(x_ticks)), xticklabels=x_ticks)

    # axs.flat walks the grid row by row, so each original column sits in the
    # left subplot with its moving average to the right.
    for ax, col in zip(axs.flat, cols):
        ax.plot(pivot_table[col].to_numpy(), label=str(col))
        ax.legend(loc=0)
        ax.grid()
        ax.tick_params(labelrotation=45)

    return pivot_table

In [None]:
# For Purpose: mean price per (year, month), one column per purpose.
# The variable keeps its rate_of_change_* name, but here the table receives
# moving averages.
rate_of_change_wrt_purpose = data.pivot_table(
    values='price',
    index=['year', 'month'],
    columns=['purpose'],
    aggfunc='mean',
)
moving_averages(rate_of_change_wrt_purpose)

In [None]:
# For Type: mean price per (year, month), one column per property type.
# The variable keeps its rate_of_change_* name, but here the table receives
# moving averages.
rate_of_change_wrt_type = data.pivot_table(
    values='price',
    index=['year', 'month'],
    columns=['property_type'],
    aggfunc='mean',
)
moving_averages(rate_of_change_wrt_type)

In [None]:
# For City: mean price per (year, month), one column per city.
# The variable keeps its rate_of_change_* name, but here the table receives
# moving averages.
rate_of_change_wrt_city = data.pivot_table(
    values='price',
    index=['year', 'month'],
    columns=['city'],
    aggfunc='mean',
)
moving_averages(rate_of_change_wrt_city)

# Mean Price

In [None]:
%%HTML
<div class='tableauPlaceholder' id='viz1612724490621' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Re&#47;RentalYields5_16127243602540&#47;Sheet3&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='RentalYields5_16127243602540&#47;Sheet3' /><param name='tabs' value='no' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Re&#47;RentalYields5_16127243602540&#47;Sheet3&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1612724490621');                    var vizElement = divElement.getElementsByTagName('object')[0];                    vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';                    var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>

# Mean Sale

In [None]:
%%HTML
<div class='tableauPlaceholder' id='viz1612725172704' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Re&#47;RentalYields5_16127243602540&#47;Sheet2&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='RentalYields5_16127243602540&#47;Sheet2' /><param name='tabs' value='no' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Re&#47;RentalYields5_16127243602540&#47;Sheet2&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1612725172704');                    var vizElement = divElement.getElementsByTagName('object')[0];                    vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';                    var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>

# Rental Yield

In [None]:
%%HTML
<div class='tableauPlaceholder' id='viz1612725216942' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Re&#47;RentalYields5_16127243602540&#47;Sheet1&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='RentalYields5_16127243602540&#47;Sheet1' /><param name='tabs' value='no' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Re&#47;RentalYields5_16127243602540&#47;Sheet1&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1612725216942');                    var vizElement = divElement.getElementsByTagName('object')[0];                    vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';                    var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>

In [None]:
!pip install dataprep

In [None]:
from dataprep.eda import plot, plot_correlation, create_report, plot_missing

In [None]:
# dataprep overview of the whole frame (presumably one distribution plot per column — confirm against dataprep docs).
plot(data)

In [None]:
# Build dataprep's report for the whole frame.
create_report(data)

In [None]:
# dataprep plot restricted to the price column.
plot(data, "price")

In [None]:
# dataprep plot of price against city.
plot(data, "price","city")

In [None]:
# dataprep plot of price against property type.
plot(data, "price","property_type")

In [None]:
# dataprep plot of price against number of baths.
plot(data, "price","baths")

In [None]:
# dataprep plot of price against area.
plot(data, "price","area")

In [None]:
# dataprep plot of price against number of bedrooms.
plot(data, "price","bedrooms")

In [None]:
# Preview the first rows of the working frame (outliers already dropped if the cells above ran in order).
data.head()

## Work in progress — I will be back soon with new findings.

**Thank you! I would also be very happy to receive feedback on my work.**