### Basic Python library imports (for visualizations and DataFrames)

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
# Silence every warning so cell outputs stay readable.
# NOTE(review): this also hides deprecation warnings from the libraries in use;
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")
# Dark-grid seaborn style with fonts scaled by 1.5 for the plots in this notebook.
sns.set(style="darkgrid", font_scale=1.5)
# Never truncate columns when a DataFrame is displayed. The exact option key is
# used: the previous "display.max.columns" only resolved through pandas' regex
# fallback ('.' matching '_'), which fails if more than one option ever matches.
pd.set_option("display.max_columns", None)

### Transformation imports

In [2]:
from scipy import stats
from scipy.stats import skew
from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax

### Machine learning models imports

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

### Data pre-processing 

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

### Pre-processing (scikit-learn `preprocessing` module)

In [5]:
from sklearn import preprocessing

### Math

In [6]:
import math

## Loading The Data 

In [7]:
# Read the property listings from the CSV next to the notebook, then preview
# the first five rows as the cell's output.
df_main = pd.read_csv(filepath_or_buffer="./properties.csv")
df_main.head(n=5)

Unnamed: 0,type,title,location,bedroom,bathroom,size_sqm,price
0,Duplex,Prime Location Duplex Fully Finished With A\C,"Park View, North Investors Area, New Cairo Cit...",4,4,345,6850000
1,Villa,Town house resale at Mivida Emaar with best price,"Mivida, 5th Settlement Compounds, The 5th Sett...",3,3,285,10000000
2,Apartment,Lake View Residence - Apartment | Prime Location,"Lake View Residence, 5th Settlement Compounds,...",3,3,210,5700000
3,Townhouse,Best Penthouse for sale in villette ( sky conds ),"La Vista City, New Capital Compounds, New Capi...",4,4,230,7510000
4,Penthouse,2nd Floor | Fully Finished | Lowest Price |Par...,"Villette, 5th Settlement Compounds, The 5th Se...",5,6,284,8511300


## Data Cleaning

In [8]:
# Print the per-column dtypes and non-null counts, then show a five-row
# preview as the cell's displayed value.
df_main.info()
df_main.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11418 entries, 0 to 11417
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   type      11418 non-null  object
 1   title     11418 non-null  object
 2   location  11418 non-null  object
 3   bedroom   11418 non-null  object
 4   bathroom  11418 non-null  int64 
 5   size_sqm  11418 non-null  object
 6   price     11418 non-null  object
dtypes: int64(1), object(6)
memory usage: 624.5+ KB


Unnamed: 0,type,title,location,bedroom,bathroom,size_sqm,price
0,Duplex,Prime Location Duplex Fully Finished With A\C,"Park View, North Investors Area, New Cairo Cit...",4,4,345,6850000
1,Villa,Town house resale at Mivida Emaar with best price,"Mivida, 5th Settlement Compounds, The 5th Sett...",3,3,285,10000000
2,Apartment,Lake View Residence - Apartment | Prime Location,"Lake View Residence, 5th Settlement Compounds,...",3,3,210,5700000
3,Townhouse,Best Penthouse for sale in villette ( sky conds ),"La Vista City, New Capital Compounds, New Capi...",4,4,230,7510000
4,Penthouse,2nd Floor | Fully Finished | Lowest Price |Par...,"Villette, 5th Settlement Compounds, The 5th Se...",5,6,284,8511300


### Check Nulls

In [9]:
# Share of missing values per column, as a percentage rounded to two decimals
# and sorted ascending, held in a one-column frame.
null_array = (
    df_main.isnull()
    .sum()
    .div(len(df_main))
    .mul(100)
    .round(2)
    .sort_values()
    .to_frame()
    .rename(columns={0: "Train % of Missing Values"})
)
print(null_array)

          Train % of Missing Values
type                            0.0
title                           0.0
location                        0.0
bedroom                         0.0
bathroom                        0.0
size_sqm                        0.0
price                           0.0


### Check unique values

In [14]:
# Print the frequency of every distinct value, column by column, to spot
# unexpected categories or malformed entries.
for column in df_main.columns:
    # The header must end with a real newline. The previous end='/n' printed
    # the literal characters "/n" and glued the header to the first row of counts.
    print(f"Value counts for column\n '{column}':")
    print(df_main[column].value_counts())
    print("\n----------------------\n")

Value counts for column
 'type':/nApartment          5848
Villa              2845
Townhouse           858
Twin House          601
Duplex              568
Penthouse           448
iVilla              199
Hotel Apartment      34
Chalet               14
Compound              3
Name: type, dtype: int64

----------------------

Value counts for column
 'title':/nVilla for sale In Lake View L:600 BUE:550 price18M                 14
STANDALONE in Palm Hills Katameya EX.PK2 For Sale                  13
2 bedrooms|2 bath|Terrace|with disc                                13
3 bedrooms|7 years install|Ready to move|Disc 12%                  12
Move Now to Standalone Villa with 5% DP over 8 years Sodic East     9
                                                                   ..
Pay Only 5% DP | Over 9 Years by Tatweer Misr                       1
Apartment 88 m fully finished without over loading                  1
Penthouse In Katameya Plaza Sodic Fully Finished                    1
Penthouse f