# Cleaning

### Data exploration

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from functions import *

In [3]:
# Open the rax dataset
immo = pd.read_csv('../../Datasets/raw_immo_scrap.csv', sep=',', na_values=('None', 'no price'))
zip_code = pd.read_csv('../../Datasets/postal_code.csv', sep = ';')

In [4]:
immo.shape

(52077, 20)

In [5]:
immo

Unnamed: 0,locality,type_of_property,subtype_of_property,price,type_of_sale,number_of_rooms,house_area,fully_equipped_kitchen,furnished,open_fire,terrace,terrace_area,garden,garden_area,surface_of_the_land,surface_of_the_plot_of_land,number_of_facades,swimming_pool,state_of_the_building,construction_year
0,1050,house,house,340000.0,for sale,6.0,203.0,1,,0,1,,0,,95.0,,2.0,0,to be done up,1901.0
1,1880,house,villa,525000.0,for sale,6.0,250.0,1,,0,1,40.0,1,430.0,826.0,,4.0,0,as new,1992.0
2,4900,house,exceptional property,550000.0,for sale,11.0,475.0,1,,0,1,,1,1400.0,1543.0,,4.0,0,good,1853.0
3,7912,house,villa,550000.0,for sale,4.0,325.0,1,,0,1,125.0,1,2333.0,3570.0,,4.0,0,good,1918.0
4,6032,house,house,550000.0,for sale,5.0,400.0,1,,0,1,80.0,1,500.0,616.0,,3.0,0,as new,1977.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52072,3120,house,house,455496.0,for sale,3.0,,0,,0,0,,0,,695.0,,,0,,
52073,1800,house,house,451650.0,for sale,3.0,,0,,0,0,,0,,550.0,,3.0,0,,
52074,2018,house,house,488000.0,for sale,3.0,145.0,0,,0,0,,1,48.0,0.0,,3.0,0,,
52075,9140,house,house,455000.0,for sale,3.0,,0,,0,0,,0,,1202.0,,4.0,0,,


In [6]:
immo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52077 entries, 0 to 52076
Data columns (total 20 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   locality                     52077 non-null  int64  
 1   type_of_property             52077 non-null  object 
 2   subtype_of_property          52077 non-null  object 
 3   price                        52007 non-null  float64
 4   type_of_sale                 52077 non-null  object 
 5   number_of_rooms              52075 non-null  float64
 6   house_area                   43445 non-null  float64
 7   fully_equipped_kitchen       52077 non-null  int64  
 8   furnished                    0 non-null      float64
 9   open_fire                    52077 non-null  int64  
 10  terrace                      52077 non-null  int64  
 11  terrace_area                 17518 non-null  float64
 12  garden                       52077 non-null  int64  
 13  garden_area     

In [7]:
immo.describe(include='all').transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
locality,52077,,,,5406.22,2958.59,1000.0,2600.0,5060.0,8430.0,9992.0
type_of_property,52077,2.0,house,29353.0,,,,,,,
subtype_of_property,52077,24.0,house,22100.0,,,,,,,
price,52007,,,,305756.0,167907.0,1000.0,195000.0,267000.0,370000.0,950000.0
type_of_sale,52077,1.0,for sale,52077.0,,,,,,,
number_of_rooms,52075,,,,2.81458,2.20298,0.0,2.0,3.0,3.0,204.0
house_area,43445,,,,153.721,183.355,1.0,90.0,128.0,185.0,31700.0
fully_equipped_kitchen,52077,,,,0.652284,0.47625,0.0,0.0,1.0,1.0,1.0
furnished,0,,,,,,,,,,
open_fire,52077,,,,0.0469497,0.211533,0.0,0.0,0.0,0.0,1.0


In [8]:
immo.dtypes

locality                         int64
type_of_property                object
subtype_of_property             object
price                          float64
type_of_sale                    object
number_of_rooms                float64
house_area                     float64
fully_equipped_kitchen           int64
furnished                      float64
open_fire                        int64
terrace                          int64
terrace_area                   float64
garden                           int64
garden_area                    float64
surface_of_the_land            float64
surface_of_the_plot_of_land    float64
number_of_facades              float64
swimming_pool                    int64
state_of_the_building           object
construction_year              float64
dtype: object

In [9]:
immo.isnull().sum()

locality                           0
type_of_property                   0
subtype_of_property                0
price                             70
type_of_sale                       0
number_of_rooms                    2
house_area                      8632
fully_equipped_kitchen             0
furnished                      52077
open_fire                          0
terrace                            0
terrace_area                   34559
garden                             0
garden_area                    43624
surface_of_the_land            22724
surface_of_the_plot_of_land    52077
number_of_facades              13650
swimming_pool                      0
state_of_the_building          13586
construction_year              21369
dtype: int64

### Cleaning

###### Drop useless columns (not filled/all the same)

In [10]:
immo = immo.drop(["furnished", "surface_of_the_plot_of_land", "type_of_sale"], axis=1)

###### Drop rows with null in essential columns

In [11]:
immo.dropna(axis=0, inplace=True, subset=['house_area'])
immo.dropna(axis=0, inplace=True, subset=['price'])
immo.isna().sum()

locality                      0
type_of_property              0
subtype_of_property           0
price                         0
number_of_rooms               2
house_area                    0
fully_equipped_kitchen        0
open_fire                     0
terrace                       0
terrace_area              26488
garden                        0
garden_area               35339
surface_of_the_land       20649
number_of_facades         11064
swimming_pool                 0
state_of_the_building     10615
construction_year         16527
dtype: int64

###### Replace some non essential unknow values by 0

In [12]:
immo['terrace_area'] = immo['terrace_area'].fillna(0)
immo['garden_area'] = immo['garden_area'].fillna(0)
immo['surface_of_the_land'] = immo['surface_of_the_land'].fillna(0)

###### Drop Duplicates

In [13]:
immo.shape

(43401, 17)

In [14]:
immo = immo.drop_duplicates()

In [15]:
immo.shape

(42368, 17)

###### Setting the type

In [16]:
immo.dtypes

locality                    int64
type_of_property           object
subtype_of_property        object
price                     float64
number_of_rooms           float64
house_area                float64
fully_equipped_kitchen      int64
open_fire                   int64
terrace                     int64
terrace_area              float64
garden                      int64
garden_area               float64
surface_of_the_land       float64
number_of_facades         float64
swimming_pool               int64
state_of_the_building      object
construction_year         float64
dtype: object

In [17]:
immo['price'] = immo['price'].astype('int64')
immo['number_of_rooms'] = immo['number_of_rooms'].astype('Int64')
immo['house_area'] = immo['house_area'].astype('int64')
immo['terrace_area'] = immo['terrace_area'].astype('Int64')
immo['garden_area'] = immo['garden_area'].astype('Int64')
immo['surface_of_the_land'] = immo['surface_of_the_land'].astype('Int64')
immo['number_of_facades'] = immo['number_of_facades'].astype('Int64')
immo['construction_year'] = immo['construction_year'].astype('Int64')

In [18]:
immo.dtypes

locality                   int64
type_of_property          object
subtype_of_property       object
price                      int64
number_of_rooms            Int64
house_area                 int64
fully_equipped_kitchen     int64
open_fire                  int64
terrace                    int64
terrace_area               Int64
garden                     int64
garden_area                Int64
surface_of_the_land        Int64
number_of_facades          Int64
swimming_pool              int64
state_of_the_building     object
construction_year          Int64
dtype: object

###### Decision : wich variables are essentials ?

Arbitrarily, I decided that the locality, the price, the house area, the surface of the land and the state of the building are essentials.
Others columns will be cleaned on demand only, but these 5 will always be cleaned (but some ouliers will be cut off based on other categories).

###### Locality

In [19]:
zip_code

Unnamed: 0,column_1,column_2,column_3,column_4,coordonnees,geom
0,1060,Saint-Gilles,4.345668,50.826741,"50.8267409, 4.345668",
1,1080,Molenbeek-Saint-Jean,4.322778,50.854355,"50.8543551, 4.3227779",
2,1083,Ganshoren,4.317510,50.871240,"50.8712396, 4.3175103",
3,1120,Neder-Over-Heembeek,4.390489,50.897796,"50.89779605, 4.39048886842",
4,1300,Wavre,4.607744,50.716419,"50.7164189, 4.607744",
...,...,...,...,...,...,...
2752,9970,Kaprijke,3.621925,51.228952,"51.228952, 3.62192513724",
2753,9971,Lembeke,3.634631,51.194240,"51.19424, 3.63463121345",
2754,9981,Sint-Margriete,3.531501,51.267672,"51.26767195, 3.53150055658",
2755,9991,Adegem,3.485847,51.203529,"51.2035291, 3.4858471",


In [20]:
zip_code = zip_code.drop(["coordonnees", "geom"], axis=1)

In [21]:
zip_code

Unnamed: 0,column_1,column_2,column_3,column_4
0,1060,Saint-Gilles,4.345668,50.826741
1,1080,Molenbeek-Saint-Jean,4.322778,50.854355
2,1083,Ganshoren,4.317510,50.871240
3,1120,Neder-Over-Heembeek,4.390489,50.897796
4,1300,Wavre,4.607744,50.716419
...,...,...,...,...
2752,9970,Kaprijke,3.621925,51.228952
2753,9971,Lembeke,3.634631,51.194240
2754,9981,Sint-Margriete,3.531501,51.267672
2755,9991,Adegem,3.485847,51.203529


In [22]:
zip_code.rename(columns={"column_1": "locality", 
                                    "column_2": "city_name", 
                                    'column_3': 'lattitude', 
                                    'column_4': 'longitude'}, inplace=True)

In [23]:
zip_code

Unnamed: 0,locality,city_name,lattitude,longitude
0,1060,Saint-Gilles,4.345668,50.826741
1,1080,Molenbeek-Saint-Jean,4.322778,50.854355
2,1083,Ganshoren,4.317510,50.871240
3,1120,Neder-Over-Heembeek,4.390489,50.897796
4,1300,Wavre,4.607744,50.716419
...,...,...,...,...
2752,9970,Kaprijke,3.621925,51.228952
2753,9971,Lembeke,3.634631,51.194240
2754,9981,Sint-Margriete,3.531501,51.267672
2755,9991,Adegem,3.485847,51.203529


In [24]:
zip_code.drop_duplicates(subset=['locality'], inplace=True)

In [25]:
immo = pd.merge(immo, zip_code, on=['locality'], how='inner')

In [26]:
immo

Unnamed: 0,locality,type_of_property,subtype_of_property,price,number_of_rooms,house_area,fully_equipped_kitchen,open_fire,terrace,terrace_area,garden,garden_area,surface_of_the_land,number_of_facades,swimming_pool,state_of_the_building,construction_year,city_name,lattitude,longitude
0,1050,house,house,340000,6,203,1,0,1,0,0,0,95,2,0,to be done up,1901,Ixelles,4.381571,50.822285
1,1050,house,mixed use building,520000,4,200,0,0,0,0,0,0,69,2,0,to renovate,1940,Ixelles,4.381571,50.822285
2,1050,house,house,599000,4,160,1,0,1,0,1,55,100,2,0,to be done up,1898,Ixelles,4.381571,50.822285
3,1050,house,house,599000,3,160,1,0,1,15,1,60,130,2,0,good,1953,Ixelles,4.381571,50.822285
4,1050,house,house,575000,3,171,0,0,0,0,0,0,46,2,0,just renovated,,Ixelles,4.381571,50.822285
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42362,1472,house,villa,475000,5,215,1,0,1,0,0,0,1550,,1,good,,Vieux-Genappe,4.401503,50.629025
42363,1461,house,villa,499000,5,275,1,0,1,0,1,0,1561,4,0,,1983,Haut-Ittre,4.296472,50.648804
42364,6686,house,chalet,495000,4,227,1,0,1,35,1,4400,4446,4,0,good,1983,Flamierge,5.603876,50.033352
42365,1761,house,villa,495000,4,235,1,0,0,0,1,0,488,4,0,,2020,Borchtlombeek,4.136915,50.848178


###### Subtype of property

In [27]:
# Subtype of property is related to the property's type.

print_unique_dtype(immo, 'subtype_of_property')
immo.subtype_of_property.value_counts()

23 - object


house                   16654
apartment               15336
villa                    2377
duplex                   1278
ground floor             1099
penthouse                 830
apartment block           795
mixed use building        741
flat studio               724
mansion                   394
exceptional property      382
town house                356
service flat              255
country cottage           237
bungalow                  226
loft                      199
chalet                    112
farmhouse                  98
triplex                    72
manor house                68
kot                        68
other property             58
castle                      7
pavilion                    1
Name: subtype_of_property, dtype: int64

In [28]:
# Drop castle & pavilion.
immo = immo[(immo['subtype_of_property'] != 'castle') & 
        (immo['subtype_of_property'] != 'pavilion')]

print_unique_dtype(immo, 'subtype_of_property')
immo.shape

21 - object


(42359, 20)

###### Price

In [29]:
print_unique_dtype(immo, 'price')

168 - int64


In [30]:
immo.price.describe()

count     42359.000000
mean     313492.573739
std      170021.022191
min        2500.000000
25%      199000.000000
50%      271300.000000
75%      379000.000000
max      950000.000000
Name: price, dtype: float64

In [31]:
immo[immo['price'] == 2500]

Unnamed: 0,locality,type_of_property,subtype_of_property,price,number_of_rooms,house_area,fully_equipped_kitchen,open_fire,terrace,terrace_area,garden,garden_area,surface_of_the_land,number_of_facades,swimming_pool,state_of_the_building,construction_year,city_name,lattitude,longitude
8146,1400,apartment,apartment,2500,3,90,1,0,1,4,0,0,0,4,0,as new,1978,Nivelles,4.331909,50.589163
33518,2530,house,exceptional property,2500,5,600,1,1,1,60,1,1750,1750,4,0,as new,1842,Boechout,4.510407,51.159694
37850,6960,house,other property,2500,3,154,1,1,1,18,1,3000,3000,4,0,as new,2006,Malempré,5.715525,50.281616


In [32]:
# Remove too much lower price, taking in consideration the house area
immo = immo[(immo['price'] > 10000)]
immo = immo[(immo['price'] > 35000) | (immo['house_area'] < 80)]

In [33]:
immo.price.describe()

count     42290.000000
mean     313966.527240
std      169753.614369
min       11825.000000
25%      199000.000000
50%      272500.000000
75%      379900.000000
max      950000.000000
Name: price, dtype: float64

In [34]:
immo[immo['price'] < 30000]

Unnamed: 0,locality,type_of_property,subtype_of_property,price,number_of_rooms,house_area,fully_equipped_kitchen,open_fire,terrace,terrace_area,garden,garden_area,surface_of_the_land,number_of_facades,swimming_pool,state_of_the_building,construction_year,city_name,lattitude,longitude
1843,4000,apartment,ground floor,28000,2,25,0,0,0,0,0,0,0,1,0,,,Glain,5.541864,50.648205
8472,8301,apartment,flat studio,20000,0,42,0,0,0,0,0,0,0,2,1,to renovate,1971.0,Ramskapelle,3.2505,51.312695
9163,8400,apartment,apartment,25000,2,72,1,0,1,5,0,0,0,2,0,good,1994.0,Oostende,2.920327,51.230318
30363,6740,apartment,kot,14500,1,28,1,0,0,0,0,0,0,4,0,good,2015.0,Villers-Sur-Semois,5.561466,49.698746
34271,5575,house,chalet,28500,1,36,1,0,0,0,0,0,264,4,0,,,Patignies,4.95212,49.99998
38838,5540,house,chalet,25000,1,60,1,0,0,0,0,0,215,4,0,to renovate,,Hastière,4.822554,50.197356
39170,6041,house,house,29900,2,70,1,0,0,0,0,0,41,2,0,to renovate,1874.0,Gosselies,4.430176,50.465385
40928,3111,house,house,11825,1,13,0,0,0,0,0,0,13,2,0,as new,2017.0,Wezemaal,4.768331,50.948255
41618,6464,house,house,20000,2,45,1,0,1,15,1,120,200,4,0,,,Bourlers,4.341293,50.027359


###### Number of rooms

In [35]:
immo.number_of_rooms.describe()

count    42288.000000
mean         2.811649
std          2.337715
min          0.000000
25%          2.000000
50%          3.000000
75%          3.000000
max        204.000000
Name: number_of_rooms, dtype: float64

In [36]:
immo.number_of_rooms.value_counts()

2      14029
3      13542
4       5927
1       4309
5       2147
6        888
0        819
7        278
8        144
9         62
10        57
11        28
12        21
14         6
16         6
15         5
13         4
18         4
204        3
24         2
20         2
30         2
22         1
165        1
23         1
Name: number_of_rooms, dtype: Int64

In [37]:
# Remove number_of_rooms >= 165
immo = immo[immo['number_of_rooms'] < 165]
immo.number_of_rooms.value_counts()

2     14029
3     13542
4      5927
1      4309
5      2147
6       888
0       819
7       278
8       144
9        62
10       57
11       28
12       21
14        6
16        6
15        5
13        4
18        4
24        2
20        2
30        2
22        1
23        1
Name: number_of_rooms, dtype: Int64

###### House Area

In [38]:
immo.house_area.value_counts()

90      904
120     898
100     889
150     838
140     769
       ... 
529       1
484       1
452       1
912       1
1407      1
Name: house_area, Length: 692, dtype: int64

In [39]:
immo.house_area.describe()

count    42284.000000
mean       154.691609
std        185.268171
min          1.000000
25%         91.000000
50%        130.000000
75%        186.000000
max      31700.000000
Name: house_area, dtype: float64

In [40]:
immo[immo['house_area'] < 15]

Unnamed: 0,locality,type_of_property,subtype_of_property,price,number_of_rooms,house_area,fully_equipped_kitchen,open_fire,terrace,terrace_area,garden,garden_area,surface_of_the_land,number_of_facades,swimming_pool,state_of_the_building,construction_year,city_name,lattitude,longitude
2290,4540,apartment,apartment,185000,2,11,0,0,1,5,0,0,0,3.0,0,as new,,Flône,5.335474,50.558239
9739,8400,apartment,apartment,425000,2,1,0,0,1,0,0,0,0,2.0,0,good,1961.0,Oostende,2.920327,51.230318
14502,2800,apartment,kot,99000,1,14,0,0,0,0,0,0,0,2.0,0,,2014.0,Mechelen,4.471321,51.028033
18053,3290,apartment,apartment,265000,2,1,0,0,0,0,1,0,0,,0,,,Deurne,5.096457,51.039238
21288,8870,apartment,apartment,125000,4,5,0,0,0,0,0,0,0,,0,as new,,Izegem,3.209065,50.913822
27619,4845,house,villa,323000,4,1,0,0,1,0,0,0,913,,0,as new,2013.0,Jalhay,5.964753,50.559253
27621,4845,house,villa,307000,3,1,0,0,0,0,1,500,0,4.0,0,as new,2019.0,Jalhay,5.964753,50.559253
28881,9340,apartment,penthouse,297500,3,13,0,0,1,0,0,0,0,,0,,2018.0,Oordegem,3.90209,50.957404
30807,2000,apartment,kot,115000,1,14,1,0,1,0,0,0,0,,0,good,,Antwerpen,4.399708,51.22111
34907,8791,house,house,335000,4,5,1,0,0,0,0,0,484,,0,good,1956.0,Beveren,3.342747,50.871701


In [41]:
immo[immo['house_area'] > 1500]

Unnamed: 0,locality,type_of_property,subtype_of_property,price,number_of_rooms,house_area,fully_equipped_kitchen,open_fire,terrace,terrace_area,garden,garden_area,surface_of_the_land,number_of_facades,swimming_pool,state_of_the_building,construction_year,city_name,lattitude,longitude
8932,9600,house,house,650000,1,1640,0,0,0,0,0,0,1640,3.0,0,good,1991.0,Renaix,3.602046,50.747619
9824,8400,house,house,219000,4,2019,0,0,0,0,0,0,165,2.0,0,,,Oostende,2.920327,51.230318
17163,7050,house,mixed use building,175000,2,1700,0,0,0,0,0,0,1700,4.0,0,to renovate,,Herchies,3.857709,50.528238
17961,4800,house,apartment block,250000,20,2400,0,0,1,0,0,0,1667,2.0,0,good,,Lambermont,5.191001,49.70519
19361,6200,house,mixed use building,399000,0,1525,0,0,0,0,0,0,790,2.0,0,good,,Bouffioulx,4.515196,50.390213
21983,2880,house,manor house,750000,3,3560,1,1,1,24,1,3560,3560,4.0,0,good,1979.0,Bornem,4.242326,51.092465
29305,3000,apartment,kot,250000,1,31700,1,0,1,0,0,0,0,,0,as new,,Leuven,4.69299,50.881253
33077,7140,house,mixed use building,360000,0,1900,0,0,1,0,0,0,0,3.0,0,,,Morlanwelz,4.252356,50.450984
33078,7140,house,apartment block,360000,0,1900,0,0,1,0,0,0,0,3.0,0,,,Morlanwelz,4.252356,50.450984
39129,6698,house,country cottage,399000,0,2562,0,0,0,0,0,0,100144,4.0,0,,,Grand-Halleux,5.907252,50.326275


In [42]:
# Drop too small and too big house_area, taking the price in consideration
immo = immo[(immo['house_area'] > 10) & 
            ((immo['house_area'] < 1000) | ((immo['house_area'] > 1000) & (immo['price'] > 390000)))]

In [43]:
immo.shape

(42254, 20)

###### Surface of the Land

In [44]:
immo.surface_of_the_land.describe()

count     42254.000000
mean        531.321887
std        3564.685553
min           0.000000
25%           0.000000
50%           0.000000
75%         400.000000
max      400000.000000
Name: surface_of_the_land, dtype: float64

In [45]:
immo[immo['surface_of_the_land'] == 0]

Unnamed: 0,locality,type_of_property,subtype_of_property,price,number_of_rooms,house_area,fully_equipped_kitchen,open_fire,terrace,terrace_area,garden,garden_area,surface_of_the_land,number_of_facades,swimming_pool,state_of_the_building,construction_year,city_name,lattitude,longitude
5,1050,house,house,590000,4,225,0,0,1,0,0,0,0,2,0,to renovate,,Ixelles,4.381571,50.822285
6,1050,house,house,575000,4,209,1,0,0,0,0,0,0,2,0,,,Ixelles,4.381571,50.822285
16,1050,house,house,685000,6,280,1,0,0,0,0,0,0,2,0,,1956,Ixelles,4.381571,50.822285
20,1050,house,apartment block,750000,0,307,0,0,0,0,0,0,0,2,0,as new,,Ixelles,4.381571,50.822285
25,1050,house,house,795000,4,240,1,0,1,8,1,59,0,2,0,good,1907,Ixelles,4.381571,50.822285
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42269,5022,house,house,240000,2,230,0,0,0,0,1,0,0,3,0,as new,1885,Cognelée,4.912729,50.517188
42275,6221,house,house,210000,2,131,1,0,1,0,0,0,0,3,0,good,,Saint-Amand,4.531924,50.511168
42306,6723,house,house,385000,3,201,0,0,1,25,1,111,0,2,0,as new,2020,Habay-La-Vieille,5.622511,49.722842
42355,4342,house,house,425000,3,315,1,0,1,124,1,250,0,3,0,,2002,Hognoul,5.455639,50.680810


In [46]:
immo[(immo['surface_of_the_land']<2) & (immo['garden_area']>2)]

Unnamed: 0,locality,type_of_property,subtype_of_property,price,number_of_rooms,house_area,fully_equipped_kitchen,open_fire,terrace,terrace_area,garden,garden_area,surface_of_the_land,number_of_facades,swimming_pool,state_of_the_building,construction_year,city_name,lattitude,longitude
25,1050,house,house,795000,4,240,1,0,1,8,1,59,0,2,0,good,1907,Ixelles,4.381571,50.822285
69,1050,apartment,flat studio,220000,0,49,1,0,0,0,1,23,0,2,0,as new,2019,Ixelles,4.381571,50.822285
74,1050,apartment,flat studio,220000,0,49,1,0,0,0,1,35,0,,0,as new,2019,Ixelles,4.381571,50.822285
149,1050,apartment,ground floor,395000,2,91,1,0,1,15,1,20,0,,0,good,1979,Ixelles,4.381571,50.822285
185,1050,apartment,ground floor,440000,1,85,0,0,1,0,1,70,0,,0,good,,Ixelles,4.381571,50.822285
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41792,6020,house,house,115000,3,160,0,0,0,0,1,190,0,3,0,,,Dampremy,4.432246,50.418618
41883,6762,house,house,110000,4,120,0,0,1,15,1,200,0,,0,to renovate,1950,Saint-Mard,5.528451,49.557756
41943,4560,house,farmhouse,199000,4,195,1,0,0,0,1,500,0,3,0,to be done up,1899,Clavier,5.357569,50.411509
42306,6723,house,house,385000,3,201,0,0,1,25,1,111,0,2,0,as new,2020,Habay-La-Vieille,5.622511,49.722842


In [48]:
immo['total_area'] = immo['surface_of_the_land'].apply(set_surface)

TypeError: 'int' object is not subscriptable

###### State of the building

###### Kitchen

###### Open Fire

###### Terrace

###### Terrace Area

###### Garden

###### Garden Area

###### Number of facades

###### Swimming pool

###### Construction year