In [1]:
# import modules and libraries
import pandas as pd
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
import seaborn as sns
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
raw_data = sorted(glob('data_*.csv'))
raw_data

['data_flats.csv', 'data_house.csv']

In [3]:
df = pd.concat((pd.read_csv(data).assign(filename = data)
          for data in raw_data), ignore_index = True)

df.to_csv('all_data.csv', index = False)

In [4]:
data = pd.read_csv('all_data.csv')
data.head()

Unnamed: 0,property_type,location,price,features,filename
0,Sharp Two Bedroom Apartment,"Lbs Lekkki Ajah Lagos, Ajah, Lagos","1,200,000 per annum",2 Bedrooms3 Bathrooms3 Toilets12 Parking Space...,data_flats.csv
1,Two Bedroom Bungalow At Cooperative Villa Esta...,"Badore, Ajah, Lagos","1,000,000 per annum",2 Bedrooms2 Toilets Save,data_flats.csv
2,2 Bedroom Guest Charlet,"Cooperative Villa, Badore, Ajah, Lagos","1,000,000 per annum",2 Bedrooms2 Bathrooms3 Toilets Save,data_flats.csv
3,A Fairly Used & Spacious 2 Bedroom Flat,"Atunrase Estate, Gbagada, Lagos","1,100,000 per annum",2 Bedrooms2 Bathrooms3 Toilets1 Parking Space ...,data_flats.csv
4,Affordable 1 Bedroom Apartment,"Oniru, Victoria Island (VI), Lagos","1,800,000 per annum",1 Bedroom1 Bathroom2 Toilets Save,data_flats.csv


In [5]:
data.shape

(2320, 5)

In [6]:
data['address'] = data['location'].apply(lambda x: x.split(',')[0])

In [7]:
data['location'] = data['location'].apply(lambda x: x.split(',')[1])

In [8]:
data['price'] = data['price'].str.replace(',', '')

In [9]:
data['price'] = data['price'].apply(lambda x: x.split()[0])
data['price'] = data['price'].str.replace('all', '0')
data['price'].astype(int)

0       1200000
1       1000000
2       1000000
3       1100000
4       1800000
         ...   
2315    3000000
2316    5000000
2317    3500000
2318    5000000
2319    8000000
Name: price, Length: 2320, dtype: int32

In [10]:
data['bedroom'] = data['features'].apply(lambda x: x.split()[0])

In [11]:
data.bedroom

0       2
1       2
2       2
3       2
4       1
       ..
2315    4
2316    5
2317    4
2318    4
2319    4
Name: bedroom, Length: 2320, dtype: object

In [12]:
bath = data['features'].apply(lambda x: x.replace('Bedrooms','').replace('Bedroom',''))

In [13]:
bath

0        2 3 Bathrooms3 Toilets12 Parking Spaces Save
1                                    2 2 Toilets Save
2                         2 2 Bathrooms3 Toilets Save
3          2 2 Bathrooms3 Toilets1 Parking Space Save
4                          1 1 Bathroom2 Toilets Save
                            ...                      
2315      4 4 Bathrooms5 Toilets3 Parking Spaces Save
2316      5 5 Bathrooms6 Toilets6 Parking Spaces Save
2317                      4 4 Bathrooms5 Toilets Save
2318    4 4 Bathrooms5 Toilets800 Parking Spaces Save
2319                      4 4 Bathrooms5 Toilets Save
Name: features, Length: 2320, dtype: object

In [14]:
data['bathroom'] = bath.apply(lambda x: x.strip()[2])

In [15]:
toilets = bath.apply(lambda x: x.replace('Bathrooms','').replace('Bathroom','').replace('Toilets','').replace('Toilet',''))

In [16]:
toilets

0        2 3 3 12 Parking Spaces Save
1                           2 2  Save
2                         2 2 3  Save
3          2 2 3 1 Parking Space Save
4                         1 1 2  Save
                    ...              
2315      4 4 5 3 Parking Spaces Save
2316      5 5 6 6 Parking Spaces Save
2317                      4 4 5  Save
2318    4 4 5 800 Parking Spaces Save
2319                      4 4 5  Save
Name: features, Length: 2320, dtype: object

In [17]:
data['toilet'] = toilets.apply(lambda x: x.strip()[3:5])

In [18]:
data['parking_lot'] = toilets.apply(lambda x: x.strip()[5:8])

In [19]:
data.head()

Unnamed: 0,property_type,location,price,features,filename,address,bedroom,bathroom,toilet,parking_lot
0,Sharp Two Bedroom Apartment,Ajah,1200000,2 Bedrooms3 Bathrooms3 Toilets12 Parking Space...,data_flats.csv,Lbs Lekkki Ajah Lagos,2,3,3.0,12
1,Two Bedroom Bungalow At Cooperative Villa Esta...,Ajah,1000000,2 Bedrooms2 Toilets Save,data_flats.csv,Badore,2,2,,Sav
2,2 Bedroom Guest Charlet,Badore,1000000,2 Bedrooms2 Bathrooms3 Toilets Save,data_flats.csv,Cooperative Villa,2,2,3.0,S
3,A Fairly Used & Spacious 2 Bedroom Flat,Gbagada,1100000,2 Bedrooms2 Bathrooms3 Toilets1 Parking Space ...,data_flats.csv,Atunrase Estate,2,2,3.0,1
4,Affordable 1 Bedroom Apartment,Victoria Island (VI),1800000,1 Bedroom1 Bathroom2 Toilets Save,data_flats.csv,Oniru,1,1,2.0,S


In [21]:
data['bq'] = data['property_type'].apply(lambda x: 1 if 'Bq' in x else 0)

In [25]:
data['parking_lot'].unique()

array([' 12', 'Sav', '  S', ' 1 ', '0 P', ' Pa', ' 78', ' 2 ', 've',
       ' 3 ', ' 4 ', ' 8 ', ' 5 ', ' 1,', '3  ', '', ' 7 ', 'ark', '3 2',
       ' 10', ' 6 ', ' 50', 'Par', ' 15', ' 36', ' 24', ' 95', ' 20',
       ' 9 ', ' 64', ' 40', 'kin', '00 ', '50 ', ' 80', ' 23', '3 P',
       ' 30', '2 P', ' 65', ' 16', ' 11', 'qm ', ' 14', ' 70', ' 3,',
       '5 P', '1 1', ' 18', ' 25', '20 ', ' 45'], dtype=object)

In [30]:
data['parking_lot'] = data['parking_lot'].apply(lambda x: x.replace('0av,','0').replace('S','0').replace('ve','0'))

In [31]:
data.head(10)

Unnamed: 0,property_type,location,price,features,filename,address,bedroom,bathroom,toilet,parking_lot,bq
0,Sharp Two Bedroom Apartment,Ajah,1200000,2 Bedrooms3 Bathrooms3 Toilets12 Parking Space...,data_flats.csv,Lbs Lekkki Ajah Lagos,2,3,3.0,12,0
1,Two Bedroom Bungalow At Cooperative Villa Esta...,Ajah,1000000,2 Bedrooms2 Toilets Save,data_flats.csv,Badore,2,2,,0av,0
2,2 Bedroom Guest Charlet,Badore,1000000,2 Bedrooms2 Bathrooms3 Toilets Save,data_flats.csv,Cooperative Villa,2,2,3.0,0,0
3,A Fairly Used & Spacious 2 Bedroom Flat,Gbagada,1100000,2 Bedrooms2 Bathrooms3 Toilets1 Parking Space ...,data_flats.csv,Atunrase Estate,2,2,3.0,1,0
4,Affordable 1 Bedroom Apartment,Victoria Island (VI),1800000,1 Bedroom1 Bathroom2 Toilets Save,data_flats.csv,Oniru,1,1,2.0,0,0
5,Affordable 2 Bedroom Apartment,Ikoyi,6000000,2 Bedrooms2 Bathrooms3 Toilets Save,data_flats.csv,Old Ikoyi,2,2,3.0,0,0
6,A Lovely & Nice Very Good Mini Flat With Car Park,Akoka,700000,1 Bedroom1 Bathroom2 Toilets1 Parking Space Save,data_flats.csv,Off Community Road,1,1,2.0,1,0
7,Cozy Mini Flat,Victoria Island (VI),2250000,1 Bedroom1 Bathroom1 Toilet Save,data_flats.csv,Oniru,1,1,1.0,0,0
8,A Room Self Contained,Agungi Extention,350000,1 Bathroom2 Toilets10 Parking Spaces600 sqm To...,data_flats.csv,Osapa,1,B,1.0,0 P,0
9,A Room Self Contained.,Lekki Expressway,450000,1 Bathroom1 Toilet6 Parking Spaces500 sqm Tota...,data_flats.csv,Agungi,1,B,6.0,Pa,0


In [32]:
data.to_csv('data.csv', index = False)