In [1]:
# Importing the libraries
import pandas as pd
import re

In [2]:
# Load the full dataset from a CSV path that is relative to the notebook's working directory.
data = pd.read_csv('../data/full_dataset.csv')

In [3]:
# Preview the first rows to check that the file loaded with the expected columns.
data.head()

Unnamed: 0,Page URL,Title,Location,Bedrooms,Baths,Toilets,Price,Date Added,Contact,More Details
0,https://www.propertypro.ng/property-for-sale?s...,,Parkview Estate Ikoyi Lagos,4 beds,4 baths,5 Toilets,$,"Updated 04 Aug 2023, Added 06 Mar 2023",8124269637,"Updated 04 Aug 2023, Added 06 Mar 2023 \n\n\n\..."
1,https://www.propertypro.ng/property-for-sale?s...,4 BEDROOM HOUSE FOR SALE,Chevron Lekki Lagos,4 beds,4 baths,5 Toilets,₦,"Updated 04 Aug 2023, Added 12 Jul 2023",8037557097,"Updated 04 Aug 2023, Added 12 Jul 2023 \n\n\n\..."
2,https://www.propertypro.ng/property-for-sale?s...,,Ajah Lagos,3 beds,3 baths,4 Toilets,₦,"Updated 04 Aug 2023, Added 12 Jul 2023",9062000899,"Updated 04 Aug 2023, Added 12 Jul 2023 \n\n\n\..."
3,https://www.propertypro.ng/property-for-sale?s...,4 BEDROOM HOUSE FOR SALE,Monarch Estate Lugbe Abuja,4 beds,4 baths,6 Toilets,₦,"Updated 04 Aug 2023, Added 26 Jul 2023",8035811984,"Updated 04 Aug 2023, Added 26 Jul 2023 \n\n\n\..."
4,https://www.propertypro.ng/property-for-sale?s...,2 BEDROOM FLAT / APARTMENT FOR SALE,Off Freedom Way Lekki Lagos,2 beds,2 baths,3 Toilets,₦,"Updated 04 Aug 2023, Added 24 Mar 2023",8177766115,"Updated 04 Aug 2023, Added 24 Mar 2023 \n\n\n\..."


In [4]:
# Show each column's dtype and non-null count; every column is still `object` at this point,
# and in the run recorded below only Title and Contact have missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59572 entries, 0 to 59571
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Page URL      59572 non-null  object
 1   Title         59454 non-null  object
 2   Location      59572 non-null  object
 3   Bedrooms      59572 non-null  object
 4   Baths         59572 non-null  object
 5   Toilets       59572 non-null  object
 6   Price         59572 non-null  object
 7   Date Added    59572 non-null  object
 8   Contact       59488 non-null  object
 9   More Details  59572 non-null  object
dtypes: object(10)
memory usage: 4.5+ MB


## Data Cleaning

In [5]:
# Creating the property type column
def property_type(row):
    """Classify a listing by the keywords found in its page URL.

    Parameters
    ----------
    row : str
        The page URL of one listing (a single string, despite the name).

    Returns
    -------
    str or None
        "Land", "For sale", "For rent" or "Short Let" for the first keyword
        found, checked in that order; None when the URL holds none of them.
    """
    # Order matters: the first keyword found wins, so "land" outranks "sale".
    # NOTE(review): these are plain substring tests, so "land" also matches
    # words such as "island" -- confirm no URL carries such a term.
    keyword_labels = (
        ("land", "Land"),
        ("sale", "For sale"),
        ("rent", "For rent"),
        ("short-let", 'Short Let'),
    )
    for keyword, label in keyword_labels:
        if keyword in row:
            return label
    # Fall through explicitly so an unrecognised URL yields a missing value
    # instead of an UnboundLocalError on an unassigned local.
    return None

# Label every listing by applying property_type to its Page URL string.
data["Property Type"] = data['Page URL'].apply(property_type)

In [6]:
# Cleaning the bedrooms, baths and toilets
def _count_from_label(labels, unit):
    """Convert labels such as "4 beds" into integer counts.

    Parameters
    ----------
    labels : pandas.Series
        Raw text labels (or counts that were already converted by an earlier run).
    unit : str
        Characters to trim from both ends of each label. `str.strip` treats
        this as a set of characters rather than a suffix, which is safe here
        because none of the units contains a digit.

    Returns
    -------
    pandas.Series
        Integer counts; a label left empty after trimming becomes 0.
    """
    # astype(str) keeps the cell re-runnable: without it, a second run fails
    # because the `.str` accessor is unavailable on an integer column.
    digits = labels.astype(str).str.strip(unit).str.strip()
    # Substitute the string '0' (not the int 0) so the column stays uniformly
    # textual until the single astype(int) conversion.
    return digits.replace('', '0').astype(int)


data['Bedrooms'] = _count_from_label(data['Bedrooms'], 'beds')
data['Baths'] = _count_from_label(data['Baths'], 'baths')
data['Toilets'] = _count_from_label(data['Toilets'], 'Toilets')

In [7]:
# Getting the state column
def _state_from_location(location):
    """Return the trailing state name of a whitespace-separated location string."""
    words = location.split()
    # A final word of 'River' is only the tail of a two-word name, so the word
    # before it is kept as well.
    # NOTE(review): other multi-word state names are not special-cased and
    # collapse to their last word -- confirm none occur in the data.
    if words[-1] == 'River':
        return " ".join(words[-2:])
    return words[-1]


data['State'] = data['Location'].apply(_state_from_location)

In [8]:
# Extracting the price from the more details column
# For each row, collect every occurrence of the row's currency symbol (the Price
# column) followed by any run of digits, commas and whitespace in More Details.
# re.escape makes the whole symbol literal, and the raw string avoids the invalid
# escape sequences that a plain f-string pattern triggers warnings for.
list_of_price = [
    re.findall(re.escape(symbol) + r"[\d,\s]*", details)
    for symbol, details in data[['Price', "More Details"]].values
]

In [9]:
def extract_correct_price(chain):
    """Choose one price string from the regex matches found for a listing.

    Parameters
    ----------
    chain : list of str
        Candidate matches for one row; may be empty.

    Returns
    -------
    str
        The candidate holding the most digits (ties go to the longer, then the
        earlier candidate) with commas removed and surrounding whitespace
        stripped; an empty string when there are no candidates.
    """
    if len(chain) == 0:
        return ""

    def _rank(candidate):
        # Count digits first: ranking on raw length alone lets a candidate that
        # is mostly whitespace outrank the one carrying the actual amount.
        return sum(ch.isdigit() for ch in candidate), len(candidate)

    # max() returns the first of several equally ranked candidates.
    best = max(chain, key=_rank)
    return best.replace(',', "").strip()

# Reduce each row's list of candidate matches to a single cleaned price string
extracted_price = [extract_correct_price(matches) for matches in list_of_price]

In [10]:
# Store the extracted price strings (currency symbol kept, commas removed) in a new "ext price" column.
data["ext price"] = extracted_price

In [11]:
# Preview the frame again to confirm the new Property Type, State and ext price columns.
data.head()

Unnamed: 0,Page URL,Title,Location,Bedrooms,Baths,Toilets,Price,Date Added,Contact,More Details,Property Type,State,ext price
0,https://www.propertypro.ng/property-for-sale?s...,,Parkview Estate Ikoyi Lagos,4,4,5,$,"Updated 04 Aug 2023, Added 06 Mar 2023",8124269637,"Updated 04 Aug 2023, Added 06 Mar 2023 \n\n\n\...",For sale,Lagos,$500000
1,https://www.propertypro.ng/property-for-sale?s...,4 BEDROOM HOUSE FOR SALE,Chevron Lekki Lagos,4,4,5,₦,"Updated 04 Aug 2023, Added 12 Jul 2023",8037557097,"Updated 04 Aug 2023, Added 12 Jul 2023 \n\n\n\...",For sale,Lagos,₦140000000
2,https://www.propertypro.ng/property-for-sale?s...,,Ajah Lagos,3,3,4,₦,"Updated 04 Aug 2023, Added 12 Jul 2023",9062000899,"Updated 04 Aug 2023, Added 12 Jul 2023 \n\n\n\...",For sale,Lagos,₦75000000
3,https://www.propertypro.ng/property-for-sale?s...,4 BEDROOM HOUSE FOR SALE,Monarch Estate Lugbe Abuja,4,4,6,₦,"Updated 04 Aug 2023, Added 26 Jul 2023",8035811984,"Updated 04 Aug 2023, Added 26 Jul 2023 \n\n\n\...",For sale,Abuja,₦69000000
4,https://www.propertypro.ng/property-for-sale?s...,2 BEDROOM FLAT / APARTMENT FOR SALE,Off Freedom Way Lekki Lagos,2,2,3,₦,"Updated 04 Aug 2023, Added 24 Mar 2023",8177766115,"Updated 04 Aug 2023, Added 24 Mar 2023 \n\n\n\...",For sale,Lagos,₦80000000
