Initial Setup

In [192]:
# Import libraries
import pandas as pd 
import numpy as np

import matplotlib.pyplot as plt
%pip% install seaborn

from scipy.stats import zscore

In [193]:
# Load the listings export into a DataFrame; the path is relative, so the
# CSV is looked up from the notebook's working directory.
listing_data = pd.read_csv(filepath_or_buffer='listings.csv')

In [194]:
# Initial look at the data: preview the first five rows
listing_data.head(n=5)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,157612,https://www.airbnb.com/rooms/157612,20250318145053,2025-03-18,city scrape,New attic space/single & Dble room,"The loft space is a small but cosy, private an...",There is a public park within easy walking dis...,https://a0.muscache.com/pictures/18150718/745a...,757016,...,4.94,4.68,4.89,,f,1,1,0,0,1.02
1,283495,https://www.airbnb.com/rooms/283495,20250318145053,2025-03-18,city scrape,En-suite room in detached house,,The suburbaness of it all but 2 minutes from t...,https://a0.muscache.com/pictures/78775473/2d8f...,1476718,...,5.0,4.8,5.0,,f,1,0,1,0,0.08
2,310742,https://www.airbnb.com/rooms/310742,20250318145053,2025-03-18,city scrape,Nice room 10 minutes walk from town,,,https://a0.muscache.com/pictures/3387158/a2f58...,1603652,...,4.86,4.81,4.65,,t,1,0,1,0,0.41
3,332580,https://www.airbnb.com/rooms/332580,20250318145053,2025-03-18,city scrape,**ELEGANT STAY** CENTRAL MANCHESTER,"An Elegant Stay right into the CITY CENTRE, in...",You will be staying in the trendy Northern Qua...,https://a0.muscache.com/pictures/52792580-e354...,1694961,...,4.94,4.87,4.82,,f,4,3,1,0,2.25
4,360142,https://www.airbnb.com/rooms/360142,20250318145053,2025-03-18,city scrape,Light double room with own bathroom,Lovely bright room at rear of house. Double be...,It's a proper cosmopolitan inner city neighbou...,https://a0.muscache.com/pictures/13631809/c147...,1821587,...,4.87,4.52,4.73,,f,2,0,2,0,0.31


Cleaning the Data

In [195]:
# Keep only the columns needed for the analysis; everything else is dropped
columns_to_keep = [
    'name',
    'description',
    'picture_url',
    'host_since',
    'host_response_time',
    'host_acceptance_rate',
    'host_is_superhost',
    'host_total_listings_count',
    'host_identity_verified',
    'neighbourhood_cleansed',
    'neighbourhood_group_cleansed',
    'latitude',
    'longitude',
    'property_type',
    'room_type',
    'accommodates',
    'bathrooms', 
    'bedrooms',
    'amenities',
    'price',
    # Add more column names here as needed
]

# Take an explicit copy of the selection. Later cells add and overwrite
# columns on `listing_data`; doing that on a frame pandas still tracks as a
# selection of another frame triggers SettingWithCopyWarning.
listing_data = listing_data[columns_to_keep].copy()

In [196]:
# Summarise missing data: percentage of null entries per column
total_rows = len(listing_data)
missing_values_count = listing_data.isna().sum()

missing_values_percentage = missing_values_count.div(total_rows).mul(100)

# Keep only the columns that actually have gaps, most incomplete first
has_missing = missing_values_percentage > 0
columns_with_missing_percentage = (
    missing_values_percentage[has_missing]
    .sort_values(ascending=False)
)

# Format each percentage with two decimals for the printed report
print("Columns with missing data and their percentage:")
print(columns_with_missing_percentage.map(lambda x: f"{x:.2f}%"))


Columns with missing data and their percentage:
host_response_time           12.98%
bathrooms                    12.00%
price                        11.97%
host_acceptance_rate          5.59%
bedrooms                      3.09%
host_is_superhost             2.35%
description                   2.32%
host_since                    0.03%
host_total_listings_count     0.03%
host_identity_verified        0.03%
dtype: object


In [197]:
# Drop every row that lacks a value in any of the columns listed below.
# NOTE(review): the result is stored under a new name, while the cells visible
# after this one keep operating on `listing_data` — confirm which frame the
# rest of the notebook is meant to use.
columns_requiring_values = [
    'host_response_time',
    'bathrooms',
    'price',
    'host_acceptance_rate',
    'bedrooms',
    'host_is_superhost',
    'description',
    'host_since',
    'host_total_listings_count',
    'host_identity_verified',
]
listing_data_cleaned = listing_data.dropna(subset=columns_requiring_values)

Cleaning the Price Column

In [198]:
# Cleaning Price Column
# Take the text form of the price, strip every character that is not a digit
# or a decimal point, blank out leftovers that carry no number ('' or a lone
# '.'), and convert the rest to a numeric dtype. Anything that still cannot
# be parsed becomes NaN because of errors='coerce'.
price_text = listing_data['price'].astype(str)
price_digits = price_text.str.replace(r'[^\d.]', '', regex=True)
price_digits = price_digits.replace('', np.nan).replace('.', np.nan)

listing_data['price_cleaned'] = pd.to_numeric(price_digits, errors='coerce')

Name, Description

Picture

Host Since

In [199]:
# Parse the host sign-up date; values that cannot be parsed become NaT
listing_data['host_since'] = pd.to_datetime(listing_data['host_since'], errors='coerce')

# Drop rows without a usable sign-up date. Reassigning the result instead of
# using `inplace=True` avoids mutating a frame that was derived from a
# selection of another frame and keeps the step explicit.
listing_data = listing_data.dropna(subset=['host_since'])

In [200]:
# Express host tenure as a whole number of days up to a fixed reference date
# NOTE(review): the reference date is hard-coded — confirm it is the intended
# cut-off for this analysis.
reference_date = pd.to_datetime('2025-06-04')
host_tenure = reference_date - listing_data['host_since']
listing_data['host_days'] = host_tenure.dt.days
print(listing_data['host_days'])

0       5089
1       4929
2       4891
3       4870
4       4846
        ... 
7146    1798
7147      79
7148     534
7149     714
7150     714
Name: host_days, Length: 7149, dtype: int64


Host Response Time

In [201]:
# List the distinct response-time categories (missing values included)
# before choosing an encoding for them
listing_data.loc[:, 'host_response_time'].unique()

array(['within a few hours', nan, 'within an hour', 'within a day',
       'a few days or more'], dtype=object)

In [202]:
# Encoding the variable
# Ordinal codes: a larger number means a slower response, 0 marks a missing
# value. `Series.map` turns any value that is not a key of the dict into NaN,
# so every category present in the column needs an entry here.
# NOTE: this overwrites the text column with its codes, so running the cell a
# second time would map the codes through the same dict and produce NaN.
response_time_mapping = {
    'within a few minutes': 1,
    'within an hour': 2,
    'within a few hours': 3,
    'within a day': 4,
    # Present in the data (see the unique values listed for this column);
    # without this entry these rows were silently encoded as NaN.
    'a few days or more': 5,
    np.nan: 0 
}

listing_data['host_response_time'] = listing_data['host_response_time'].map(response_time_mapping)

Host response rate and host acceptance rate

Host is a Superhost 

In [203]:
# Encode the superhost flag as 1 ('t') or 0 ('f'); a value that is not a key
# of the dict (such as a missing entry) comes out as NaN
mapping = {
    't': 1,
    'f': 0,
}
superhost_flag = listing_data['host_is_superhost']
listing_data['host_is_superhost'] = superhost_flag.map(mapping)

In [205]:
# Display the working frame to check the columns added and re-encoded so far
listing_data

Unnamed: 0,name,description,picture_url,host_since,host_response_time,host_acceptance_rate,host_is_superhost,host_total_listings_count,host_identity_verified,neighbourhood_cleansed,...,longitude,property_type,room_type,accommodates,bathrooms,bedrooms,amenities,price,price_cleaned,host_days
0,New attic space/single & Dble room,"The loft space is a small but cosy, private an...",https://a0.muscache.com/pictures/18150718/745a...,2011-06-29,3.0,69%,1.0,3.0,t,Salford District,...,-2.262490,Entire loft,Entire home/apt,3,1.5,2.0,"[""Shampoo"", ""Iron"", ""Clothing storage: closet ...",$45.00,45.0,5089
1,En-suite room in detached house,,https://a0.muscache.com/pictures/78775473/2d8f...,2011-12-06,0.0,,0.0,1.0,f,Rochdale District,...,-2.218240,Private room in home,Private room,2,1.0,1.0,"[""Shampoo"", ""Iron"", ""Free parking on premises""...",$75.00,75.0,4929
2,Nice room 10 minutes walk from town,,https://a0.muscache.com/pictures/3387158/a2f58...,2012-01-13,3.0,74%,1.0,6.0,t,Ancoats and Clayton,...,-2.229190,Private room in rental unit,Private room,1,1.0,1.0,"[""Washer"", ""Kitchen"", ""Gym"", ""TV"", ""Elevator"",...",$38.00,38.0,4891
3,**ELEGANT STAY** CENTRAL MANCHESTER,"An Elegant Stay right into the CITY CENTRE, in...",https://a0.muscache.com/pictures/52792580-e354...,2012-02-03,2.0,90%,0.0,12.0,t,City Centre,...,-2.232849,Private room in condo,Private room,1,1.0,1.0,"[""Shampoo"", ""Iron"", ""Clothing storage: closet ...",$42.00,42.0,4870
4,Light double room with own bathroom,Lovely bright room at rear of house. Double be...,https://a0.muscache.com/pictures/13631809/c147...,2012-02-27,3.0,71%,0.0,3.0,t,Moss Side,...,-2.235420,Private room in townhouse,Private room,2,2.0,1.0,"[""Children\u2019s books and toys"", ""Iron"", ""Ca...",$35.00,35.0,4846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7146,House Bolton,Relax together in a peaceful place to stay.,https://a0.muscache.com/pictures/hosting/Hosti...,2020-07-02,2.0,,0.0,3.0,t,Bolton District,...,-2.456652,Entire home,Entire home/apt,2,1.0,1.0,"[""Washer"", ""Kitchen"", ""Free parking on premise...",$85.00,85.0,1798
7147,Kozzy 2 Bedroom Flat,"Kozzy 2-Bedroom Flat Near Piccadilly Station, ...",https://a0.muscache.com/pictures/hosting/Hosti...,2025-03-17,0.0,,0.0,1.0,f,City Centre,...,-2.232804,Entire rental unit,Entire home/apt,5,2.0,2.0,"[""Shampoo"", ""Iron"", ""Carbon monoxide alarm"", ""...",$166.00,166.0,79
7148,10%OFF|Month Deal|Contractor|Parking|WiFi|Sleep6,🏳 𝐇𝐨𝐬𝐭 𝐀 𝐇𝐨𝐦𝐞 Short Lets & Serviced Accommodat...,https://a0.muscache.com/pictures/hosting/Hosti...,2023-12-18,2.0,97%,0.0,7.0,t,Ancoats and Clayton,...,-2.180201,Entire home,Entire home/apt,6,1.0,3.0,"[""Washer"", ""First aid kit"", ""Kitchen"", ""TV"", ""...",$122.00,122.0,534
7149,Long Stays - Free Parking - Media City Apartment,"Located directly in Media City, Salford Quays,...",https://a0.muscache.com/pictures/hosting/Hosti...,2023-06-21,2.0,99%,0.0,75.0,t,Salford District,...,-2.281962,Entire rental unit,Entire home/apt,6,2.0,2.0,"[""Shampoo"", ""Iron"", ""Free parking on premises""...",$140.00,140.0,714
