### Importing pandas

In [9]:
import pandas as pd 

### Read the CSV file into a pandas DataFrame

In [10]:
# Load the raw listings export; the relative path is resolved against the kernel's working directory
data = pd.read_csv(filepath_or_buffer='listings.csv')

### Explore the dataset

In [11]:
print(data.head())  # Print the first few rows of the DataFrame
# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
data.info()  # Get information about the DataFrame

       id                          listing_url       scrape_id last_scraped  \
0   90676   https://www.airbnb.com/rooms/90676  20230328184402   2023-03-28   
1  543140  https://www.airbnb.com/rooms/543140  20230328184402   2023-03-28   
2  591101  https://www.airbnb.com/rooms/591101  20230328184402   2023-03-28   
3  923248  https://www.airbnb.com/rooms/923248  20230328184402   2023-03-28   
4  927867  https://www.airbnb.com/rooms/927867  20230328184402   2023-03-28   

        source                                              name  \
0  city scrape                  Short North - Italianate Cottage   
1  city scrape                Private queen bedroom 1 - N.Campus   
2  city scrape                    Bellows Studio Loft  Apartment   
3  city scrape  1 Single Bed in a Shared Coed Dorm at the Hostel   
4  city scrape                   Full Private Room at the Hostel   

                                         description  \
0  Just steps from High Street and all the action...   
1  P

### Handle missing values

In [12]:
# Drop rows with missing price values. The result is rebound to `data`
# instead of using inplace=True, which matches the rebind pattern used by
# the outlier filter and keeps the step chainable.
data = data.dropna(subset=['price'])

### Convert data types

In [13]:
# Convert price to numeric: strip the currency symbol and the thousands
# separators, then cast to float. regex=False forces both replacements to be
# literal, so '$' is never interpreted as the end-of-string regex anchor and
# the result does not depend on the pandas version's default for `regex`.
# NOTE: this cell expects `price` to still hold strings; re-running it after
# the column has become float will fail on the .str accessor.
data['price'] = (
    data['price']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

  data['price'] = data['price'].str.replace('$', '').str.replace(',', '').astype(float)  # Convert price to numeric


### Remove outliers

In [14]:
# Outlier filter: retain only the listings priced at $1000 or less
data = data.loc[data['price'].le(1000)]


In [15]:
# Show the first five rows that remain after the price filter
data.head(n=5)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,90676,https://www.airbnb.com/rooms/90676,20230328184402,2023-03-28,city scrape,Short North - Italianate Cottage,Just steps from High Street and all the action...,The Short North Italianate Cottage is located ...,https://a0.muscache.com/pictures/950e43cd-53f3...,483306,...,4.88,4.93,4.78,2022-2475,f,3,3,0,0,4.64
1,543140,https://www.airbnb.com/rooms/543140,20230328184402,2023-03-28,city scrape,Private queen bedroom 1 - N.Campus,"Private, second-floor queen bed with desk, ple...",We are close to a lot of things!,https://a0.muscache.com/pictures/e720cdf0-e36b...,2350409,...,4.89,4.77,4.8,2019-1344,f,3,0,3,0,0.96
2,591101,https://www.airbnb.com/rooms/591101,20230328184402,2023-03-28,city scrape,Bellows Studio Loft Apartment,Famous American artist George Bellows home wit...,A historic neighborhood of beautiful victorian...,https://a0.muscache.com/pictures/083ed5ee-7b8d...,2889677,...,4.91,4.89,4.88,2019-1230,f,1,0,1,0,2.16
3,923248,https://www.airbnb.com/rooms/923248,20230328184402,2023-03-28,city scrape,1 Single Bed in a Shared Coed Dorm at the Hostel,This is a shared dormitory room of up to 5 peo...,We are located in the vibrant University Distr...,https://a0.muscache.com/pictures/29aabf51-4e6f...,4965048,...,4.89,4.69,4.85,2019-1314,f,8,2,4,2,2.49
4,927867,https://www.airbnb.com/rooms/927867,20230328184402,2023-03-28,city scrape,Full Private Room at the Hostel,The Wayfaring Buckeye Hostel is a social place...,We are located in the vibrant University Distr...,https://a0.muscache.com/pictures/08033ebe-286c...,4965048,...,4.9,4.72,4.7,2019-1314,f,8,2,4,2,0.59


### Standardize columns

In [16]:
# Strip the trailing '%' from the response rate and rescale it to a decimal fraction
data['host_response_rate'] = (
    data['host_response_rate']
    .str.rstrip('%')
    .astype(float)
    .div(100)
)


In [17]:
# Show the first five rows after converting the response rate
data.head(n=5)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,90676,https://www.airbnb.com/rooms/90676,20230328184402,2023-03-28,city scrape,Short North - Italianate Cottage,Just steps from High Street and all the action...,The Short North Italianate Cottage is located ...,https://a0.muscache.com/pictures/950e43cd-53f3...,483306,...,4.88,4.93,4.78,2022-2475,f,3,3,0,0,4.64
1,543140,https://www.airbnb.com/rooms/543140,20230328184402,2023-03-28,city scrape,Private queen bedroom 1 - N.Campus,"Private, second-floor queen bed with desk, ple...",We are close to a lot of things!,https://a0.muscache.com/pictures/e720cdf0-e36b...,2350409,...,4.89,4.77,4.8,2019-1344,f,3,0,3,0,0.96
2,591101,https://www.airbnb.com/rooms/591101,20230328184402,2023-03-28,city scrape,Bellows Studio Loft Apartment,Famous American artist George Bellows home wit...,A historic neighborhood of beautiful victorian...,https://a0.muscache.com/pictures/083ed5ee-7b8d...,2889677,...,4.91,4.89,4.88,2019-1230,f,1,0,1,0,2.16
3,923248,https://www.airbnb.com/rooms/923248,20230328184402,2023-03-28,city scrape,1 Single Bed in a Shared Coed Dorm at the Hostel,This is a shared dormitory room of up to 5 peo...,We are located in the vibrant University Distr...,https://a0.muscache.com/pictures/29aabf51-4e6f...,4965048,...,4.89,4.69,4.85,2019-1314,f,8,2,4,2,2.49
4,927867,https://www.airbnb.com/rooms/927867,20230328184402,2023-03-28,city scrape,Full Private Room at the Hostel,The Wayfaring Buckeye Hostel is a social place...,We are located in the vibrant University Distr...,https://a0.muscache.com/pictures/08033ebe-286c...,4965048,...,4.9,4.72,4.7,2019-1314,f,8,2,4,2,0.59


### Clean text data

In [18]:
# Normalize the listing descriptions to lowercase text
data['description'] = (
    data['description']
    .str.lower()
)


In [19]:
# Show the first five rows to confirm the descriptions are now lowercase
data.head(n=5)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,90676,https://www.airbnb.com/rooms/90676,20230328184402,2023-03-28,city scrape,Short North - Italianate Cottage,just steps from high street and all the action...,The Short North Italianate Cottage is located ...,https://a0.muscache.com/pictures/950e43cd-53f3...,483306,...,4.88,4.93,4.78,2022-2475,f,3,3,0,0,4.64
1,543140,https://www.airbnb.com/rooms/543140,20230328184402,2023-03-28,city scrape,Private queen bedroom 1 - N.Campus,"private, second-floor queen bed with desk, ple...",We are close to a lot of things!,https://a0.muscache.com/pictures/e720cdf0-e36b...,2350409,...,4.89,4.77,4.8,2019-1344,f,3,0,3,0,0.96
2,591101,https://www.airbnb.com/rooms/591101,20230328184402,2023-03-28,city scrape,Bellows Studio Loft Apartment,famous american artist george bellows home wit...,A historic neighborhood of beautiful victorian...,https://a0.muscache.com/pictures/083ed5ee-7b8d...,2889677,...,4.91,4.89,4.88,2019-1230,f,1,0,1,0,2.16
3,923248,https://www.airbnb.com/rooms/923248,20230328184402,2023-03-28,city scrape,1 Single Bed in a Shared Coed Dorm at the Hostel,this is a shared dormitory room of up to 5 peo...,We are located in the vibrant University Distr...,https://a0.muscache.com/pictures/29aabf51-4e6f...,4965048,...,4.89,4.69,4.85,2019-1314,f,8,2,4,2,2.49
4,927867,https://www.airbnb.com/rooms/927867,20230328184402,2023-03-28,city scrape,Full Private Room at the Hostel,the wayfaring buckeye hostel is a social place...,We are located in the vibrant University Distr...,https://a0.muscache.com/pictures/08033ebe-286c...,4965048,...,4.9,4.72,4.7,2019-1314,f,8,2,4,2,0.59


In [21]:
# Display the Index of column labels currently present in the DataFrame
data.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name',
       'description', 'neighborhood_overview', 'picture_url', 'host_id',
       'host_url', 'host_name', 'host_since', 'host_location', 'host_about',
       'host_response_time', 'host_response_rate', 'host_acceptance_rate',
       'host_is_superhost', 'host_thumbnail_url', 'host_picture_url',
       'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'latitude',
       'longitude', 'property_type', 'room_type', 'accommodates', 'bathrooms',
       'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price',
       'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
       'maximum_minimum_nights', 'minimum_maximum_nights',
       'maximum_maximum_nights', 'minimum_nights_avg_ntm',
       'maximum_nights_avg_ntm', 'ca

In [23]:
# Summary statistics for the numeric columns, with the default quartiles spelled out
data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,id,scrape_id,host_id,host_response_rate,host_listings_count,host_total_listings_count,neighbourhood_group_cleansed,latitude,longitude,accommodates,...,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
count,2341.0,2341.0,2341.0,2216.0,2341.0,2341.0,0.0,2341.0,2341.0,2341.0,...,2050.0,2050.0,2050.0,2050.0,2050.0,2341.0,2341.0,2341.0,2341.0,2050.0
mean,3.759875e+17,20230330000000.0,203410300.0,0.986625,74.044425,103.041008,,39.983172,-82.990244,5.028193,...,4.777185,4.883571,4.886312,4.737937,4.736815,15.098249,13.387868,1.639043,0.011534,2.53082
std,3.638235e+17,1.238546,157768600.0,0.069749,394.902523,549.523935,,0.040432,0.039825,3.205107,...,0.347013,0.23387,0.249507,0.362511,0.34644,21.824332,21.60846,5.742256,0.135067,2.030628
min,90676.0,20230330000000.0,78761.0,0.0,1.0,1.0,,39.86121,-83.1468,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.02
25%,44316770.0,20230330000000.0,59263230.0,1.0,2.0,3.0,,39.95622,-83.00773,2.0,...,4.71,4.87,4.87,4.67,4.67,2.0,1.0,0.0,0.0,1.0
50%,5.644468e+17,20230330000000.0,158439600.0,1.0,7.0,8.0,,39.97885,-82.99747,4.0,...,4.89,4.96,4.97,4.86,4.83,5.0,3.0,0.0,0.0,2.05
75%,7.268476e+17,20230330000000.0,325933000.0,1.0,25.0,30.0,,39.99666,-82.974822,6.0,...,5.0,5.0,5.0,4.97,4.93,21.0,19.0,0.0,0.0,3.57
max,8.567594e+17,20230330000000.0,506095200.0,1.0,3303.0,4535.0,,40.152417,-82.77934,16.0,...,5.0,5.0,5.0,5.0,5.0,97.0,97.0,36.0,2.0,12.88


### Save the cleaned data to a new CSV file

In [24]:
# Write the cleaned frame to disk; index=False leaves the row index out of the file
data.to_csv(path_or_buf='cleaned_airbnb_data.csv', index=False)
