# ADS Capstone Project: Airbnb Pricing Prediction
#### By Renetta Nelson, Michael Nguyen and Jacqueline Urenda

# Data Preparation

***Note: Will be adding into the proper loading of csv files once we're able to figure out how to add large files into Github***

In [1]:
#importing the libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import LabelEncoder

## Preparing the calendar dataset:

In [2]:
calendar = pd.read_csv("/Users/jackieurenda/Desktop/Capstone Datasets/calendar-9-18-22.csv")
calendar.head(5)

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,29967,2022-09-18,f,$225.00,$225.00,4.0,365.0
1,29967,2022-09-19,f,$225.00,$225.00,4.0,365.0
2,29967,2022-09-20,f,$225.00,$225.00,4.0,365.0
3,29967,2022-09-21,f,$225.00,$225.00,4.0,365.0
4,29967,2022-09-22,f,$225.00,$225.00,4.0,365.0


In [3]:
calendar.dtypes

listing_id          int64
date               object
available          object
price              object
adjusted_price     object
minimum_nights    float64
maximum_nights    float64
dtype: object

Converting the data types into the correct format:

In [4]:
calendar['price'] = calendar['price'].str.replace('$', '').str.replace(',','')
calendar['price'] = pd.to_numeric(calendar['price'])

calendar['adjusted_price'] = calendar['adjusted_price'].str.replace('$', '').str.replace(',','')
calendar['adjusted_price'] = pd.to_numeric(calendar['adjusted_price'])

In [5]:
#Converting the date column into the correct data formart

calendar['date'] = pd.to_datetime(calendar['date'])

In [6]:

calendar.describe()

Unnamed: 0,listing_id,price,adjusted_price,minimum_nights,maximum_nights
count,5173672.0,5170977.0,5170977.0,5167851.0,5167851.0
mean,1.598379e+17,396.3504,394.7934,7.522396,304089.2
std,2.767907e+17,1185.625,1183.286,20.55726,25521460.0
min,29967.0,0.0,0.0,0.0,0.0
25%,25013540.0,125.0,125.0,2.0,365.0
50%,45409520.0,211.0,211.0,3.0,1125.0
75%,5.223218e+17,399.0,399.0,5.0,1125.0
max,7.181068e+17,100000.0,100000.0,999.0,2147484000.0


"price" and "adjusted_price" features are the same based on the descriptive statistics. One of these columns can be dropped.

In [7]:
#deleting "adjusted_price" column from the dataset:

del calendar['adjusted_price']
calendar.head(3)

Unnamed: 0,listing_id,date,available,price,minimum_nights,maximum_nights
0,29967,2022-09-18,f,225.0,4.0,365.0
1,29967,2022-09-19,f,225.0,4.0,365.0
2,29967,2022-09-20,f,225.0,4.0,365.0


Resolving null values in the dataset:

In [8]:
calendar.isnull().sum()

listing_id           0
date                 0
available            0
price             2695
minimum_nights    5821
maximum_nights    5821
dtype: int64

In [9]:
#imputing the pricing null values with the mean

calendar['price'] = calendar['price'].fillna(calendar['price'].mean())

In [10]:
#imputing min and max nights with the mode
calendar['minimum_nights'] = calendar['minimum_nights'].fillna(calendar['minimum_nights'].mode()[0])

calendar['maximum_nights'] = calendar['maximum_nights'].fillna(calendar['maximum_nights'].mode()[0])

In [11]:
calendar.isnull().sum()

listing_id        0
date              0
available         0
price             0
minimum_nights    0
maximum_nights    0
dtype: int64

 Converting categorical feature "available" to numeric

In [12]:
#Converting "available" feature to numeric

calendar['available'] = pd.factorize(calendar['available'])[0]

calendar.head(3)

Unnamed: 0,listing_id,date,available,price,minimum_nights,maximum_nights
0,29967,2022-09-18,0,225.0,4.0,365.0
1,29967,2022-09-19,0,225.0,4.0,365.0
2,29967,2022-09-20,0,225.0,4.0,365.0


"f" categorical variable converted numerically to 0. "t" categorical variable converted numerically to 1

## Preparing the San Diego Weather Dataset:

In [13]:
weather = pd.read_csv("/Users/jackieurenda/Desktop/Capstone Datasets/SD_Weather_Data.csv")

weather.head(3)

Unnamed: 0,Date,Precip,Max Temp,Min Temp
0,20220918,0,76.0,59.0
1,20220919,0,78.0,57.0
2,20220920,0,80.0,58.0


Converting the Date column into proper Date format:

In [14]:
#Converting the date column into the correct data formart


def convert_date(date_str):
    try:
        return pd.to_datetime(date_str, format='%Y%m%d')
    except ValueError:
        return pd.NaT
weather['Date'] = weather['Date'].apply(convert_date)

weather.head()


Unnamed: 0,Date,Precip,Max Temp,Min Temp
0,2022-09-18,0,76.0,59.0
1,2022-09-19,0,78.0,57.0
2,2022-09-20,0,80.0,58.0
3,2022-09-21,0,79.0,62.0
4,2022-09-22,0,81.0,56.0


Resolving the null values in the dataset:

In [15]:
weather.isnull().sum()

Date        27
Precip      26
Max Temp    27
Min Temp    27
dtype: int64

In [16]:
#Dropping Null Vaues in the Dataset

weather = weather.dropna()

In [17]:
weather.isnull().sum()

Date        0
Precip      0
Max Temp    0
Min Temp    0
dtype: int64

Converting the "Precip" feature to float:

In [18]:
# Convert decimal column to float

weather["Precip"] = weather["Precip"].astype(float)

weather.dtypes

Date        datetime64[ns]
Precip             float64
Max Temp           float64
Min Temp           float64
dtype: object

#### Feature Engineering:

Creating an average daily temperature feature:

In [19]:
weather['average temp'] = weather[['Max Temp', 'Min Temp']].mean(axis=1)

weather.head()

Unnamed: 0,Date,Precip,Max Temp,Min Temp,average temp
0,2022-09-18,0.0,76.0,59.0,67.5
1,2022-09-19,0.0,78.0,57.0,67.5
2,2022-09-20,0.0,80.0,58.0,69.0
3,2022-09-21,0.0,79.0,62.0,70.5
4,2022-09-22,0.0,81.0,56.0,68.5


In [20]:
#renaming the columbs to have same format as other datasets:

weather.rename(columns = {'Date':'date', 'Precip':'precip', 'Max Temp':'max_temp', 'Min Temp': 'min_temp'}, inplace = True)

In [21]:
weather.head()

Unnamed: 0,date,precip,max_temp,min_temp,average temp
0,2022-09-18,0.0,76.0,59.0,67.5
1,2022-09-19,0.0,78.0,57.0,67.5
2,2022-09-20,0.0,80.0,58.0,69.0
3,2022-09-21,0.0,79.0,62.0,70.5
4,2022-09-22,0.0,81.0,56.0,68.5


## Preparing the Listings Dataset:

In [22]:
listings = pd.read_csv("/Users/jackieurenda/Desktop/Capstone Datasets/listings-9-18-22.csv")

listings.head(3)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,29967,https://www.airbnb.com/rooms/29967,20220918143850,2022-09-18,city scrape,"Great home, 10 min walk to Beach","This 2 bedroom, 2 full bathroom home offers al...",,https://a0.muscache.com/pictures/76103867/c9b5...,129123,...,4.91,4.8,4.76,,f,5,5,0,0,0.59
1,38245,https://www.airbnb.com/rooms/38245,20220918143850,2022-09-18,city scrape,Point Loma: Den downstairs,Stay in a 1928 Spanish Style home. Den is dow...,"This neighborhood is an expensive, high-end Po...",https://a0.muscache.com/pictures/f3073822-e493...,164137,...,4.79,4.61,4.4,,f,3,0,3,0,1.02
2,108349,https://www.airbnb.com/rooms/108349,20220918143850,2022-09-19,previous scrape,Felspar Townhome- Beach Living by Boardwalk & ...,Find location and privacy in our spacious town...,Pacific Beach is the most fun beach neighborho...,https://a0.muscache.com/pictures/a25c4e97-913d...,546519,...,4.81,4.89,4.69,,t,2,2,0,0,1.2


Dataset has a lot of features that needs to be reduced

In [23]:
listings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14188 entries, 0 to 14187
Data columns (total 75 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            14188 non-null  int64  
 1   listing_url                                   14188 non-null  object 
 2   scrape_id                                     14188 non-null  int64  
 3   last_scraped                                  14188 non-null  object 
 4   source                                        14188 non-null  object 
 5   name                                          14188 non-null  object 
 6   description                                   14060 non-null  object 
 7   neighborhood_overview                         9306 non-null   object 
 8   picture_url                                   14188 non-null  object 
 9   host_id                                       14188 non-null 

In [24]:
#dropping initial unneccesary features: 

drop_columns = ['listing_url', 'scrape_id','source', 'name', 'description', 
       'neighborhood_overview', 'picture_url', 'host_id', 'host_url', 'host_name', 'host_since', 'host_location',
         'host_about', 'host_about', 'host_response_time', 'host_response_rate', 'host_acceptance_rate', 
         'host_thumbnail_url','host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'neighbourhood_group_cleansed',
       'latitude', 'longitude', 'minimum_nights', 'maximum_nights',
       'minimum_minimum_nights', 'maximum_minimum_nights',
       'minimum_maximum_nights', 'maximum_maximum_nights',
       'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm',
       'calendar_updated', 'has_availability', 'availability_30',
       'availability_60', 'availability_90', 'availability_365',
       'calendar_last_scraped', 'last_scraped', 'host_picture_url', 'number_of_reviews_ltm', 'number_of_reviews_l30d', 'first_review',
       'last_review', 'license', 'calculated_host_listings_count',
       'calculated_host_listings_count_entire_homes',
       'calculated_host_listings_count_private_rooms',
       'calculated_host_listings_count_shared_rooms', 'reviews_per_month', 'bathrooms', 'amenities', 'price']

listings = listings.drop(columns=drop_columns)

Resolving Null Values:

In [25]:
listings.isnull().sum()

id                                0
host_is_superhost                13
host_neighbourhood             2057
property_type                     0
room_type                         0
accommodates                      0
bathrooms_text                    4
bedrooms                       1273
beds                            161
number_of_reviews                 0
review_scores_rating           1665
review_scores_accuracy         1686
review_scores_cleanliness      1686
review_scores_checkin          1688
review_scores_communication    1686
review_scores_location         1688
review_scores_value            1688
instant_bookable                  0
dtype: int64

In [26]:
#dropping null values for missing review scores and bathroom text



listings.dropna(subset=['host_is_superhost', 'host_neighbourhood', 'review_scores_rating', 'review_scores_accuracy','review_scores_communication', 'review_scores_location', 'bathrooms_text'], inplace=True)

listings.head()

Unnamed: 0,id,host_is_superhost,host_neighbourhood,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable
0,29967,f,Pacific Beach,Entire bungalow,Entire home/apt,8,2 baths,2.0,3.0,88,4.76,4.75,4.55,4.91,4.91,4.8,4.76,f
1,38245,f,Loma Portal,Private room in home,Private room,1,1 shared bath,1.0,1.0,149,4.33,4.35,4.22,4.72,4.79,4.61,4.4,f
2,108349,t,Pacific Beach,Entire townhouse,Entire home/apt,7,2.5 baths,3.0,5.0,162,4.75,4.79,4.55,4.88,4.81,4.89,4.69,t
3,126344,t,Pacific Beach,Entire home,Entire home/apt,8,2 baths,3.0,6.0,183,4.72,4.77,4.73,4.88,4.81,4.78,4.65,t
4,54001,t,La Jolla,Entire guesthouse,Entire home/apt,3,1 bath,2.0,3.0,296,4.94,4.92,4.95,4.97,4.98,4.97,4.89,f


In [27]:
#imputing bedrooms and beds with the mode
listings['bedrooms'] = listings['bedrooms'].fillna(listings['bedrooms'].mode()[0])

listings['beds'] = listings['beds'].fillna(listings['beds'].mode()[0])

In [28]:
listings.isnull().sum()

id                             0
host_is_superhost              0
host_neighbourhood             0
property_type                  0
room_type                      0
accommodates                   0
bathrooms_text                 0
bedrooms                       0
beds                           0
number_of_reviews              0
review_scores_rating           0
review_scores_accuracy         0
review_scores_cleanliness      0
review_scores_checkin          0
review_scores_communication    0
review_scores_location         0
review_scores_value            0
instant_bookable               0
dtype: int64

In [29]:
#Converting "host_is_superhost" feature to numeric

listings['host_is_superhost'] = pd.factorize(listings['host_is_superhost'])[0]


"f" categorical variable converted numerically to 0. "t" categorical variable converted numerically to 1

0 means host isn't a superhost, 1 means host is a superhost

In [30]:
#Converting "instant_bookable" feature to numeric

listings['instant_bookable'] = pd.factorize(listings['instant_bookable'])[0]

listings.head(5)

Unnamed: 0,id,host_is_superhost,host_neighbourhood,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable
0,29967,0,Pacific Beach,Entire bungalow,Entire home/apt,8,2 baths,2.0,3.0,88,4.76,4.75,4.55,4.91,4.91,4.8,4.76,0
1,38245,0,Loma Portal,Private room in home,Private room,1,1 shared bath,1.0,1.0,149,4.33,4.35,4.22,4.72,4.79,4.61,4.4,0
2,108349,1,Pacific Beach,Entire townhouse,Entire home/apt,7,2.5 baths,3.0,5.0,162,4.75,4.79,4.55,4.88,4.81,4.89,4.69,1
3,126344,1,Pacific Beach,Entire home,Entire home/apt,8,2 baths,3.0,6.0,183,4.72,4.77,4.73,4.88,4.81,4.78,4.65,1
4,54001,1,La Jolla,Entire guesthouse,Entire home/apt,3,1 bath,2.0,3.0,296,4.94,4.92,4.95,4.97,4.98,4.97,4.89,0


"f" categorical variable converted numerically to 0. "t" categorical variable converted numerically to 1

0 means listing isn't instant bookable, 1 means listing is instant bookable

In [31]:
listings['bathtroom'] = listings.bathrooms_text.str.extract('(\d+)')

#("\d+\.\d+",string)

listings.head()

Unnamed: 0,id,host_is_superhost,host_neighbourhood,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,bathtroom
0,29967,0,Pacific Beach,Entire bungalow,Entire home/apt,8,2 baths,2.0,3.0,88,4.76,4.75,4.55,4.91,4.91,4.8,4.76,0,2
1,38245,0,Loma Portal,Private room in home,Private room,1,1 shared bath,1.0,1.0,149,4.33,4.35,4.22,4.72,4.79,4.61,4.4,0,1
2,108349,1,Pacific Beach,Entire townhouse,Entire home/apt,7,2.5 baths,3.0,5.0,162,4.75,4.79,4.55,4.88,4.81,4.89,4.69,1,2
3,126344,1,Pacific Beach,Entire home,Entire home/apt,8,2 baths,3.0,6.0,183,4.72,4.77,4.73,4.88,4.81,4.78,4.65,1,2
4,54001,1,La Jolla,Entire guesthouse,Entire home/apt,3,1 bath,2.0,3.0,296,4.94,4.92,4.95,4.97,4.98,4.97,4.89,0,1


In [32]:
listings.nunique()

id                             10645
host_is_superhost                  2
host_neighbourhood               340
property_type                     63
room_type                          4
accommodates                      16
bathrooms_text                    35
bedrooms                          12
beds                              23
number_of_reviews                492
review_scores_rating             146
review_scores_accuracy           144
review_scores_cleanliness        160
review_scores_checkin            129
review_scores_communication      127
review_scores_location           135
review_scores_value              152
instant_bookable                   2
bathtroom                         11
dtype: int64

Label Encoding host neighborhood, property type, room type, 

In [33]:
from sklearn.preprocessing import LabelEncoder

# creating instance of labelencoder
labelencoder = LabelEncoder()
# Assigning numerical values and storing in another column
listings['host_neighbourhood_cat'] = labelencoder.fit_transform(listings['host_neighbourhood'])
listings.head()


Unnamed: 0,id,host_is_superhost,host_neighbourhood,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,bathtroom,host_neighbourhood_cat
0,29967,0,Pacific Beach,Entire bungalow,Entire home/apt,8,2 baths,2.0,3.0,88,4.76,4.75,4.55,4.91,4.91,4.8,4.76,0,2,219
1,38245,0,Loma Portal,Private room in home,Private room,1,1 shared bath,1.0,1.0,149,4.33,4.35,4.22,4.72,4.79,4.61,4.4,0,1,167
2,108349,1,Pacific Beach,Entire townhouse,Entire home/apt,7,2.5 baths,3.0,5.0,162,4.75,4.79,4.55,4.88,4.81,4.89,4.69,1,2,219
3,126344,1,Pacific Beach,Entire home,Entire home/apt,8,2 baths,3.0,6.0,183,4.72,4.77,4.73,4.88,4.81,4.78,4.65,1,2,219
4,54001,1,La Jolla,Entire guesthouse,Entire home/apt,3,1 bath,2.0,3.0,296,4.94,4.92,4.95,4.97,4.98,4.97,4.89,0,1,147


In [34]:
# creating instance of labelencoder
labelencoder = LabelEncoder()
# Assigning numerical values and storing in another column
listings['property_type_cat'] = labelencoder.fit_transform(listings['property_type'])
listings.head()

Unnamed: 0,id,host_is_superhost,host_neighbourhood,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,number_of_reviews,...,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,bathtroom,host_neighbourhood_cat,property_type_cat
0,29967,0,Pacific Beach,Entire bungalow,Entire home/apt,8,2 baths,2.0,3.0,88,...,4.75,4.55,4.91,4.91,4.8,4.76,0,2,219,7
1,38245,0,Loma Portal,Private room in home,Private room,1,1 shared bath,1.0,1.0,149,...,4.35,4.22,4.72,4.79,4.61,4.4,0,1,167,35
2,108349,1,Pacific Beach,Entire townhouse,Entire home/apt,7,2.5 baths,3.0,5.0,162,...,4.79,4.55,4.88,4.81,4.89,4.69,1,2,219,19
3,126344,1,Pacific Beach,Entire home,Entire home/apt,8,2 baths,3.0,6.0,183,...,4.77,4.73,4.88,4.81,4.78,4.65,1,2,219,13
4,54001,1,La Jolla,Entire guesthouse,Entire home/apt,3,1 bath,2.0,3.0,296,...,4.92,4.95,4.97,4.98,4.97,4.89,0,1,147,12


In [35]:
# Assigning numerical values and storing in another column
listings['room_type_cat'] = labelencoder.fit_transform(listings['room_type'])
listings.head()

Unnamed: 0,id,host_is_superhost,host_neighbourhood,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,number_of_reviews,...,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,bathtroom,host_neighbourhood_cat,property_type_cat,room_type_cat
0,29967,0,Pacific Beach,Entire bungalow,Entire home/apt,8,2 baths,2.0,3.0,88,...,4.55,4.91,4.91,4.8,4.76,0,2,219,7,0
1,38245,0,Loma Portal,Private room in home,Private room,1,1 shared bath,1.0,1.0,149,...,4.22,4.72,4.79,4.61,4.4,0,1,167,35,2
2,108349,1,Pacific Beach,Entire townhouse,Entire home/apt,7,2.5 baths,3.0,5.0,162,...,4.55,4.88,4.81,4.89,4.69,1,2,219,19,0
3,126344,1,Pacific Beach,Entire home,Entire home/apt,8,2 baths,3.0,6.0,183,...,4.73,4.88,4.81,4.78,4.65,1,2,219,13,0
4,54001,1,La Jolla,Entire guesthouse,Entire home/apt,3,1 bath,2.0,3.0,296,...,4.95,4.97,4.98,4.97,4.89,0,1,147,12,0


In [36]:
#creating a lookup list for converted categorical variablles

host_neihborhoods = listings[['host_neighbourhood', 'host_neighbourhood_cat']]
host_neihborhoods.drop_duplicates()


host_neihborhoods.head(3)


Unnamed: 0,host_neighbourhood,host_neighbourhood_cat
0,Pacific Beach,219
1,Loma Portal,167
2,Pacific Beach,219


In [37]:
property_type = listings[['property_type', 'property_type_cat']]
property_type.drop_duplicates()

property_type.head(3)

Unnamed: 0,property_type,property_type_cat
0,Entire bungalow,7
1,Private room in home,35
2,Entire townhouse,19


In [38]:
room_type = listings[['room_type', 'room_type_cat']]
room_type.drop_duplicates()

room_type.head(3)

Unnamed: 0,room_type,room_type_cat
0,Entire home/apt,0
1,Private room,2
2,Entire home/apt,0


Dropping additional columns from the dataset:

In [39]:
drop_columns2 = ['host_neighbourhood','property_type','room_type', 'bathrooms_text']

listings = listings.drop(columns=drop_columns2)

In [40]:
#renaning the "id" column to "listing_id"

listings.rename(columns = {'id':'listing_id'}, inplace = True)


listings.head(3)

Unnamed: 0,listing_id,host_is_superhost,accommodates,bedrooms,beds,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,bathtroom,host_neighbourhood_cat,property_type_cat,room_type_cat
0,29967,0,8,2.0,3.0,88,4.76,4.75,4.55,4.91,4.91,4.8,4.76,0,2,219,7,0
1,38245,0,1,1.0,1.0,149,4.33,4.35,4.22,4.72,4.79,4.61,4.4,0,1,167,35,2
2,108349,1,7,3.0,5.0,162,4.75,4.79,4.55,4.88,4.81,4.89,4.69,1,2,219,19,0


## Combining the Datasets

In [41]:
final_df = pd.merge(calendar, weather)

In [42]:
final_df = pd.merge(calendar,weather)

In [43]:
final_df = pd.merge(final_df, listings)

In [44]:
final_df.head()

Unnamed: 0,listing_id,date,available,price,minimum_nights,maximum_nights,precip,max_temp,min_temp,average temp,...,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,bathtroom,host_neighbourhood_cat,property_type_cat,room_type_cat
0,29967,2022-09-18,0,225.0,4.0,365.0,0.0,76.0,59.0,67.5,...,4.55,4.91,4.91,4.8,4.76,0,2,219,7,0
1,29967,2022-09-19,0,225.0,4.0,365.0,0.0,78.0,57.0,67.5,...,4.55,4.91,4.91,4.8,4.76,0,2,219,7,0
2,29967,2022-09-20,0,225.0,4.0,365.0,0.0,80.0,58.0,69.0,...,4.55,4.91,4.91,4.8,4.76,0,2,219,7,0
3,29967,2022-09-21,0,225.0,4.0,365.0,0.0,79.0,62.0,70.5,...,4.55,4.91,4.91,4.8,4.76,0,2,219,7,0
4,29967,2022-09-22,0,225.0,4.0,365.0,0.0,81.0,56.0,68.5,...,4.55,4.91,4.91,4.8,4.76,0,2,219,7,0


In [45]:
final_df.shape

(3882606, 27)

In [47]:
final_df.to_csv('/Users/jackieurenda/Downloads/sd_airbnb.csv')