In [1]:
# Dependencies 
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import numpy as np

In [2]:
# Load the Airbnb CSV exports (downloaded from the Kaggle website) into data frames.

# Relative locations of the four raw files
list_detail = "airbnb_data/listings_detail.csv"
list_summary = "airbnb_data/listings_summary.csv"
review_detail = "airbnb_data/reviews_detail.csv"
calendar_detail = "airbnb_data/calendar_detail.csv"

# Listing details: pin `id` to int64, and disable low_memory so column dtypes are
# inferred from the whole file instead of chunk by chunk.
ld_df = pd.read_csv(
    list_detail,
    low_memory=False,
    dtype={'id': np.int64},
)

# The summary, review and calendar files are read with pandas' default options.
ls_df, rd_df, cd_df = (
    pd.read_csv(csv_path)
    for csv_path in (list_summary, review_detail, calendar_detail)
)



In [3]:
# Preview the first five rows of the listings-detail frame to sanity-check the load
ld_df.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,...,requires_license,license,jurisdiction_names,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,18461891,https://www.airbnb.com/rooms/18461891,20171002002103,2017-10-03,"Bright, comfortable 1B studio near everything!",*Looking to fill these specific dates: June 24...,Very well illuminated space includes comfortab...,*Looking to fill these specific dates: June 24...,none,There are Michelin-rated restaurants in the ar...,...,f,,,f,f,strict,f,f,1,
1,20702398,https://www.airbnb.com/rooms/20702398,20171002002103,2017-10-03,Quiet house on City Island,1 private bedroom and bathroom available for o...,,1 private bedroom and bathroom available for o...,none,This neighborhood is dope! Lot's of seafood re...,...,f,,,f,f,moderate,f,f,1,2.0
2,6627449,https://www.airbnb.com/rooms/6627449,20171002002103,2017-10-03,Large 1 BDRM in Great location,This ground floor apartment is light and airy ...,"We are close to fishing, boating, biking, hors...",This ground floor apartment is light and airy ...,none,City Island is a unique and a hidden gem of Ne...,...,f,,,f,f,strict,f,f,1,0.77
3,19949243,https://www.airbnb.com/rooms/19949243,20171002002103,2017-10-03,Stay aboard a sailboat,stay aboard a sailboat,,stay aboard a sailboat,none,,...,f,,,t,f,strict,f,f,1,
4,1886820,https://www.airbnb.com/rooms/1886820,20171002002103,2017-10-03,Quaint City Island Community.,Quiet island boating town on Long Island Soun...,"Master bed with queen bed, full bath and offi...",Quiet island boating town on Long Island Soun...,none,Small New England type town in the middle of ...,...,f,,,f,f,strict,f,f,1,


In [4]:
# Show the (rows, columns) dimensions of the listings-detail frame
ld_df.shape

(44317, 96)

In [5]:
# List the column labels of the listings-detail frame
ld_df.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'name', 'summary',
       'space', 'description', 'experiences_offered', 'neighborhood_overview',
       'notes', 'transit', 'access', 'interaction', 'house_rules',
       'thumbnail_url', 'medium_url', 'picture_url', 'xl_picture_url',
       'host_id', 'host_url', 'host_name', 'host_since', 'host_location',
       'host_about', 'host_response_time', 'host_response_rate',
       'host_acceptance_rate', 'host_is_superhost', 'host_thumbnail_url',
       'host_picture_url', 'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'street',
       'neighbourhood', 'neighbourhood_cleansed',
       'neighbourhood_group_cleansed', 'city', 'state', 'zipcode', 'market',
       'smart_location', 'country_code', 'country', 'latitude', 'longitude',
       'is_location_exact', 'property_type', 'room_type', 'accommodates',
       'bathrooms',

In [6]:
# Clean up price data - remove the "$" symbol.
# regex=False forces "$" to be matched as a literal character: interpreted as a
# regular expression, "$" is the end-of-string anchor, and the default value of
# `regex` differs between pandas versions.
cd_df['price'] = cd_df['price'].str.replace('$', '', regex=False)

In [7]:
# Clean up price data - drop the "," thousands separators
comma_free_prices = cd_df['price'].str.replace(',', '')
cd_df['price'] = comma_free_prices

In [8]:
# Relabel the calendar columns (labels are assigned by position, left to right),
# then display the last 20 rows.
calendar_column_names = ['Listing ID', 'Date', 'Available', 'Price']
cd_df.columns = calendar_column_names
cd_df.tail(20)

Unnamed: 0,Listing ID,Date,Available,Price
16182620,21178032,2017-10-21,t,50.0
16182621,21178032,2017-10-20,t,50.0
16182622,21178032,2017-10-19,t,50.0
16182623,21178032,2017-10-18,t,50.0
16182624,21178032,2017-10-17,t,50.0
16182625,21178032,2017-10-16,t,50.0
16182626,21178032,2017-10-15,f,
16182627,21178032,2017-10-14,f,
16182628,21178032,2017-10-13,f,
16182629,21178032,2017-10-12,t,50.0


In [9]:
# Convert the cleaned price strings to 64-bit floats so they can be aggregated
cd_df['Price'] = cd_df['Price'].astype(np.float64)

In [10]:
# Check the calendar column dtypes (Price should now be float64)
cd_df.dtypes

Listing ID      int64
Date           object
Available      object
Price         float64
dtype: object

In [11]:
# Use "id" as the key column name so the calendar frame can be merged with the
# other data frames, which identify listings by "id".
listing_key_rename = {'Listing ID': 'id'}
cd_df = cd_df.rename(columns=listing_key_rename)

In [12]:
# Preview the calendar frame with the renamed "id" key column
cd_df.head()

Unnamed: 0,id,Date,Available,Price
0,2515,2018-10-01,t,99.0
1,2515,2018-09-30,t,89.0
2,2515,2018-09-29,t,99.0
3,2515,2018-09-28,t,99.0
4,2515,2018-09-27,t,99.0


In [13]:
# Per-listing price aggregates from the calendar: total price, average price,
# and the number of calendar rows that carry a price.
# NOTE(review): Price is NaN on rows where Available == 'f' (see the tail() output
# above), and sum/mean/count skip NaN, so these figures only cover days that have a
# listed price - confirm this is the intended proxy for "revenue" / "days rented".

# Build the grouped view once and reuse it for all three aggregations.
price_by_listing = cd_df.groupby('id')['Price']
Top_Spend_sum = price_by_listing.sum()
Top_Spend_mean = price_by_listing.mean()
Top_Spend_count = price_by_listing.count()

# Combine the three per-listing Series (all indexed by id) into one data frame.
# The column labels are kept verbatim because later cells refer to them.
Top_Spend = pd.DataFrame({
    'Total Spend by ID': Top_Spend_sum,
    'Average Spend by ID': Top_Spend_mean,
    'Number or Listings by ID': Top_Spend_count,
})

# Display the listings ranked by total, highest first
Top_Spend.sort_values('Total Spend by ID', ascending=False)

Unnamed: 0_level_0,Total Spend by ID,Average Spend by ID,Number or Listings by ID
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12955683,3639272.0,9998.000000,364
2953058,2920000.0,8000.000000,365
16810657,2151672.0,5911.186813,364
13910919,2121181.0,5843.473829,363
1448703,1825000.0,5000.000000,365
18051877,1820000.0,5000.000000,364
17416959,1753150.0,4829.614325,363
2952861,1642500.0,4500.000000,365
15307748,1384240.0,3802.857143,364
16594414,1357100.0,3718.082192,365


In [14]:
# Re-check the calendar dtypes before merging ("id" is int64, Price is float64)
cd_df.dtypes

id             int64
Date          object
Available     object
Price        float64
dtype: object

In [15]:
# Re-check the first calendar rows before merging
cd_df.head()

Unnamed: 0,id,Date,Available,Price
0,2515,2018-10-01,t,99.0
1,2515,2018-09-30,t,89.0
2,2515,2018-09-29,t,99.0
3,2515,2018-09-28,t,99.0
4,2515,2018-09-27,t,99.0


In [16]:
# Attach the listing details to the per-listing spend aggregates, matching on "id"
# and keeping only ids present in both frames (inner join, the pandas default).
ListingDetail_Spend = Top_Spend.merge(ld_df, how='inner', on='id')

In [17]:
# Preview the merged spend + listings-detail frame
ListingDetail_Spend.head()

Unnamed: 0,id,Total Spend by ID,Average Spend by ID,Number or Listings by ID,listing_url,scrape_id,last_scraped,name,summary,space,...,requires_license,license,jurisdiction_names,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,2515,26468.0,90.643836,292,https://www.airbnb.com/rooms/2515,20171002002103,2017-10-02,Stay at Chez Chic budget room #1,Step into our artistic spacious apartment and ...,-PLEASE BOOK DIRECTLY. NO NEED TO SEND A REQUE...,...,f,,,f,f,strict,f,f,3,1.43
1,2539,49446.0,135.468493,365,https://www.airbnb.com/rooms/2539,20171002002103,2017-10-03,Clean & quiet apt home by the park,Renovated apt home in elevator building.,"Spacious, renovated, and clean apt home, one b...",...,f,,,f,f,moderate,f,f,7,0.31
2,2595,79984.0,220.341598,363,https://www.airbnb.com/rooms/2595,20171002002103,2017-10-02,Midtown Castle,"Find your romantic getaway to this beautiful, ...","- Spacious (500+ft²), immaculate and nicely fu...",...,f,,,f,f,strict,t,t,2,0.23
3,3330,23590.0,70.0,337,https://www.airbnb.com/rooms/3330,20171002002103,2017-10-02,++ Brooklyn Penthouse Guestroom ++,"This is a spacious, clean, furnished master be...","Room Features: - clean, hardwood floors - 2 la...",...,f,,,f,f,strict,f,f,3,0.25
4,3647,54750.0,150.0,365,https://www.airbnb.com/rooms/3647,20171002002103,2017-10-02,THE VILLAGE OF HARLEM....NEW YORK !,,WELCOME TO OUR INTERNATIONAL URBAN COMMUNITY T...,...,f,,,f,f,strict,t,t,1,


In [18]:
# Preview the listings-summary frame before merging it in
ls_df.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,18461891,"Bright, comfortable 1B studio near everything!",916092,Connie Mae,Queens,Ditmars Steinway,40.774142,-73.916246,Entire home/apt,110,6,0,,,1,0
1,20702398,Quiet house on City Island,1457680,James,Bronx,City Island,40.849191,-73.786509,Private room,50,1,2,2017-10-01,2.0,1,169
2,6627449,Large 1 BDRM in Great location,13886510,Arlene,Bronx,City Island,40.849775,-73.786609,Entire home/apt,125,3,21,2017-09-26,0.77,1,363
3,19949243,Stay aboard a sailboat,1149260,MoMo,Bronx,City Island,40.848838,-73.782276,Entire home/apt,100,3,0,,,1,90
4,1886820,Quaint City Island Community.,9815788,Steve,Bronx,City Island,40.841144,-73.783052,Entire home/apt,300,7,0,,,1,365


In [19]:
# Combine spend + listing details with the listings summary, matching on "id"
# (inner join, the pandas default). Column names present in both inputs are
# disambiguated by pandas with the default "_x" (left) / "_y" (right) suffixes.
Listing_Summary_Detail_Spend = ListingDetail_Spend.merge(ls_df, how='inner', on='id')


In [20]:
# Display the first five rows of the combined frame; columns that exist in both
# merged inputs appear with _x / _y suffixes.
Listing_Summary_Detail_Spend.head()

Unnamed: 0,id,Total Spend by ID,Average Spend by ID,Number or Listings by ID,listing_url,scrape_id,last_scraped,name_x,summary,space,...,latitude_y,longitude_y,room_type_y,price_y,minimum_nights_y,number_of_reviews_y,last_review_y,reviews_per_month_y,calculated_host_listings_count_y,availability_365_y
0,2515,26468.0,90.643836,292,https://www.airbnb.com/rooms/2515,20171002002103,2017-10-02,Stay at Chez Chic budget room #1,Step into our artistic spacious apartment and ...,-PLEASE BOOK DIRECTLY. NO NEED TO SEND A REQUE...,...,40.799205,-73.953676,Private room,59,2,156,2017-07-31,1.43,3,292
1,2539,49446.0,135.468493,365,https://www.airbnb.com/rooms/2539,20171002002103,2017-10-03,Clean & quiet apt home by the park,Renovated apt home in elevator building.,"Spacious, renovated, and clean apt home, one b...",...,40.647486,-73.97237,Private room,150,1,7,2017-09-17,0.31,7,365
2,2595,79984.0,220.341598,363,https://www.airbnb.com/rooms/2595,20171002002103,2017-10-02,Midtown Castle,"Find your romantic getaway to this beautiful, ...","- Spacious (500+ft²), immaculate and nicely fu...",...,40.753621,-73.983774,Entire home/apt,225,1,22,2017-09-16,0.23,2,363
3,3330,23590.0,70.0,337,https://www.airbnb.com/rooms/3330,20171002002103,2017-10-02,++ Brooklyn Penthouse Guestroom ++,"This is a spacious, clean, furnished master be...","Room Features: - clean, hardwood floors - 2 la...",...,40.708558,-73.942362,Private room,70,5,24,2016-09-30,0.25,3,337
4,3647,54750.0,150.0,365,https://www.airbnb.com/rooms/3647,20171002002103,2017-10-02,THE VILLAGE OF HARLEM....NEW YORK !,,WELCOME TO OUR INTERNATIONAL URBAN COMMUNITY T...,...,40.809018,-73.941902,Private room,150,3,0,,,1,365


In [25]:
# Export the combined frame to CSV; with the default options the row index is
# written as the first column of the file.
spend_export_path = "Resources/listing_summarydetail_spend.csv"
Listing_Summary_Detail_Spend.to_csv(spend_export_path)