In [31]:
# Import necessary packages
import pandas as pd

# Read the three source files: prices (CSV), room types (Excel), last reviews (TSV)
airbnb_price = pd.read_csv('./datasets/airbnb_dataset/airbnb_price.csv')
airbnb_room = pd.read_excel('./datasets/airbnb_dataset/airbnb_room_type.xlsx')
airbnb_reviews = pd.read_csv('./datasets/airbnb_dataset/airbnb_last_review.tsv', sep='\t')

# Join the three tables on their shared listing_id key (default inner join)
listings = (
    airbnb_price
    .merge(airbnb_room, on='listing_id')
    .merge(airbnb_reviews, on='listing_id')
)

In [32]:
# Earliest and most recent values found in the last_review column
# Parse the text dates (month name, day, year) into datetimes
listings['last_review'] = pd.to_datetime(listings['last_review'], format='%B %d %Y')

# Take both extremes in a single aggregation and unpack them in order
first_review, lastest_review = listings['last_review'].agg(['min', 'max'])

print(f'The first review was entered on {first_review}, while the last review was on {lastest_review}')

The first review was entered on 2019-01-01 00:00:00, while the last review was on 2019-07-09 00:00:00


In [33]:
# Count the listings whose room type is "private room"
# Lower-case the labels first so differently capitalised variants compare equal
listings['room_type'] = listings['room_type'].str.lower()
private_listings = len(listings.loc[listings['room_type'].eq('private room')])
print(private_listings)

11356


In [34]:
# Average listing price
# The price column is text with a "dollars" label (presumably "<amount> dollars" --
# confirm against the raw file). Remove the literal word instead of calling
# str.strip('dollars'): strip() treats its argument as a *set of characters* to trim
# from both ends, not as a suffix, so it only worked by accident of the data.
# Casting to str first keeps this cell safe to re-run: once the column has been
# converted to integers, the .str accessor would otherwise raise AttributeError.
listings['price'] = (
    listings['price']
    .astype(str)
    .str.replace('dollars', '', regex=False)
    .str.strip()  # drop the whitespace left around the number
    .astype('int')
)

# Mean price across all listings, rounded to two decimal places
average_listing = listings['price'].mean().round(2)
print(average_listing)

141.78


In [35]:
# Gather the four summary figures computed above into a single-row DataFrame
review_dates = pd.DataFrame(
    dict(
        first_reviewed=[first_review],
        last_reviewed=[lastest_review],
        nb_private_rooms=[private_listings],
        avg_price=[average_listing],
    )
)

print(review_dates)

  first_reviewed last_reviewed  nb_private_rooms  avg_price
0     2019-01-01    2019-07-09             11356     141.78
