In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the three raw Airbnb tables: prices (CSV), room types (Excel workbook)
# and last-review data (tab-separated text).
price = pd.read_csv(filepath_or_buffer="airbnb_price.csv")
room = pd.read_excel(io="airbnb_room_type.xlsx")
review = pd.read_csv(filepath_or_buffer="airbnb_last_review.tsv", sep="\t")

## Basic Analysis

In [4]:
# Count private-room listings; the comparison is done on upper-cased labels
# so differently-cased spellings of "private room" are all included.
is_private_room = room["room_type"].str.upper().eq("PRIVATE ROOM")
no_pvt_rooms = int(is_private_room.sum())

In [5]:
# Strip the " dollars" suffix and convert the column to float in one step.
# The previous approach substituted ".0" for the suffix, which would turn a
# value such as "12.50 dollars" into the unparseable "12.50.0". Going through
# astype(str) first also makes the cell safe to re-run after the column has
# already been converted to numbers.
price["price"] = (
    price["price"]
    .astype(str)
    .str.replace(" dollars", "", regex=False)
    .astype(float)
)

In [6]:
# Make sure the price column is stored as floats for the numeric work below.
price = price.astype({"price": float})

In [7]:
# Average listing price across the whole price table, rounded to two decimals.
avg_price = np.round(price["price"].mean(), 2)

In [8]:
# Parse the last_review column into pandas datetimes so min()/max() compare dates.
review = review.assign(last_review=pd.to_datetime(review["last_review"]))

In [9]:
# Most recent date found in the last_review column.
last_review = review.loc[:, "last_review"].max()

In [10]:
# Earliest date found in the last_review column.
first_review = review.loc[:, "last_review"].min()

In [11]:
# Pack the four summary values into a 1 x 4 array (a single row of results).
summary_row = [first_review, last_review, no_pvt_rooms, avg_price]
arrays = np.array([summary_row])

In [12]:
# One-row summary table of the basic-analysis results.
# Built from a dict of one-element lists so pandas infers a dtype per column
# (datetime / integer / float); routing the mixed values through a single
# NumPy array would store them in an object array first.
df = pd.DataFrame(
    {
        "first_review": [first_review],
        "last_review": [last_review],
        "no_pvt_rooms": [no_pvt_rooms],
        "avg_price": [avg_price],
    }
)

In [13]:
# Persist the summary table as CSV, leaving out the row index.
df.to_csv(path_or_buf="review_dates.csv", index=False)

## Intermediate Analysis

In [19]:
# Mean price per neighbourhood, most expensive first; keep the first five rows.
mean_price_by_nbhood = price.groupby(["nbhood_full"])["price"].mean()
top = mean_price_by_nbhood.sort_values(ascending=False).head(5)

In [20]:
# Move the nbhood_full index into a regular column (the index is kept, not dropped).
top = top.reset_index(drop=False)

In [21]:
# Number of non-null prices per neighbourhood, largest count first.
price.groupby("nbhood_full")["price"].agg("count").sort_values(ascending=False)

nbhood_full
Brooklyn, Bedford-Stuyvesant    2209
Brooklyn, Williamsburg          1853
Manhattan, Harlem               1435
Brooklyn, Bushwick              1202
Manhattan, Hell's Kitchen       1119
                                ... 
Staten Island, Howland Hook        1
Staten Island, Rossville           1
Staten Island, Richmondtown        1
Staten Island, Prince's Bay        1
Staten Island, Willowbrook         1
Name: price, Length: 217, dtype: int64

In [22]:
# Spot-check one neighbourhood: non-null counts per column for "Manhattan, NoHo".
price.loc[price["nbhood_full"].eq("Manhattan, NoHo")].count()

listing_id     41
price          41
nbhood_full    41
dtype: int64

In [23]:
# Number of listings (non-null prices) for each neighbourhood in `top`.
# A single grouped count replaces the per-neighbourhood boolean scan of the
# whole price table; reindex() then picks the counts in the row order of
# `top`, so the list always has exactly one entry per row of `top`.
listings_per_nbhood = price.groupby("nbhood_full")["price"].count()
listings = list(listings_per_nbhood.reindex(top["nbhood_full"]).to_numpy())

In [24]:
# Display the listing counts gathered for the neighbourhoods in `top`.
listings

[np.int64(2), np.int64(61), np.int64(36), np.int64(41), np.int64(191)]

In [27]:
# Attach the listing counts to the neighbourhood summary as a new column.
top = top.assign(listings=listings)

In [28]:
# Display the neighbourhood summary now that the listings column is attached.
top

Unnamed: 0,neighborhood,average_price,listings
0,"Brooklyn, Sea Gate",805.0,2
1,"Manhattan, Tribeca",396.704918,61
2,"Manhattan, Flatiron District",342.055556,36
3,"Manhattan, NoHo",335.02439,41
4,"Manhattan, SoHo",299.162304,191


In [29]:
# Give the summary columns descriptive names. rename() returns a new frame,
# which is bound back to `top` instead of mutating it in place.
descriptive_names = {
    "nbhood_full": "neighborhood",
    "price": "average_price",
    "listings": "number_of_listings",
}
top = top.rename(columns=descriptive_names)

In [30]:
# Display the neighbourhood summary again to check the renamed columns.
top

Unnamed: 0,neighborhood,average_price,number_of_listings
0,"Brooklyn, Sea Gate",805.0,2
1,"Manhattan, Tribeca",396.704918,61
2,"Manhattan, Flatiron District",342.055556,36
3,"Manhattan, NoHo",335.02439,41
4,"Manhattan, SoHo",299.162304,191


## Word Analysis

In [43]:
# Display the room-type table (listing_id, description, room_type).
# Note that room_type labels appear with inconsistent casing
# (e.g. "private room", "Private room", "PRIVATE ROOM").
room

Unnamed: 0,listing_id,description,room_type
0,2595,Skylit Midtown Castle,Entire home/apt
1,3831,Cozy Entire Floor of Brownstone,Entire home/apt
2,5099,Large Cozy 1 BR Apartment In Midtown East,Entire home/apt
3,5178,Large Furnished Room Near B'way,private room
4,5238,Cute & Cozy Lower East Side 1 bdrm,Entire home/apt
...,...,...,...
25204,36425863,Lovely Privet Bedroom with Privet Restroom,PRIVATE ROOM
25205,36427429,No.2 with queen size bed,PRIVATE ROOM
25206,36438336,Seas The Moment,Private room
25207,36442252,1B-1B apartment near by Metro,Entire home/apt


## Advanced Analysis

In [32]:
# Left-join prices onto the room table so every row of `room` is kept,
# whether or not it has a matching price row.
# The join key is named explicitly: listing_id is the column the two tables
# share, and spelling it out keeps the join stable if either table later
# gains another column with a common name.
price_room = pd.merge(room, price, how="left", on="listing_id")

In [49]:
# Mean price of private-room listings (room type compared in lower case).
is_private = price_room["room_type"].str.lower().eq("private room")
private_room_avg = price_room.loc[is_private, "price"].mean()
private_room_avg

np.float64(81.63816484677703)

In [50]:
# Mean price of shared-room listings (room type compared in lower case).
is_shared = price_room["room_type"].str.lower().eq("shared room")
shared_room_avg = price_room.loc[is_shared, "price"].mean()
shared_room_avg

np.float64(53.465076660988075)

In [51]:
# Mean price of entire-home/apartment listings (room type compared in lower case).
is_entire_home = price_room["room_type"].str.lower().eq("entire home/apt")
apt_avg = price_room.loc[is_entire_home, "price"].mean()
apt_avg

np.float64(197.16666666666666)

## Bar Chart of Averages

## Line Graph