# eda_airbnb_ams.py
# Exploratory plots of per-night listing prices by room type and neighbourhood.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.offline import plot
# Load the pre-cleaned listings table.
df = pd.read_csv('cleaned_data.csv')

# Derived metric used by every plot below: the listed price divided by the
# minimum number of nights.
# NOTE(review): a row with minimum_nights == 0 would yield inf here; this
# assumes the cleaned data never contains 0 -- confirm.
df['price/night'] = df['price'] / df['minimum_nights']

# Drop the columns that are not used further down, including the two inputs
# of the derived metric.
df = df.drop(
    columns=[
        'price',
        'minimum_nights',
        'name',
        'latitude',
        'longitude',
        'availability_365',
    ]
)

# Report the number of missing values per column. A bare expression is
# silently discarded when the file runs as a script, so print it explicitly.
print(df.isnull().sum(axis=0))

# Replace missing reviews_per_month values with 0 (presumably listings that
# have no reviews yet -- confirm against the data source).
df['reviews_per_month'] = df['reviews_per_month'].fillna(0)
# Average price/night for each room type (pivot_table aggregates with the
# mean by default), flattened back into ordinary columns.
roomtype_price = df.pivot_table(values='price/night', index='room_type').reset_index()

# Pie chart of those averages. The percentages are each room type's share of
# the sum of the mean prices, not its share of listings.
colors = ['#708090', '#A9A9A9', '#7FFFD4', '#FF7F50']
plt.pie(
    roomtype_price['price/night'],
    labels=roomtype_price['room_type'],
    colors=colors,
    autopct='%1.0f%%',
)
plt.title('Price distribution of room types')

# Save the figure to disk first, then display it.
plt.savefig('piechart_priceDist.png', dpi=100)
plt.show()
# Mean price/night for every (neighbourhood, room_type) combination.
neighbourhood_price = pd.pivot_table(
    df,
    index=['neighbourhood', 'room_type'],
    values='price/night',
)
neighbourhood_price = neighbourhood_price.reset_index()

# Fold the room type into the neighbourhood label so that each combination
# gets its own bar, then drop the now-redundant room_type column.
neighbourhood_price['neighbourhood'] = (
    neighbourhood_price['neighbourhood'] + ' ' + neighbourhood_price['room_type']
)
neighbourhood_price = neighbourhood_price.drop(columns=['room_type'])

# Bars ordered from most to least expensive on a log-scaled y axis.
# `ascending` must be the boolean False: the string 'False' is truthy, so it
# never produced a descending sort, and recent pandas versions reject
# non-boolean values outright.
fig = px.bar(
    neighbourhood_price[['neighbourhood', 'price/night']].sort_values(
        'price/night', ascending=False
    ),
    y='price/night',
    x='neighbourhood',
    color='neighbourhood',
    log_y=True,
    template='ggplot2',
    title='neighbourhood vs price/night',
)
plot(fig)

# Disabled modelling experiment, kept as a bare string literal so that none of
# it executes. It sketches one-hot encoding df, a train/test split and a
# statsmodels OLS fit of price/night.
# NOTE(review): if this is revived, y is selected by position (iloc[:, 3])
# rather than by the 'price/night' name, and the OLS model is fitted on the
# full X/y, so the train/test split is never used.
"""df_dum = pd.get_dummies(df)
X = df_dum.drop('price/night', axis = 1)
y = df_dum.iloc[:,3].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.05)
import statsmodels.api as sm
X_sm = X = sm.add_constant(X)
model = sm.OLS(y, X_sm)
model.fit().summary()
"""