In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.io as pio

In [2]:
pio.templates.default = 'plotly_white'
px.set_mapbox_access_token(open(".mapbox_token").read())
PALETTE = px.colors.qualitative.Prism
MAIN_COLOR = 'rgb(231,41,138)'

In [3]:
def create_df(file_name, cols, na_cols=None):
  df = pd.read_csv(file_name).drop(cols, axis=1)
  df[na_cols] = df[na_cols].fillna('No info')
  return df

In [50]:
cols = ['license', 'neighbourhood_group']
na_cols = ['name']
df_listing = create_df('data/listings_short.csv', cols, na_cols)

In [51]:
len(df_listing)

3500

In [5]:
df_listing.isna().sum().sort_values(ascending=False)

last_review                       702
reviews_per_month                 702
id                                  0
name                                0
host_id                             0
host_name                           0
neighbourhood                       0
latitude                            0
longitude                           0
room_type                           0
price                               0
minimum_nights                      0
number_of_reviews                   0
calculated_host_listings_count      0
availability_365                    0
number_of_reviews_ltm               0
dtype: int64

In [6]:
df_reviews = pd.read_csv('data/reviews_2023_03.csv', parse_dates=['date'])
df_final = df_reviews.merge(df_listing, left_on='listing_id', right_on='id', how='inner').drop(['id_x', 'id_y'], axis=1)

In [7]:
len(df_final)

93656

In [8]:
df_final.isna().sum()

listing_id                        0
date                              0
reviewer_id                       0
reviewer_name                     0
comments                          2
name                              0
host_id                           0
host_name                         0
neighbourhood                     0
latitude                          0
longitude                         0
room_type                         0
price                             0
minimum_nights                    0
number_of_reviews                 0
last_review                       0
reviews_per_month                 0
calculated_host_listings_count    0
availability_365                  0
number_of_reviews_ltm             0
dtype: int64

In [9]:
df_final[df_final['comments'].isna() == True]

Unnamed: 0,listing_id,date,reviewer_id,reviewer_name,comments,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm
47701,25051575,2019-04-23,244070000,David,,Stor villa med sol hela dagen,26990407,Moa,Skarpnäcks,59.27726,18.11034,Entire home/apt,1990,5,24,2023-01-03,0.42,1,27,7
62929,31301631,2019-05-30,263373046,Deepak,,Superior Studio Apartment with Sofa Bed,362541894,ApartDirect,Älvsjö,59.28055,18.01409,Entire home/apt,836,2,83,2023-03-13,1.63,17,266,24


In [10]:
df_final.dropna(subset=['comments'], inplace=True)

In [11]:
len(df_final)

93654

In [12]:
df_final.isna().sum()

listing_id                        0
date                              0
reviewer_id                       0
reviewer_name                     0
comments                          0
name                              0
host_id                           0
host_name                         0
neighbourhood                     0
latitude                          0
longitude                         0
room_type                         0
price                             0
minimum_nights                    0
number_of_reviews                 0
last_review                       0
reviews_per_month                 0
calculated_host_listings_count    0
availability_365                  0
number_of_reviews_ltm             0
dtype: int64

In [13]:
df_final['reviewer_id'].value_counts()

348992094    31
24622700     25
110641325    16
10805888     11
280949885    11
             ..
32023642      1
59702442      1
38577770      1
42229921      1
77261435      1
Name: reviewer_id, Length: 88204, dtype: int64

In [14]:
df_final.query('reviewer_id == 348992094').head(3)

Unnamed: 0,listing_id,date,reviewer_id,reviewer_name,comments,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm
53092,31347264,2020-11-30,348992094,Roger,Bra läge och mycket smidig in och utcheckning,# 12 Studiolägenhet utan fönster(no window),8098890,Robin,Kungsholmens,59.33435,18.02585,Entire home/apt,621,2,224,2023-03-30,4.39,20,332,58
53097,31347264,2021-02-12,348992094,Roger,Bra läge och alltid bra kommunikation. Allt fu...,# 12 Studiolägenhet utan fönster(no window),8098890,Robin,Kungsholmens,59.33435,18.02585,Entire home/apt,621,2,224,2023-03-30,4.39,20,332,58
53099,31347264,2021-03-20,348992094,Roger,Bra läge och mycket bra boende,# 12 Studiolägenhet utan fönster(no window),8098890,Robin,Kungsholmens,59.33435,18.02585,Entire home/apt,621,2,224,2023-03-30,4.39,20,332,58


In [15]:
le = LabelEncoder()
df_final['listing_id_encod'] = le.fit_transform(df_final['listing_id'])

In [16]:
px.histogram(df_final, x='listing_id_encod')

In [17]:
num_reviews = (df_final.groupby(['listing_id_encod', 'neighbourhood'])
          .size().sort_values(ascending=False)
          .reset_index()
          .rename(columns={0: 'num_reviews'}))
num_reviews

Unnamed: 0,listing_id_encod,neighbourhood,num_reviews
0,1073,Södermalms,976
1,27,Östermalms,675
2,1329,Norrmalms,653
3,7,Södermalms,578
4,545,Södermalms,515
...,...,...,...
2793,2180,Norrmalms,1
2794,2177,Östermalms,1
2795,2170,Älvsjö,1
2796,2161,Södermalms,1


In [98]:
px.bar(num_reviews.nlargest(50, 'num_reviews'), y='num_reviews', 
       color='neighbourhood', color_discrete_sequence=PALETTE)

In [19]:
fig = px.bar(num_reviews.groupby('neighbourhood')['num_reviews'].sum().sort_values(ascending=True), orientation='h')
fig.update_traces(marker_color=MAIN_COLOR)

In [20]:
df_listing.dropna(inplace=True)

In [21]:
len(df_listing)

2798

In [22]:
fig = ff.create_hexbin_mapbox(
    data_frame=df_listing, lat="latitude", lon="longitude",
    nx_hexagon=10, opacity=0.9, labels={"color": "Point Count"},
    color_continuous_scale=PALETTE,
    #show_original_data=True
)
fig.update_layout(margin=dict(b=0, t=0, l=0, r=0))
fig.show()

In [23]:
df_listing['price'].agg(['max', 'min', 'mean'])

max     480500.000000
min          0.000000
mean      1697.782702
Name: price, dtype: float64

In [24]:
df_listing.query('price > 20000')

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm
755,19529551,Sunny apartment 27 mins to Centralen,137155339,Hillerodsgrand,Rinkeby-Tensta,59.39556,17.94929,Entire home/apt,480500,7,8,2019-08-01,0.12,1,29,0
2202,53198829,2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,17.83749,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5
2270,54126655,Small privately owned hotel 12 rooms and 22 beds,280607063,Hotel Söder,Södermalms,59.315224,18.077539,Private room,22995,1,1,2022-12-30,0.33,7,364,1


In [38]:
df_final.query('listing_id == 53198829')

Unnamed: 0,listing_id,date,reviewer_id,reviewer_name,comments,name,host_id,host_name,neighbourhood,latitude,...,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,listing_id_encod
83874,53198829,2022-01-07,235853369,Amygdalia,Camilla was an excellent host! She was kind an...,2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83875,53198829,2022-01-25,59681973,Kassem,Highly recommended,2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83876,53198829,2022-01-28,59681973,Kassem,"Camilla is a very good host, very responsive a...",2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83877,53198829,2022-02-04,127334280,Chaudhary,Camilla is an excellent host. Very sweet and c...,2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83878,53198829,2022-02-20,342541532,Resha,"Kan stark rekommendera lägenheten, precis som ...",2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83879,53198829,2022-03-21,443651639,Habid,Super cool! Definitely one of the best Airbnb’...,2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83880,53198829,2022-04-16,422594941,Gloria,Camilla’s flat is really beautiful and equippe...,2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83881,53198829,2022-05-02,235853369,Amygdalia,Camilla är bäst! Jag rekommenderar henne stark...,2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83882,53198829,2022-05-29,448748591,Peter,"Bästa kvalitet inom alla områden, kommunikatio...",2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960
83883,53198829,2022-06-06,123730673,Chelsea,Camilla’s place was great. The rooms were clea...,2:a i Hässelby,430664934,Camilla,Hässelby-Vällingby,59.37333,...,Entire home/apt,450590,2,11,2022-12-11,0.73,1,1,5,1960


In [26]:
px.histogram(df_listing, x='price')

In [83]:
df_listing['room_type'].value_counts()

Entire home/apt    2743
Private room        682
Shared room          40
Hotel room           34
Name: room_type, dtype: int64

In [94]:
fig = px.scatter_mapbox(df_listing, 
                            lat="latitude", lon="longitude", 
                            color="room_type", size="number_of_reviews",
                            hover_data = {'latitude': False, 'longitude': False, 'price_min_calend': True},
                            #color_discrete_sequence=PALETTE,
                            size_max=15, zoom=10)
fig.show()

In [91]:
fig = px.scatter_mapbox(df_listing, 
                            lat="latitude", lon="longitude", 
                            color="room_type", size="number_of_reviews",
                            hover_data = {'latitude': False, 'longitude': False, 'price_min_calend': True},
                            #color_discrete_sequence=PALETTE,
                            size_max=15, zoom=10)
fig.show()

In [57]:
df_calendar = pd.read_csv('data/calendar.csv')
df_calendar

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,75590,2023-03-31,f,$949.00,$949.00,30.0,100.0
1,75590,2023-04-01,f,$949.00,$949.00,30.0,100.0
2,75590,2023-04-02,t,$949.00,$949.00,30.0,100.0
3,75590,2023-04-03,t,$949.00,$949.00,30.0,100.0
4,75590,2023-04-04,t,$949.00,$949.00,30.0,100.0
...,...,...,...,...,...,...,...
1276035,533880,2024-03-25,f,$621.00,$621.00,10.0,90.0
1276036,533880,2024-03-26,f,$621.00,$621.00,10.0,90.0
1276037,533880,2024-03-27,f,$621.00,$621.00,10.0,90.0
1276038,533880,2024-03-28,f,$621.00,$621.00,10.0,90.0


In [58]:
df_calendar['price'] = df_calendar['price'].str.replace('[\$,]', '', regex=True)
df_calendar['price'].agg(['max', 'min'])

max    99999.00
min        0.00
Name: price, dtype: object

In [65]:
min_price_calend = df_calendar.groupby('listing_id')['price'].min().astype(float).to_dict()

In [66]:
df_listing['price_min_calend'] = df_listing['id'].map(min_price_calend)

In [69]:
(df_listing['price'] - df_listing['price_min_calend']).sum()

956345.0

In [71]:
df_listing.query('price_min_calend > 90000')

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,price_min_calend
3110,776549730665441181,Helhet,46194379,Jakob,Kungsholmens,59.3238,18.0045,Entire home/apt,99999,1,0,,,1,365,0,99999.0


In [72]:
df_listing.drop(3110, axis=0, inplace=True)

In [73]:
df_listing.query('price_min_calend > 90000')

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,price_min_calend


In [74]:
px.histogram(df_listing, x='price_min_calend')

In [39]:
df_calendar.query('listing_id == 54126655')

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
421210,54126655,2023-03-31,t,"$22,995.00","$22,995.00",1.0,365.0
421211,54126655,2023-04-01,t,"$22,995.00","$22,995.00",1.0,365.0
421212,54126655,2023-04-02,t,"$22,995.00","$22,995.00",1.0,365.0
421213,54126655,2023-04-03,t,"$22,995.00","$22,995.00",1.0,365.0
421214,54126655,2023-04-04,t,"$22,995.00","$22,995.00",1.0,365.0
...,...,...,...,...,...,...,...
421570,54126655,2024-03-25,t,"$22,995.00","$22,995.00",1.0,365.0
421571,54126655,2024-03-26,t,"$22,995.00","$22,995.00",1.0,365.0
421572,54126655,2024-03-27,t,"$22,995.00","$22,995.00",1.0,365.0
421573,54126655,2024-03-28,t,"$22,995.00","$22,995.00",1.0,365.0


In [34]:
df_listing[df_listing['host_name'].str.contains('Clara') == True]

Unnamed: 0,id,name,host_id,host_name,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm
247,5795343,Exclusive 4 bedroom apt top location,30065832,Clara,Norrmalms,59.34282,18.03569,Entire home/apt,4000,5,20,2022-08-13,0.21,1,38,2
431,10673822,Barnvänlig lägenhet på Södermalm,55134895,Clara,Södermalms,59.30768,18.08655,Entire home/apt,2500,6,4,2022-11-04,0.48,1,56,4
702,18621820,"Stylish and super central, 65 Sq.m flat",129365804,Clara,Norrmalms,59.33563,18.052,Entire home/apt,1390,3,22,2019-08-27,0.31,2,33,0
962,24876881,Modern studio in Stockholm,54988683,Clara,Bromma,59.33078,17.96443,Entire home/apt,379,180,19,2018-09-05,0.32,1,180,0
1515,39526023,One room available for rent,303668791,Clara,Kungsholmens,59.3317,18.02564,Private room,380,1,10,2023-03-17,0.25,1,20,9
1554,40419926,Cosy single room with shared bathroom in the c...,260094228,Clara,Södermalms,59.31894,18.06008,Private room,605,1,16,2023-03-21,0.41,8,348,12
1556,40459005,Beautiful twin room with shared bathroom in th...,260094228,Clara,Södermalms,59.31972,18.05856,Private room,712,1,8,2023-03-20,0.21,8,351,3
1558,40459575,"Ground floor, single room with private bathroom",260094228,Clara,Södermalms,59.31918,18.05997,Private room,781,1,4,2022-05-16,0.1,8,308,2
1559,40459643,"Ground floor, twin room private bathroom, loca...",260094228,Clara,Södermalms,59.31888,18.06026,Private room,850,1,2,2023-01-22,0.71,8,348,2
1562,40459957,Beautiful twin room with private bathroom,260094228,Clara,Södermalms,59.31823,18.05816,Private room,987,1,1,2023-01-09,0.37,8,326,1
