In [1]:
#import packages
import json
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import streamlit as st
from streamlit_option_menu import option_menu

In [2]:
#get data
# Read the raw listings JSON. The context manager closes the file handle as
# soon as parsing finishes, and the explicit UTF-8 encoding keeps the result
# independent of the platform's default text encoding.
with open('sample_airbnb.json', encoding='utf-8') as file:
    data = json.load(file)
# Wrap the parsed JSON in a DataFrame for the cleaning steps that follow.
datas=pd.DataFrame(data)

In [None]:
# Uncomment the two options below to lift pandas' display limits on columns
# and rows while inspecting the frame.
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)
# Show the first two rows of the raw frame.
datas.head(2)

### Correct the data types

In [4]:
# Cast columns to the dtypes used by the rest of the notebook.

# Nullable 'Int64' keeps missing counts as <NA> instead of forcing floats.
datas['bedrooms'] = datas['bedrooms'].astype('Int64')
datas['beds'] = datas['beds'].astype('Int64')
datas['minimum_nights'] = datas['minimum_nights'].astype(int)
datas['maximum_nights'] = datas['maximum_nights'].astype(int)

# bathrooms: unparseable values are coerced to NaN and kept as <NA> (nullable
# 'Int64', like bedrooms/beds) rather than being replaced by a -1 sentinel,
# which would hide them from missing-value handling and skew statistics.
# np.trunc keeps the previous toward-zero truncation of fractional values.
datas['bathrooms'] = np.trunc(pd.to_numeric(datas['bathrooms'], errors='coerce')).astype('Int64')

# Parse the date-like columns into datetime64 values.
datas['first_review'] = pd.to_datetime(datas['first_review'])
datas['last_review'] = pd.to_datetime(datas['last_review'])
datas['calendar_last_scraped'] = pd.to_datetime(datas['calendar_last_scraped'])
datas['last_scraped'] = pd.to_datetime(datas['last_scraped'])

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) to check the casts.
datas.info()

### Handling the missing values

In [6]:
# Impute missing values column by column.
#
# Each result is assigned back to the frame. Calling
# `datas[col].fillna(..., inplace=True)` mutates a temporary Series: it emits a
# FutureWarning on pandas 2.x and leaves `datas` unchanged once Copy-on-Write
# is active, so the explicit assignment is the form that always takes effect.

# Review dates: fill with the most frequent date.
for col in ('first_review', 'last_review'):
    datas[col] = datas[col].fillna(datas[col].mode()[0])

# Numeric columns: fill with the column median.
median_filled_columns = (
    'bedrooms', 'beds', 'bathrooms',
    'security_deposit', 'cleaning_fee',
    'weekly_price', 'monthly_price',
    'reviews_per_month',
)
for col in median_filled_columns:
    fill_value = datas[col].median()
    # An integer column cannot store a fractional median (e.g. 1.5), so round
    # it to a whole number first; integer-valued medians are unaffected.
    if pd.api.types.is_integer_dtype(datas[col]) and pd.notna(fill_value):
        fill_value = int(round(fill_value))
    datas[col] = datas[col].fillna(fill_value)

In [None]:
# Count the missing values left in each column.
datas.isnull().sum()

### Prepare the DataFrame for analysis

In [8]:
def crt_df(listings=None):
    """Flatten the nested listing columns into one wide DataFrame.

    The ``address``, ``availability`` and ``host`` columns hold one dict per
    row, and each address dict carries a nested ``location`` dict. Every dict
    is expanded into its own columns and joined back onto the remaining
    listing columns.

    Parameters
    ----------
    listings : pandas.DataFrame, optional
        Frame to flatten. When omitted, the notebook-level ``datas`` frame is
        used, which is what a plain ``crt_df()`` call has always done.

    Returns
    -------
    pandas.DataFrame
        ``listings`` without the columns in ``columns_to_drop``, followed by
        the expanded address, location, availability and host columns. The
        row index of ``listings`` is preserved.

    Raises
    ------
    KeyError
        If ``listings`` lacks any of the nested columns or any column listed
        in ``columns_to_drop``.
    """
    if listings is None:
        # Backward-compatible default: operate on the notebook-level frame.
        listings = datas

    # Every expanded frame reuses the source index. A fresh 0..n-1 index would
    # make the column-wise concat below misalign rows (and pad with NaN)
    # whenever the source frame is not indexed 0..n-1.
    row_index = listings.index

    add_df = pd.DataFrame(listings['address'].tolist(), index=row_index)

    loc_df = pd.DataFrame(add_df['location'].tolist(), index=row_index)
    loc_df['is_location_exact'] = loc_df['is_location_exact'].map({False: 'No', True: 'Yes'})
    # Split each coordinates pair: first element -> longitude, second -> latitude.
    coords_df = pd.DataFrame(loc_df['coordinates'].tolist(), index=row_index,
                             columns=['longitude', 'latitude'])
    loc_df = pd.concat([loc_df.drop(columns=['coordinates']), coords_df], axis=1)

    availability_df = pd.DataFrame(listings['availability'].tolist(), index=row_index)
    host_df = pd.DataFrame(listings['host'].tolist(), index=row_index)

    # remove unwanted columns
    # NOTE(review): 'host' (and the nested 'location' inside add_df) are kept
    # alongside their expanded columns; left as-is so the output schema does
    # not change - confirm whether they should be dropped too.
    columns_to_drop = ['summary','calendar_last_scraped','last_scraped','notes','space','reviews_per_month','availability','last_review',
                        'first_review','transit','access','interaction','amenities','address']
    temp_df = listings.drop(columns=columns_to_drop)

    #merge all df
    airbnb_data = pd.concat([temp_df, add_df, loc_df, availability_df, host_df], axis=1)

    return airbnb_data

In [9]:
# Build the flattened frame from the notebook-level `datas`.
airbnb_data=crt_df()
# Save it as CSV without the index column; the plotting cells below are called
# with this file name.
airbnb_data.to_csv('Airbnb_data.csv',index=False)

### Visualise the data

In [10]:
def geo_visual(data_set):
    """Show the listings from a CSV file as points on an OpenStreetMap map.

    Parameters
    ----------
    data_set
        CSV source passed straight to ``pd.read_csv``. It must provide the
        ``latitude``, ``longitude``, ``country``, ``name``, ``price`` and
        ``is_location_exact`` columns.

    Returns
    -------
    None
        The figure is displayed with ``show()``; nothing is returned.
    """
    listings = pd.read_csv(data_set)

    map_fig = px.scatter_mapbox(
        listings,
        lat="latitude",
        lon="longitude",
        hover_name="country",
        hover_data=["name", "price", "is_location_exact"],
        color_discrete_sequence=["fuchsia"],
        zoom=2,
        height=500,
    )
    # Use the open-street-map tiles and remove the margins around the map.
    map_fig.update_layout(
        mapbox_style="open-street-map",
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
    )
    map_fig.show()

In [None]:
# Render the listing map from the saved CSV.
geo_visual("Airbnb_data.csv")

In [12]:
def top_20_price(data_set):
    """Draw a line chart of the 20 listings with the highest ``price``.

    Parameters
    ----------
    data_set
        CSV source passed straight to ``pd.read_csv``. It must provide the
        ``name``, ``price`` and ``country`` columns.

    Returns
    -------
    None
        The figure is displayed with ``show()``; nothing is returned.
    """
    listings = pd.read_csv(data_set)
    priciest = listings.nlargest(20, 'price')

    chart_options = dict(
        x="name",
        y="price",
        hover_name="country",
        title="Top 20 Most Expensive Listings",
        width=1000,
        height=1000,
        markers=True,
        color_discrete_sequence=["blue"],
    )
    line_fig = px.line(priciest, **chart_options)

    # Draw both the connecting line and a marker for each listing.
    line_fig.update_traces(mode="lines+markers")

    line_fig.show()

In [None]:
# Chart the 20 highest-priced listings from the saved CSV.
top_20_price("Airbnb_data.csv")

In [14]:
def low_20_price(data_set):
    """Draw a horizontal bar chart of the 20 listings with the lowest ``price``.

    Parameters
    ----------
    data_set
        CSV source passed straight to ``pd.read_csv``. It must provide the
        ``name``, ``price`` and ``country`` columns.

    Returns
    -------
    None
        The figure is displayed with ``show()``; nothing is returned.
    """
    listings = pd.read_csv(data_set)
    cheapest = listings.nsmallest(20, "price")

    chart_options = dict(
        x="price",
        y="name",
        hover_name="country",
        title="Lowest 20 Listings",
        width=1000,
        height=800,
        color_discrete_sequence=["yellowgreen"],
        orientation='h',
    )
    bar_fig = px.bar(cheapest, **chart_options)

    bar_fig.show()

In [None]:
# Chart the 20 lowest-priced listings from the saved CSV.
low_20_price("Airbnb_data.csv")

In [16]:
def availability_pie_chart(data_set):
    """Draw a donut chart of summed availability per ``room_type``.

    A ``total_availability`` column is computed for every listing as the
    row-wise sum of the four ``availability_*`` columns and used as the slice
    value.

    Parameters
    ----------
    data_set
        CSV source passed straight to ``pd.read_csv``. It must provide
        ``room_type`` and the four ``availability_*`` columns summed below.

    Returns
    -------
    None
        The figure is displayed with ``show()``; nothing is returned.
    """
    window_columns = ["availability_30", "availability_60", "availability_90", "availability_365"]

    listings = pd.read_csv(data_set)
    # NOTE(review): the 30/60/90/365 columns look like overlapping windows; if
    # so, this sum counts near-term days more than once - confirm it is intended.
    listings = listings.assign(total_availability=listings[window_columns].sum(axis=1))

    pie_fig = px.pie(
        data_frame=listings,
        names='room_type',
        values='total_availability',
        width=600,
        height=500,
        title='ROOM TYPE AND AVAILABILITY',
        hole=0.5,
        color_discrete_sequence=px.colors.sequential.Darkmint_r,
    )
    pie_fig.show()

In [None]:
# Chart availability by room type from the saved CSV.
availability_pie_chart("Airbnb_data.csv")

In [18]:
def acc_price_list(data_set):
    """Draw a bar chart of ``security_deposit`` against ``accommodates``.

    Bars are coloured by ``security_deposit``; hovering shows the listing
    name together with ``price`` and ``extra_people``.

    Parameters
    ----------
    data_set
        CSV source passed straight to ``pd.read_csv``. It must provide the
        ``accommodates``, ``security_deposit``, ``price``, ``extra_people``
        and ``name`` columns.

    Returns
    -------
    None
        The figure is displayed with ``show()``; nothing is returned.
    """
    listings = pd.read_csv(data_set)

    # NOTE(review): the title mentions a price list, but the bar height and
    # colour come from security_deposit - confirm which column is intended.
    chart_options = dict(
        x="accommodates",
        y="security_deposit",
        title="ACCOMMODATES AND PRICE LIST",
        hover_data=["price", "extra_people"],
        hover_name="name",
        color="security_deposit",
        color_continuous_scale='Bluered',
        width=1000,
        height=600,
    )
    bar_fig = px.bar(listings, **chart_options)
    bar_fig.show()

In [None]:
# Chart security deposit by accommodation size from the saved CSV.
acc_price_list("Airbnb_data.csv")