# Import Libraries

In [1]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.graph_objs as go
from dash.dependencies import Input, Output, State
import plotly.express as px
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, plot_confusion_matrix
import folium

# Import Data

## Airbnb Data - not one-hot encoded

In [2]:
# Read the un-encoded Airbnb listings; the first CSV column becomes the index.
airbnb_data = pd.read_csv(
    "large_clean_data/airbnb_model_not_encoded.csv",
    index_col=0,
)

# Parse the scrape date, then derive the calendar year that drives the year slider.
airbnb_data['last_scraped'] = pd.to_datetime(airbnb_data['last_scraped'], format='%Y/%m/%d')
airbnb_data['Year'] = pd.DatetimeIndex(airbnb_data['last_scraped']).year

# Keep only rows scraped in December (one end-of-year snapshot per year).
scraped_in_december = airbnb_data['last_scraped'].dt.month == 12
end_of_yr_data = airbnb_data[scraped_in_december]

# Rows scraped after 2021-01-31 are treated as the current listings.
scraped_after_cutoff = airbnb_data['last_scraped'] > '2021-01-31'
current_listings = airbnb_data[scraped_after_cutoff]

# Distinct values used to populate the checklist and the dropdown.
all_room_types = current_listings['room_type'].unique()
all_neighborhoods = current_listings['neighborhood'].unique()

## Zillow Data

In [5]:
# Zillow home values; the first CSV column becomes the index, which is later
# used as the x-axis of the home-value line chart.
nashville_prices_df = pd.read_csv(
    'cleaned_data/zillow_data_clean.csv',
    index_col=0,
)

# Prep for Model Results Dashboard

In [6]:
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Train/test splits saved as CSV; the first column of each file is the index.
X_train = pd.read_csv("cleaned_data/X_train_data.csv", index_col=0)
y_train = pd.read_csv("cleaned_data/y_train_data.csv", index_col=0)
X_test = pd.read_csv("cleaned_data/X_test_data.csv", index_col=0)
y_test = pd.read_csv("cleaned_data/y_test_data.csv", index_col=0)

# Previously fitted classifiers, one pickle file per model.
rand_forest = _read_pickle('random_forest.pickle')
grad_boost = _read_pickle('gradient_boost.pickle')
knn = _read_pickle('knn.pickle')
xg_boost = _read_pickle('xg_boost.pickle')

# Dashboard 1: EDA

In [8]:
def _eda_section_card(title):
    """Return a row containing a primary-coloured card whose body is an H3 title."""
    heading = html.H3(children=title, className="text-center text-light bg-primary")
    card = dbc.Card(heading, body=True, color="primary", style={"padding": 10})
    return dbc.Row([dbc.Col(card, className="mb-4")])


app = dash.Dash(external_stylesheets=[dbc.themes.MINTY])

# Static line chart: the frame's index on the x-axis, one line per column.
fig_home = px.line(nashville_prices_df, x=nashville_prices_df.index, y=nashville_prices_df.columns)

# Years available in the end-of-year snapshots; they bound and label the slider.
snapshot_years = end_of_yr_data['Year']

app.layout = html.Div([
    dbc.Container([
        # Page title and subtitle.
        dbc.Row([dbc.Col(html.H1("Nashville Housing EDA"), className="mb-2")]),
        dbc.Row([dbc.Col(html.H6(children='Exploring the data from Airbnb and Zillow'), className="mb-4")]),

        # Section 1: listing scatter, filtered by the year slider.
        _eda_section_card('Airbnb Presence Over Time'),
        dcc.Graph(id='indicator-graphic'),
        dcc.Slider(
            id='year--slider',
            min=snapshot_years.min(),
            max=snapshot_years.max(),
            value=snapshot_years.max(),
            marks={str(yr): str(yr) for yr in snapshot_years.unique()},
            step=None,
        ),

        # Section 2: static Zillow home-value chart.
        _eda_section_card('Nashville Home Values by Zipcode'),
        dcc.Graph(id='home-values-graph', figure=fig_home),

        # Section 3: average price per neighborhood, filtered by room type.
        _eda_section_card("Average Price by Neighborhood"),
        dcc.Checklist(
            id='my-checklist',
            options=[{"label": room, "value": room} for room in all_room_types],
            value=all_room_types[0:3],
        ),
        dcc.Graph(id='neighborhood-price-graph'),

        # Section 4: listing counts per room type, filtered by neighborhood.
        _eda_section_card("Listing Count by Room Type"),
        html.Label('Multi-Select Neighborhood Dropdown'),
        dcc.Dropdown(
            id='my-dropdown',
            options=[{"label": hood, "value": hood} for hood in all_neighborhoods],
            multi=True,
            value=all_neighborhoods,
        ),
        dcc.Graph(id='count-type-graph'),
    ])
])

In [9]:
@app.callback(
    Output('indicator-graphic', 'figure'),
    Input('year--slider', 'value'))
def update_graph(date_value):
    """Redraw the listing scatter for the year selected on the slider.

    Args:
        date_value: Year chosen on ``year--slider``.

    Returns:
        A scatter figure of the end-of-year listings for that year, plotted by
        longitude/latitude, coloured by neighborhood, with price as hover name.
    """
    in_selected_year = end_of_yr_data['Year'] == date_value
    year_listings = end_of_yr_data[in_selected_year]

    scatter = px.scatter(
        x=year_listings['longitude'],
        y=year_listings['latitude'],
        color=year_listings['neighborhood'],
        hover_name=year_listings['price'],
    )
    scatter.update_layout(
        margin=dict(l=40, b=40, t=10, r=0),
        hovermode='closest',
    )
    return scatter

In [10]:
@app.callback(
    Output('neighborhood-price-graph', 'figure'),
    Input('my-checklist', 'value'))
def update_price_chart(room_types):
    """Redraw the average-price bar chart for the checked room types.

    Args:
        room_types: Room types currently ticked in ``my-checklist``. ``None``
            is treated the same as an empty selection.

    Returns:
        A bar figure with one bar per neighborhood showing the mean ``price``
        of the current listings that match the selected room types.
    """
    if room_types is None:
        room_types = []

    selected = current_listings[current_listings.room_type.isin(room_types)]

    # Average only the price column. Calling .mean() on the whole grouped frame
    # also tries to average non-numeric columns (e.g. room_type), which raises
    # TypeError on pandas >= 2.0; the chart only needs neighborhood and price.
    avg_price = selected.groupby('neighborhood', as_index=False)['price'].mean()

    fig = px.bar(avg_price,
                 x='neighborhood',
                 y='price', color='neighborhood')
    return fig

In [11]:
@app.callback(
    Output('count-type-graph', 'figure'),
    Input('my-dropdown', 'value'))
# NOTE(review): this function shares its name with the callback registered for
# 'neighborhood-price-graph'. Both callbacks stay registered because the
# decorator runs at definition time, but this definition shadows the earlier
# one by name; consider renaming it (e.g. update_count_chart).
def update_price_chart(neighborhoods):
    """Redraw the room-type histogram for the selected neighborhoods.

    Args:
        neighborhoods: Neighborhoods chosen in ``my-dropdown``. ``None`` is
            treated the same as an empty selection.

    Returns:
        A histogram figure counting current listings per ``room_type``,
        coloured by neighborhood.
    """
    # isin() rejects None, so fall back to an empty selection (empty chart).
    if neighborhoods is None:
        neighborhoods = []

    mask = current_listings.neighborhood.isin(neighborhoods)
    fig = px.histogram(current_listings[mask], x="room_type", color='neighborhood')

    return fig

In [12]:
# Start the Dash server for the EDA app defined above. The call blocks this
# cell until the server is stopped (the output below says to press CTRL+C).
if __name__ == '__main__':
    app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)


# Dashboard 2: Model Results

In [13]:
def _model_section_card(title):
    """Return a row containing a primary-coloured card whose body is an H3 title."""
    heading = html.H3(children=title, className="text-center text-light bg-primary")
    return dbc.Row([
        dbc.Col(dbc.Card(heading, body=True, color="primary"), className="mb-4")
    ])


app = dash.Dash(__name__, external_stylesheets=[dbc.themes.MINTY])

# Radio choices: the label is shown on the page, the value is what the
# callbacks receive.
model_choices = [
    {'label': 'Random Forest', 'value': 'forest'},
    {'label': 'XGBoost', 'value': 'xg_boost'},
    {'label': 'Gradient Boosting', 'value': 'grad_boost'},
]

app.layout = html.Div([
    dbc.Container([
        # Page title and subtitle.
        dbc.Row([dbc.Col(html.H1("Classification Model Results"), className="mb-2")]),
        dbc.Row([
            dbc.Col(html.H6(children='Model results used to predict whether an airbnb will be highly available'),
                    className="mb-4")
        ]),

        # Model selector shared by both graphs below.
        dcc.RadioItems(id='model_type', options=model_choices, value='forest'),

        _model_section_card('Confusion Matrix'),
        dcc.Graph(id='model-conf-matrix'),

        _model_section_card('Feature Importances'),
        dcc.Graph(id='model-importance-results'),
    ])
])

In [14]:
@app.callback(
    Output('model-conf-matrix', 'figure'),
    Input('model_type', 'value'))
def update_conf_matrix(model):
    """Build the annotated confusion-matrix heatmap for the selected model.

    Args:
        model: Value of the ``model_type`` radio group: ``'forest'``,
            ``'xg_boost'``, or anything else (treated as gradient boosting).

    Returns:
        A heatmap figure of the test-set confusion matrix with each cell's
        count written on top of it.
    """
    if model == 'forest':
        clf = rand_forest
    elif model == 'xg_boost':
        clf = xg_boost
    else:
        clf = grad_boost

    # confusion_matrix returns rows = real class, columns = predicted class.
    cm = list(confusion_matrix(y_test, clf.predict(X_test)))

    # NOTE(review): assumes the two classes sort as [low, high] -- confirm
    # against the target encoding.
    labels = ['low', 'high']
    data = go.Heatmap(z=cm, y=labels, x=labels, colorscale='Emrld')

    # The heatmap draws z[i][j] at y=labels[i], x=labels[j], so each annotation
    # must use the column index for x and the row index for y. Using them the
    # other way round prints the two off-diagonal counts in each other's cells.
    annotations = []
    for i, row in enumerate(cm):
        for j, value in enumerate(row):
            annotations.append(
                {
                    "x": labels[j],
                    "y": labels[i],
                    "font": {"color": "white"},
                    "text": str(value),
                    "xref": "x1",
                    "yref": "y1",
                    "showarrow": False
                }
            )
    layout = {
        "title": "Confusion Matrix",
        "xaxis": {"title": "Predicted value"},
        "yaxis": {"title": "Real value"},
        "annotations": annotations
    }
    fig = go.Figure(data=data, layout=layout)
    return fig

In [15]:
@app.callback(
    Output('model-importance-results', 'figure'),
    Input('model_type', 'value'))
def update_model_results(model):
    """Build the top-10 feature-importance bar chart for the selected model.

    Args:
        model: Value of the ``model_type`` radio group: ``'forest'``,
            ``'xg_boost'``, or anything else (treated as gradient boosting).

    Returns:
        A horizontal bar figure of the ten largest feature importances,
        ordered from smallest to largest.
    """
    if model == 'forest':
        feature_imports = rand_forest.feature_importances_
    elif model == 'xg_boost':
        feature_imports = xg_boost.feature_importances_
    else:
        feature_imports = grad_boost.feature_importances_

    # Pair each training column with its importance, keep the ten largest, and
    # sort ascending for the horizontal bar chart.
    feature_names = X_train.columns
    most_imp_features = (
        pd.DataFrame(list(zip(feature_names, feature_imports)),
                     columns=["Feature", "Importance"])
        .nlargest(10, "Importance")
        .sort_values(by="Importance")
    )

    fig = px.bar(most_imp_features, x='Importance', y='Feature', color='Importance', orientation='h',
                 height=400, title='Most Important Features', color_continuous_scale='emrld')

    return fig

In [16]:
# Start the Dash server for the model-results app defined above, with debug
# mode off. The call blocks this cell until the server is stopped.
if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)


# Dashboard 3: Prediction Function

In [17]:
# Show the training feature names and their order; make_prediction labels the
# row it sends to the classifier with these columns.
X_train.columns

Index(['accommodates', 'bathrooms', 'beds', 'price', 'instant_bookable',
       'property_type_Guest suite', 'property_type_Condominium',
       'property_type_House', 'property_type_Other', 'property_type_Townhouse',
       'room_type_Hotel room', 'room_type_Private room',
       'room_type_Shared room', 'neighborhood_Bellevue',
       'neighborhood_Donelson', 'neighborhood_Downtown',
       'neighborhood_East_Nashville', 'neighborhood_Madison',
       'neighborhood_Midtown', 'neighborhood_North_Nashville',
       'neighborhood_Parkwood', 'neighborhood_South_Nashville',
       'neighborhood_Southeast_Nashville', 'neighborhood_West_Nashville',
       'neighborhood_Whites_Creek'],
      dtype='object')

In [20]:
# Choices offered by the prediction form's dropdowns. Each value is the suffix
# of a one-hot column name (e.g. 'House' -> 'property_type_House').
property_types = [
    'Condominium',
    'House',
    'Other',
    'Townhouse',
    'Guest suite',
]

room_types = [
    'Hotel room',
    'Private room',
    'Shared room',
]

neighborhoods = [
    'Bellevue',
    'Donelson',
    'Downtown',
    'East_Nashville',
    'Madison',
    'Midtown',
    'North_Nashville',
    'Parkwood',
    'South_Nashville',
    'Southeast_Nashville',
    'West_Nashville',
    'Whites_Creek',
]

In [21]:
def make_prediction(clf, property_type, room_type, neighborhood, price, 
                    accommodates=2,bathrooms=2, beds=2, instant_bookable=1,
                    feature_columns=None):
    """Classify one hypothetical listing and return the permit decision text.

    The single-row input is built by column name, so every value lands in the
    column it belongs to regardless of the order the model's features are
    stored in. (Building the row positionally put the property-type flags in
    the wrong columns, because 'property_type_Guest suite' is the first
    property column in the training data.)

    Args:
        clf: Fitted classifier exposing ``predict``.
        property_type: Property type; matched to ``property_type_<value>``.
        room_type: Room type; matched to ``room_type_<value>``.
        neighborhood: Neighborhood; matched to ``neighborhood_<value>``.
        price: Value for the ``price`` feature.
        accommodates: Value for the ``accommodates`` feature.
        bathrooms: Value for the ``bathrooms`` feature.
        beds: Value for the ``beds`` feature.
        instant_bookable: Value for the ``instant_bookable`` feature.
        feature_columns: Ordered feature names expected by ``clf``. Defaults
            to ``X_train.columns``.

    Returns:
        ``'REJECT AIRBNB APPLICATION'`` when the classifier predicts 1,
        otherwise ``'ACCEPT AIRBNB APPLICATION'``.
    """
    if feature_columns is None:
        feature_columns = X_train.columns
    feature_columns = list(feature_columns)

    # Start from an all-zero row, then fill in the numeric features.
    row = dict.fromkeys(feature_columns, 0)
    row.update({
        'accommodates': accommodates,
        'bathrooms': bathrooms,
        'beds': beds,
        'price': price,
        'instant_bookable': instant_bookable,
    })

    # Switch on the one-hot column for each categorical choice. A choice with
    # no matching column leaves all of its group's columns at 0.
    for dummy_col in (f'property_type_{property_type}',
                      f'room_type_{room_type}',
                      f'neighborhood_{neighborhood}'):
        if dummy_col in row:
            row[dummy_col] = 1

    # columns= fixes the column order to the one the classifier expects.
    X = pd.DataFrame([row], columns=feature_columns)

    y_hat = clf.predict(X)

    # predict returns one label per row; there is exactly one row here.
    if np.asarray(y_hat).ravel()[0] == 1:
        return 'REJECT AIRBNB APPLICATION'
    else:
        return 'ACCEPT AIRBNB APPLICATION'


In [22]:
def _form_row(label, control):
    """Return a row whose single column holds a padded Div with a label and a control."""
    return dbc.Row([
        dbc.Col(html.Div([label, control], style={'padding': 10}))
    ])


def _as_options(values):
    """Return dropdown options that use each value as both label and value."""
    return [{'label': item, 'value': item} for item in values]


app = dash.Dash(__name__, external_stylesheets=[dbc.themes.MINTY])

app.layout = html.Div([
    dbc.Container([
        # Page title, explanatory subtitle and section header.
        dbc.Row([
            dbc.Col(html.H1("Model Predictions - Will a New Airbnb Be Highly Available?"),
                    className="mb-2")
        ]),
        dbc.Row([
            dbc.Col(html.H6(children='Using the XGBoost model will help the city determine \
            whether a property will be non-owner occupied. Non owner-occupied units will \
            have their applications rejected.'), 
                    className="mb-4")
        ]),
        dbc.Row([
            dbc.Col(dbc.Card(html.H3(children='Permit Application Decision',
                                     className="text-center text-light bg-primary"),
                             body=True, color="primary"),
                    className="mb-4")
        ]),

        # Categorical inputs.
        _form_row("Property Type: ",
                  dcc.Dropdown(id='property-type', options=_as_options(property_types), value='House')),
        _form_row("Room Type: ",
                  dcc.Dropdown(id='room-type', options=_as_options(room_types), value='Private room')),
        _form_row("Neighborhood: ",
                  dcc.Dropdown(id='neighborhood', options=_as_options(neighborhoods), value='Downtown')),
        _form_row("Is Instant Bookable: ",
                  dcc.Dropdown(id='instant-book',
                               options=[{'label': 'Yes', 'value': 1},
                                        {'label': 'No', 'value': 0}],
                               value=1)),

        # Numeric inputs.
        _form_row("Price: ", dcc.Input(id='price-per-night', type='number', value=150)),
        _form_row("Accommodates: ", dcc.Input(id='accom-number', type='number', value=4)),
        _form_row("Bathrooms: ", dcc.Input(id='bath-number', type='number', value=2)),
        _form_row("Beds: ", dcc.Input(id='bed-number', type='number', value=2)),

        # Card whose H3 is filled in by the prediction callback.
        dbc.Row([
            dbc.Col(dbc.Card(children=[html.H3(id='output-result', className="text-center")],
                             body=True, color="primary", inverse=True),
                    width={"size": 6, "offset": 3}, className="text-center")
        ]),
    ])
])

    

In [23]:
@app.callback(
    Output('output-result', 'children'),
    Input('property-type', 'value'),
    Input('room-type', 'value'),
    Input('neighborhood', 'value'),
    Input('instant-book', 'value'),
    Input('price-per-night', 'value'),
    Input('accom-number', 'value'),
    Input('bath-number', 'value'),
    Input('bed-number', 'value'))
def application_result(property_type, room_type, neighborhood, 
                       insta_book, price, accommodates, bathrooms, beds):
    """Return the permit decision text for the values currently in the form.

    Args:
        property_type: Value of the ``property-type`` dropdown.
        room_type: Value of the ``room-type`` dropdown.
        neighborhood: Value of the ``neighborhood`` dropdown.
        insta_book: Value of the ``instant-book`` dropdown (1 or 0).
        price: Value of the ``price-per-night`` input.
        accommodates: Value of the ``accom-number`` input.
        bathrooms: Value of the ``bath-number`` input.
        beds: Value of the ``bed-number`` input.

    Returns:
        The string produced by ``make_prediction`` with the XGBoost model, or
        a prompt asking for the missing values when any field is empty.
    """
    # A cleared dropdown or emptied number box arrives as None, which the
    # classifier cannot score; ask for the missing value instead of erroring.
    form_values = (property_type, room_type, neighborhood, insta_book,
                   price, accommodates, bathrooms, beds)
    if any(value is None for value in form_values):
        return 'Enter a value for every field to see a decision.'

    prediction = make_prediction(xg_boost, property_type, room_type, neighborhood, price, 
                    accommodates,bathrooms, beds, insta_book)
    return prediction

In [24]:
# Start the Dash server for the prediction app defined above, with debug mode
# off. The call blocks this cell until the server is stopped.
if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)


# Index Dashboard

In [26]:

app = dash.Dash(external_stylesheets=[dbc.themes.MINTY])

# GitHub entry for the navbar; its href is the placeholder "#".
nav_item = dbc.NavItem(dbc.NavLink("GitHub", href="#"))

# Dropdown menu linking to the three dashboard routes.
dropdown = dbc.DropdownMenu(
    label="Menu",
    nav=True,
    in_navbar=True,
    children=[
        dbc.DropdownMenuItem("EDA", href="/eda"),
        dbc.DropdownMenuItem("Classification Models", href="/models"),
        dbc.DropdownMenuItem("Prediction Generator", href="/prediction"),
    ],
)

# Logo and brand text side by side; the row/col pair controls their vertical
# alignment. The whole brand is wrapped in a link.
brand_row = dbc.Row(
    [
        dbc.Col(html.Img(src="/assets/house.png", height="30px")),
        dbc.Col(dbc.NavbarBrand("Nashville Housing and Airbnb Analysis", className="ml-2")),
    ],
    align="center",
    no_gutters=True,
)
brand_link = html.A(brand_row, href="https://plot.ly")

# Navigation links live in a collapse that the toggler opens and closes.
collapsible_nav = dbc.Collapse(
    dbc.Nav([nav_item, dropdown], className="ml-auto", navbar=True),
    id="navbar-collapse2",
    navbar=True,
)

navbar = dbc.Navbar(
    dbc.Container(
        [
            brand_link,
            dbc.NavbarToggler(id="navbar-toggler2"),
            collapsible_nav,
        ]
    ),
    color="dark",
    dark=True,
    className="mb-5",
)

# Page shell: the URL tracker, the navbar, and the container that the routing
# callback fills with the selected page.
app.layout = html.Div([
    dcc.Location(id='url', refresh=False),
    navbar,
    html.Div(id='page-content'),
])


# Callback body that toggles the navbar collapse on small screens.
def toggle_navbar_collapse(n, is_open):
    """Return the new open state of the navbar collapse.

    Args:
        n: Click count of the navbar toggler (falsy before the first click).
        is_open: Current open state of the collapse.

    Returns:
        The negation of ``is_open`` when ``n`` is truthy, otherwise ``is_open``
        unchanged.
    """
    return (not is_open) if n else is_open


# Register toggle_navbar_collapse for the one navbar on this page, whose
# toggler and collapse ids end in "2". (The original single-iteration loop was
# a leftover from an example that wired several navbars.)
app.callback(
    Output("navbar-collapse2", "is_open"),
    [Input("navbar-toggler2", "n_clicks")],
    [State("navbar-collapse2", "is_open")],
)(toggle_navbar_collapse)
    

@app.callback(Output('page-content', 'children'),
              [Input('url', 'pathname')])
def display_page(pathname):
    """Return the page content to render for the current URL path.

    Args:
        pathname: Path component reported by the ``url`` Location component.

    Returns:
        ``models.layout`` for ``/models``, ``prediction.layout`` for
        ``/prediction``, and ``home.EDA`` for every other path (including
        ``/eda``).
    """
    # NOTE(review): `models`, `prediction` and `home` are not imported or
    # defined anywhere in the visible notebook; as written this raises
    # NameError on the first page load. Presumably they are sibling modules
    # holding each dashboard's layout -- confirm and import them.
    if pathname == '/models':
        return models.layout
    elif pathname == '/prediction':
        return prediction.layout
    else:
        return home.EDA
    
# Start the Dash server for the index app defined above, with debug mode off.
# The call blocks this cell until the server is stopped.
if __name__ == "__main__":
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)


# Folium

In [30]:
# Base map centred on a fixed [latitude, longitude] pair
# (presumably central Nashville -- confirm), at zoom level 11.
map_center = [36.1627, -86.7816]
m = folium.Map(location=map_center, zoom_start=11)
m

In [31]:
# Coordinates and id of the first 500 current listings; these become map markers.
marker_columns = ['latitude', 'longitude', 'id']
markers = current_listings[marker_columns][:500]

In [32]:
# Add one marker per selected listing to the map, then display the map.
for row_pos in range(len(markers)):
    listing = markers.iloc[row_pos]
    marker_location = [listing['latitude'], listing['longitude']]
    folium.Marker(location=marker_location).add_to(m)

m