### Nika's notebook for final submission
- **Includes Visuals for Affordability and Visuals for Monte Carlo**

In [39]:
# Import statements
import numpy as np
import pandas as pd
from pathlib import Path
%matplotlib inline

#visualizations import statements. Note that not everything was used for the final output
import panel as pn
pn.extension('plotly')
import plotly.express as px
import hvplot.pandas
import matplotlib.pyplot as plt
import holoviews as hv
import os
from dotenv import load_dotenv

In [40]:
# Load the cleaned, combined dataset from a single CSV file.
# The path is relative, so it resolves against the kernel's working directory.
# `combined_df` is a module-level DataFrame that later cells read as a global.

combined_csv = Path("../Data/Clean/combined_df.csv")
combined_df = pd.read_csv(combined_csv)

### Plots for housing affordability

In [41]:
#plot for affordability bar chart (used in presentation)

def affordability_bar_func():
    """Build the rent-affordability bar chart, one bar per MSA.

    Reads the module-level ``combined_df``, orders it from the highest to the
    lowest "Affordability Rent/Median Inc" value, and colours each bar by
    "Density".

    Only rows missing a value in a column this chart actually uses are
    dropped. A bare ``dropna()`` would also discard MSAs whose only gap is in
    an unrelated column of ``combined_df``.

    Returns:
        The Plotly Express bar figure, with the y axis formatted as whole
        percentages over a fixed 0-45% range.
    """
    plotted_columns = ["MSA", "Affordability Rent/Median Inc", "Density"]

    affordability_df = (
        combined_df
        .sort_values("Affordability Rent/Median Inc", ascending=False)
        .dropna(subset=plotted_columns)
    )

    affordability_bar = px.bar(
        affordability_df,
        x="MSA",
        y="Affordability Rent/Median Inc",
        color="Density",
        color_continuous_scale='dense',
        title="<b>Percentage of Income spent on Rent</b>",
        height=600,
    )

    affordability_bar.update_layout(
        xaxis_tickfont_size=11,
        # Linear ticks with a step of 1 request a tick at every position on the x axis.
        xaxis=dict(
            tickmode='linear',
            dtick=1,
        ),
        # The y range is fixed, so values above 45% extend past the visible axis.
        yaxis=dict(
            tickformat=',.0%',
            range=[0, .45],
            title='',
        ),
    )

    return affordability_bar
                                 
affordability_bar_func()  # shown here only to display the chart in this notebook; the dashboard should call the function above directly

In [42]:
#plot for affordability scatter with trendline (used in presentation)

def affordability_scatter_func():
    """Build the rent-versus-income scatter plot with an OLS trendline.

    Reads the module-level ``combined_df``. Each point is one row, placed by
    'Median income (dollars)' (x) and '2019 Annualized Rent' (y), coloured by
    'Density', and labelled on hover with "MSA" and
    "Affordability Rent/Median Inc".

    Only rows missing a value in a column this chart actually uses are
    dropped. A bare ``dropna()`` would also discard MSAs whose only gap is in
    an unrelated column of ``combined_df``.

    Returns:
        The Plotly Express scatter figure (600x500) with marker size 12.
    """
    x_column = 'Median income (dollars)'
    y_column = '2019 Annualized Rent'
    affordability_column = "Affordability Rent/Median Inc"
    plotted_columns = [x_column, y_column, 'Density', "MSA", affordability_column]

    affordability_df = (
        combined_df
        .sort_values(affordability_column, ascending=False)
        .dropna(subset=plotted_columns)
    )

    affordability_scatter = px.scatter(
        affordability_df,
        x=x_column,
        y=y_column,
        color='Density',
        width=600,
        height=500,
        trendline="ols",
        trendline_color_override="blue",
        title="<b>US cities: Rent, Median Income and Population Density</b>",
        hover_name="MSA",
        color_continuous_scale='dense',
        hover_data=[affordability_column],
    )

    # Enlarge the markers from the default size.
    affordability_scatter.update_traces(marker=dict(size=12))

    return affordability_scatter
                                 
affordability_scatter_func()  # shown here only to display the chart in this notebook; the dashboard should call the function above directly

### Boxplot for Monte Carlo

In [43]:
# Load the Monte Carlo output CSV into a pandas DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
# `mc_data` is a module-level DataFrame that later cells read as a global.
mcoutput_path = Path("../Data/mc_output.csv")
mc_data = pd.read_csv(mcoutput_path)

In [44]:
# Plot for monte carlo 1,3,5 year returns (used in presentation)

def mc_boxplot_func():
    """Build the box plot of the Monte Carlo 1-, 3- and 5-year average returns.

    Reads the module-level ``mc_data`` and draws one box per return column.

    Returns:
        The Plotly Express box figure (600x500). The y axis is formatted as
        whole percentages, fixed to the range -3% to 12% with a tick every 1%,
        and both axis titles are blank.
    """
    return_columns = ["1 Year Avg Return", "3 Year Avg Return", "5 Year Avg Return"]
    returns_frame = pd.DataFrame(data=mc_data)

    fig = px.box(
        returns_frame,
        y=return_columns,
        title="<b>Monte Carlo Forecast on Sale Prices</b>",
        width=600,
        height=500,
    )

    # Axis settings are named up front so the layout call stays on one line.
    y_axis_settings = {
        "tickformat": ',.0%',
        "range": [-0.03, .12],
        "title": '',
        "dtick": .01,
    }
    x_axis_settings = {"title": ''}

    fig.update_layout(yaxis=y_axis_settings, xaxis=x_axis_settings)

    return fig

mc_boxplot_func()  # shown here only to display the chart in this notebook; the dashboard should call the function above directly

### Other plots that may or may not be used in final dashboard

In [46]:
# Parallel Coordinates Plot

def parallelcoord_func(min_population=0):
    """Build a parallel-coordinates plot of selected real-estate variables.

    Reads the module-level ``combined_df``, keeps rows whose 'Population' is
    strictly greater than ``min_population``, selects the columns listed in
    ``columns_needed``, and drops any row with a missing value in them.

    Args:
        min_population: Exclusive lower bound on 'Population'. The default of
            0 applies no real size filter. Rows with a missing 'Population'
            are still excluded, because a NaN comparison is never true.

    Returns:
        The Plotly Express parallel-coordinates figure, coloured by
        "Median income (dollars)".
    """
    columns_needed = [
        'MSA',
        'Murder Rate',
        'Median income (dollars)',
        'Capitalization Rate Last',
        'Density',
        '5 Year Avg Return',
        'Sale Price Last',
        'Monthly Rent Last',
    ]

    population_filtered = combined_df[combined_df['Population'] > min_population]
    paralleldata_frame = population_filtered[columns_needed].dropna(subset=columns_needed)

    plotparallelcoord = px.parallel_coordinates(
        paralleldata_frame,
        labels={
            "Median income (dollars)": "Median Income",
            "Capitalization Rate Last": "Cap Rate",
            "5 Year Avg Return": "Forecast Return (5yr)",
            # NOTE(review): the two columns below are not in columns_needed, so these
            # labels have no effect unless those columns are added to the selection.
            "% Sale Price Change Since Feb 2019": "Sale Price Change",
            "% Rent Change Since Feb 2019": "Rent Price Change",
        },
        color="Median income (dollars)",
        title="Relationships between various real estate variables",
    )

    return plotparallelcoord

# Call the function as the cell's last expression so the figure is displayed in this notebook.
parallelcoord_func()