In [None]:
# initial imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from pathlib import Path
from dotenv import load_dotenv


%matplotlib inline

In [None]:
# Set up API credentials
# Load environment variables through python-dotenv
load_dotenv()

# Look up the Mapbox API key stored under the "mapbox" environment variable
# (the value is None when that variable is not set)
map_box_api = os.environ.get("mapbox")

# Hand the key to Plotly Express so Mapbox-based figures can use it
px.set_mapbox_access_token(map_box_api)

In [None]:
# Load the census CSV into a DataFrame, using its "year" column as the index
file_path = Path("sfo_neighborhoods_census_data.csv")
sfo_data = pd.read_csv(filepath_or_buffer=file_path, index_col="year")

# Preview the first rows
sfo_data.head()

In [None]:
# Average number of housing units for each year.
# "year" is the index of sfo_data, so the grouping is done on that index level.
units_by_year = sfo_data['housing_units'].groupby(level='year')
figHousingUnits = units_by_year.mean()
figHousingUnits

In [None]:
# Plot the average housing units per year as a bar chart.
# The y limits are set manually from the min, max and standard deviation of the
# yearly averages instead of relying on matplotlib's automatic limits.

# Use the Series reductions rather than the min()/max() builtins: they skip NaN
# the same way .std() does, so all three statistics describe the same values.
minUnits = figHousingUnits.min()
maxUnits = figHousingUnits.max()
stdUnits = figHousingUnits.std()

# Derive the year range for the title from the data so it stays accurate
# if the input file covers different years.
firstYear = figHousingUnits.index.min()
lastYear = figHousingUnits.index.max()

fig, ax = plt.subplots()
ax.bar(figHousingUnits.index, figHousingUnits)

# Optional Challenge: pad the y axis by one standard deviation below the minimum
# and above the maximum
ax.set_ylim(minUnits - stdUnits, maxUnits + stdUnits)

fig.suptitle(
    f'Housing Units in San Francisco from {firstYear} to {lastYear}',
    weight='bold'
)
ax.set_xlabel('Year')
ax.set_ylabel('Housing Units')
plt.show()
plt.close(fig)

In [None]:
# Yearly averages of the sale price per square foot and of the gross rent.
# "year" is the index of sfo_data, so the grouping is done on that index level.
price_and_rent_columns = ['sale_price_sqr_foot', 'gross_rent']
salesRent = sfo_data.groupby(level='year')[price_and_rent_columns].mean()
salesRent

In [None]:
# Line chart of the average gross rent per year
fig, ax = plt.subplots()
ax.plot(salesRent.index, salesRent['gross_rent'])

# Clamp the x axis to the first and last year present in the data
ax.set_xlim(salesRent.index[0], salesRent.index[-1])

fig.suptitle('Average Gross Rent in San Francisco', weight='bold')
ax.set_xlabel('Year')
ax.set_ylabel('Gross Rent')
plt.show()
plt.close(fig)


In [None]:
# Line chart of the average sale price per square foot for each year
years = salesRent.index
fig, ax = plt.subplots()
ax.plot(years, salesRent['sale_price_sqr_foot'])
# The x axis spans exactly the first to the last year in the data
ax.set(
    xlim=(years[0], years[-1]),
    xlabel='Year',
    ylabel='Avg. Sale Price',
)
fig.suptitle(
    'Average Sale Price per Square Foot in San Francisco',
    weight='bold',
)
plt.show()
plt.close(fig)


In [None]:
# Mean of every remaining column for each (year, neighborhood) pair;
# the result is indexed by those two keys
group_keys = ['year', 'neighborhood']
sfoDF = sfo_data.groupby(by=group_keys).mean()
sfoDF.head(n=10)

In [None]:
# Interactive hvplot chart of the average sale price per square foot.
# Grouping by "neighborhood" gives the plot a selector for the neighborhood.
price_per_sqft = sfoDF['sale_price_sqr_foot']
price_per_sqft.hvplot(
    groupby='neighborhood',
    xlabel='year',
    ylabel='average sale per square foot',
    label=' ',
    width=450,
    height=300,
)

In [None]:
# Getting the data from the top 10 expensive neighborhoods:
# average every column per neighborhood, rank by the average sale price per
# square foot (highest first) and keep only the first 10 rows.
# Without the final head(10) the frame holds every neighborhood, so the
# "top 10" bar chart built from it would show all of them.
mostExpensive = (
    sfo_data
    .groupby('neighborhood')
    .mean()
    .sort_values(by='sale_price_sqr_foot', ascending=False)
    .head(10)
)

In [None]:
# Bar chart of the average sale price per square foot for the neighborhoods
# held in mostExpensive
expensive_prices = mostExpensive['sale_price_sqr_foot']
expensive_prices.hvplot.bar(
    xlabel='neighborhood',
    ylabel='average sale price per square foot',
    label='top 10 most expensive neighborhoods',
    rot=40,  # rotate the tick labels
    width=650,
    height=400,
)

In [None]:
# Parallel Coordinates Plot of the first 10 rows of mostExpensive,
# with lines colored by the sale price per square foot
top_ten = mostExpensive.head(10)
px.parallel_coordinates(data_frame=top_ten, color='sale_price_sqr_foot')


In [None]:
# Parallel Categories Plot
# Turn the neighborhood index back into a regular column and round the
# numeric values to two decimals
parallel_df = mostExpensive.reset_index().round(decimals=2)

# Plot only the first five rows, colored by the sale price per square foot
first_five = parallel_df.head(5)
px.parallel_categories(data_frame=first_five, color='sale_price_sqr_foot')

In [None]:

# Read the neighborhood coordinates CSV into a DataFrame
# (this rebinds file_path, which previously pointed at the census data file)
file_path = Path("neighborhoods_coordinates.csv")
df_neighborhood_locations = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first rows
df_neighborhood_locations.head()

In [None]:
# Mean of every column per neighborhood, with the neighborhood moved from the
# index into a regular column named "Neighborhood"
allNeighborhoods = (
    sfo_data
    .groupby(by="neighborhood")
    .mean()
    .reset_index()
    .rename(columns={"neighborhood": "Neighborhood"})
)
allNeighborhoods.head()


In [None]:
# Attach the per-neighborhood averages to the neighborhood locations,
# matching rows on the shared "Neighborhood" column.
# NOTE(review): the result is stored back into allNeighborhoods, so running this
# cell a second time merges the already-merged frame with the locations again.
allNeighborhoods = df_neighborhood_locations.merge(
    allNeighborhoods,
    on="Neighborhood",
)
allNeighborhoods.head()

In [None]:
# Create a scatter mapbox to analyze neighborhood info.
# Marker size encodes the average sale price per square foot and marker color
# encodes the average gross rent.

# Register the Mapbox key again using `map_box_api`, the variable the credentials
# cell actually defines (the previously used `mapbox_token` does not exist and
# raised a NameError on a fresh kernel).
px.set_mapbox_access_token(map_box_api)

# The figure is named `neighborhood_map` so the builtin `map` is not shadowed.
neighborhood_map = px.scatter_mapbox(
    allNeighborhoods,
    lat="Lat",
    lon="Lon",
    size="sale_price_sqr_foot",
    color="gross_rent",
    size_max=15,
    zoom=11,
    hover_name="Neighborhood",
    title="Average Sale Price Per Square Foot and Gross Rent in San Francisco",
)
neighborhood_map.show()