# Choropleth creator for RSNA Data (rental)

This script generates a choropleth using the RSNA data from Zillow for each of the four cities, over each year. Render time is 3 minutes per frame, so it takes about half an hour to generate all the choropleths.  
Because render time is so long, I export the images to an animated GIF using the imageio library.

In [28]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
import json
import matplotlib.pyplot as plt
import plotly.express as px
import imageio
import warnings
warnings.filterwarnings('ignore')

In [29]:
# Read in the cleaned Zillow rental (RSNA) CSV.
# `infer_datetime_format` is deprecated since pandas 2.0 and `parse_dates=True`
# only targets the index (none is requested here), so neither is passed; the
# "date" column is converted explicitly below instead.
zrental_path = Path("../data/clean_data/RSNA.csv")
zrental_df = pd.read_csv(zrental_path)

# Make "date" a real datetime column so the `.dt` accessor can be used on it.
zrental_df["date"] = pd.to_datetime(zrental_df["date"])
zrental_df.head()

Unnamed: 0,date,region,avg_value,zipcode,city
0,2014-02-28,75201; TX; Dallas-Fort Worth-Arlington; Dallas...,1727,75201,Dallas
1,2014-05-31,75201; TX; Dallas-Fort Worth-Arlington; Dallas...,2013,75201,Dallas
2,2014-08-31,75201; TX; Dallas-Fort Worth-Arlington; Dallas...,1902,75201,Dallas
3,2014-09-30,75201; TX; Dallas-Fort Worth-Arlington; Dallas...,1907,75201,Dallas
4,2014-10-31,75201; TX; Dallas-Fort Worth-Arlington; Dallas...,1873,75201,Dallas


In [30]:
# Read in the salary data from the IRS CSV.
# `infer_datetime_format` is deprecated since pandas 2.0 and `parse_dates=True`
# only targets the index (none is requested here), so neither is passed.
salary_df = pd.read_csv("../data/clean_data/cnc_sal_data.csv")

# Average adjusted gross income per filed return.
# NOTE(review): the * 1000 presumably converts AGI from thousands of dollars
# to dollars -- confirm against the IRS source file.
salary_df["Avg_AGI"] = salary_df["AGI"] / salary_df["NO. OF RETURNS"] * 1000

# Drop every row that has a missing value in any column.
salary_df = salary_df.dropna()
salary_df.head()

Unnamed: 0,YEAR,ZIP CODE,NO. OF RETURNS,AGI,Avg_AGI
0,2007,73301,1798,7230,4021.134594
1,2007,73949,43,1915,44534.883721
2,2007,75001,7128,693056,97230.078563
3,2007,75002,26669,2002247,75077.693202
4,2007,75006,21953,1053748,48000.182207


In [31]:
# "AGI" and "NO. OF RETURNS" have already been folded into "Avg_AGI",
# so remove them from the salary DataFrame.
salary_df = salary_df.drop(labels=["AGI", "NO. OF RETURNS"], axis=1)
salary_df.head()

Unnamed: 0,YEAR,ZIP CODE,Avg_AGI
0,2007,73301,4021.134594
1,2007,73949,44534.883721
2,2007,75001,97230.078563
3,2007,75002,75077.693202
4,2007,75006,48000.182207


In [32]:
# Average the rental values per zip code, city and calendar year, then rename
# the columns so they match the salary data.
# Only "avg_value" is aggregated: calling .mean() on the whole frame would also
# try to average the text column "region", which raises a TypeError on
# pandas >= 2.0 (older versions silently dropped such columns).
zrental_year_df = (
    zrental_df
    .groupby(["zipcode", "city", zrental_df["date"].dt.year])["avg_value"]
    .mean()
    .reset_index()  # the year level keeps the name "date" until renamed below
    .rename(columns={"date": "YEAR", "zipcode": "ZIP CODE", "city": "CITY"})
)
zrental_year_df.head()

Unnamed: 0,ZIP CODE,CITY,YEAR,avg_value
0,75201,Dallas,2014,1886.166667
1,75201,Dallas,2015,1914.333333
2,75201,Dallas,2016,1919.454545
3,75201,Dallas,2017,1889.833333
4,75201,Dallas,2018,1884.583333


In [33]:
# Join the yearly Zillow rental averages with the IRS income data on
# (YEAR, ZIP CODE), then add the Opportunity Ratio column:
# average AGI divided by the average rental value.
combined_df = zrental_year_df.merge(salary_df, on=['YEAR', 'ZIP CODE'])
combined_df["OP_RATIO"] = combined_df['Avg_AGI'].div(combined_df["avg_value"])
combined_df.head()

Unnamed: 0,ZIP CODE,CITY,YEAR,avg_value,Avg_AGI,OP_RATIO
0,75201,Dallas,2014,1886.166667,271637.992376,144.015901
1,75201,Dallas,2015,1914.333333,299883.371824,156.651596
2,75201,Dallas,2016,1919.454545,254543.944266,132.612645
3,75201,Dallas,2017,1889.833333,413424.3,218.762307
4,75201,Dallas,2018,1884.583333,289949.903475,153.853586


In [34]:
# Load the TX zip code GeoJSON file; it is handed to the choropleth calls
# through their `geojson=` argument.
tx_zip = json.loads(Path('../data/tx_texas_zip_codes_geo.min.json').read_text())

In [20]:
# Render one choropleth PNG per metro area per year into images/rental_rsna/.
out_dir = Path("images/rental_rsna")
out_dir.mkdir(parents=True, exist_ok=True)  # make sure the target folder exists before writing

# Output file prefix -> (CITY values that make up the metro, figure title template).
# Titles are reproduced exactly as before, including Houston's capital "Using".
metros = {
    "houston": (["Houston"], "Opportunity Ratio for Houston in {year} Using Rental Data"),
    "austin": (["Austin"], "Opportunity Ratio for Austin in {year} using Rental Data"),
    "sa": (["San Antonio"], "Opportunity Ratio for San Antonio in {year} using Rental Data"),
    "dfw": (["Dallas", "Fort Worth"], "Opportunity Ratio for Dallas-Fort Worth in {year} using Rental Data"),
}
plot_years = range(2014, 2019)

# Colour scale limits, shared by every frame so the maps stay comparable across
# cities and years. The previous fixed range of (0.3, 0.9) cannot fit this
# notebook's ratios (annual AGI over monthly rent is in the hundreds), which
# made every zip code saturate at the top colour. The 5th-95th percentile of
# the plotted rows is used so that a few outliers do not flatten the scale.
# NOTE(review): the percentile choice is a judgement call -- adjust if needed.
plotted_cities = [city for cities, _ in metros.values() for city in cities]
plotted_ratio = combined_df.loc[
    combined_df["CITY"].isin(plotted_cities)
    & combined_df["YEAR"].isin(list(plot_years)),
    "OP_RATIO",
]
color_range = (
    float(plotted_ratio.quantile(0.05)),
    float(plotted_ratio.quantile(0.95)),
)

for year in plot_years:
    # Filter by year once, then slice per metro.
    year_df = combined_df[combined_df["YEAR"] == year]

    for prefix, (cities, title) in metros.items():
        metro_df = year_df[year_df["CITY"].isin(cities)]

        fig = px.choropleth(
            metro_df,
            geojson=tx_zip,
            locations="ZIP CODE",
            color="OP_RATIO",
            featureidkey="properties.ZCTA5CE10",
            color_continuous_scale="Viridis",
            range_color=color_range,
            scope="usa",
            labels={"OP_RATIO": "Opportunity Ratio"},
            title=title.format(year=year),
        )
        # Zoom to the plotted zip codes and hide the base map.
        fig.update_geos(fitbounds="locations", visible=False)
        # The figure is only written to disk, never displayed, which keeps the
        # notebook small.
        fig.write_image(str(out_dir / f"{prefix}_{year}.png"), engine="kaleido")

    print(year)  # progress indicator: rendering is slow

2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018


In [21]:
# Stitch the per-year PNGs of each metro area into one animated GIF.
# Fixes over the previous version of this cell:
#   * frames are read from images/rental_rsna (where this notebook writes its
#     PNGs) instead of images/condo_zcon;
#   * only 2014-2018 are read, matching the years the rendering loop produces;
#   * Houston frames come from the houston_*.png files (they were read from
#     austin_*.png) and houston.gif is now saved as well.
frames_dir = Path("images/rental_rsna")
gif_years = range(2014, 2019)


def _load_frames(prefix):
    """Return the frames for *prefix* (e.g. "houston") in year order."""
    return [
        imageio.imread(str(frames_dir / f"{prefix}_{year}.png"))
        for year in gif_years
    ]


houston_images = _load_frames("houston")
austin_images = _load_frames("austin")
sa_images = _load_frames("sa")
dfw_images = _load_frames("dfw")

# One frame per second, one GIF per metro area.
for prefix, frames in (
    ("houston", houston_images),
    ("austin", austin_images),
    ("sa", sa_images),
    ("dfw", dfw_images),
):
    imageio.mimsave(str(frames_dir / f"{prefix}.gif"), frames, fps=1)