# Setup

In [None]:
!pip install pandas
!pip install rtree, pygeos, fiona, pyproj, gdal, shapely, basemap
!pip install geopandas
!pip install networkx

# Install packages
!pip install folium
!pip install sodapy -q
!pip install adjustText
!pip install plotly --upgrade
!pip install contextily
!pip install mapclassify
!pip install chart-studio

In [None]:
#hosted runtime
# This cell only works on Google Colab: it mounts the user's Google Drive and
# points `drivepath` at the project folder inside it.

from google.colab import drive
# force_remount=True re-mounts even if the drive is already mounted.
drive.mount('/content/drive', force_remount=True)

# Project root used by every later cell to build input/output paths.
# Note the trailing slash: some later cells append sub-paths by plain string
# concatenation and rely on it.
drivepath = '/content/drive/My Drive/Columbia/3 Fall 22/RA/Detroit/'

In [None]:
#local runtime

#drivepath = '/Users/kirthi/kb3185@columbia.edu - Google Drive/My Drive/Columbia/3 Fall 22/RA/Detroit'

In [None]:
import warnings
import os
# Silences every warning for the rest of the session (including pandas /
# geopandas deprecation warnings), so API changes will not be announced.
warnings.filterwarnings("ignore")

# Tabular data and geometry primitives
import numpy as np
import pandas as pd
from shapely.geometry import shape
from shapely.geometry import Point


# HTTP / JSON helpers
from urllib.parse import urlencode
import urllib.request, json 

import networkx as nx

# Static plotting
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline 
import random
from random import randint

# Interactive plotting (plotly)
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# `py` is bound to plotly.offline here; a later cell rebinds `py` to
# chart_studio.plotly.
import plotly.offline as py
import plotly.express as px
from sklearn.preprocessing import minmax_scale
py.init_notebook_mode(connected=False)

In [None]:
import geopandas as gpd
from pyproj import CRS
from shapely import wkt

In [None]:
# import more packages
import os

import plotly.io as pio
pio.renderers.default = "colab"

# Mapbox token used by plotly express for mapbox-based figures.
# Prefer supplying it through the MAPBOX_ACCESS_TOKEN environment variable so
# the credential does not have to live in the notebook; the literal below is
# kept only as a backward-compatible fallback.
# NOTE(review): the "pk." prefix suggests a public-scope token - confirm, and
# rotate/remove the literal if it is not meant to be shared.
access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiYmtpcnRoaSIsImEiOiJja3VpdzFiMnoycnYyMzBtYXM1aW1uemd0In0.knHw7eYrN8DvrrBHZugW8Q',
)
px.set_mapbox_access_token(access_token) # for interactive plotly viz

import plotly.graph_objects as go
# NOTE: this rebinds `py`, which the main import cell bound to plotly.offline.
import chart_studio.plotly as py
import chart_studio.tools as tls

# Data Cleaning

In [None]:
# Read the ZCTA boundary shapefile from the project folder and keep only the
# ZCTA code, land/water area, interior-point coordinates and geometry columns.
# `gdf` is used as a module-level lookup by CleanTables.
gpd_path = drivepath + '/Data/ZCTA Boundaries/tl_2021_us_zcta520/tl_2021_us_zcta520.shp'
gdf = gpd.read_file(gpd_path)[
    ['ZCTA5CE20', 'ALAND20', 'AWATER20', 'INTPTLAT20', 'INTPTLON20', 'geometry']
]

In [None]:
def _is_selected_estimate(col):
  """Return True when `col` is one of the column codes CleanTables keeps.

  A code is kept when it is at most 10 characters long, does not end in 'M',
  its characters [-4:-2] parse as an integer, and the integer formed by its
  characters [-3:-1] is below 65. Codes that fail either integer parse are
  rejected instead of raising.
  """
  col = str(col)
  if len(col) > 10 or col[-1:] == 'M':
    return False
  try:
    int(col[-4:-2])
    code = int(col[-3:-1])
  except ValueError:
    return False
  return code < 65


def _strip_estimate_prefix(label):
  """Drop a leading 'Estimate!!' from a column label, if present."""
  prefix = 'Estimate!!'
  if label.startswith(prefix):
    return label[len(prefix):]
  return label


def CleanTables(year):
  """Load one year's DP05 table, relabel its columns and attach ZCTA geometry.

  Reads 'Data/ZCTA Census Data/Raw Tables/ACSDP5Y<year>.DP05-Data.csv' under
  the module-level `drivepath`, keeps GEO_ID, NAME and the selected estimate
  columns, renames the kept columns from their codes to the labels found in
  the first data row (without the 'Estimate!!' prefix), turns NAME into a
  'ZCTA5CE20' column holding the text after the first space, and merges the
  result with the module-level `gdf` on 'ZCTA5CE20' (pandas' default join).

  Parameters
  ----------
  year : int or str
    Year inserted into the raw file name.

  Returns
  -------
  pandas.DataFrame
    One row per ZCTA present in both the table and `gdf`. The result is not
    wrapped in a GeoDataFrame here; callers such as GetCol do that.

  Raises
  ------
  FileNotFoundError
    If the raw CSV for `year` does not exist.
  KeyError
    If the CSV has no GEO_ID / NAME column.
  """
  filename = 'ZCTA Census Data/Raw Tables/ACSDP5Y'+ str(year) +'.DP05-Data.csv'
  path = os.path.join(drivepath,'Data/',filename)

  raw = pd.read_csv(path)

  keep = ['GEO_ID','NAME'] + [c for c in raw.columns if _is_selected_estimate(c)]
  # Work on an explicit copy so later edits never write into a slice of `raw`.
  df = raw[keep].copy()

  # Row 0 holds the human-readable label of every column; build the
  # code -> label mapping directly instead of editing the row in place
  # (chained assignment is not guaranteed to write back to the frame).
  labels = df.iloc[0].drop(['GEO_ID','NAME'])
  rename_map = {
    code: _strip_estimate_prefix(label)
    for code, label in labels.items()
    if isinstance(label, str)
  }
  df = df.rename(columns=rename_map).drop(0).reset_index(drop=True)

  # NAME looks like '<prefix> <zcta>'; keep what follows the first space.
  # `n` must be passed by keyword: recent pandas rejects it positionally.
  df['NAME'] = df['NAME'].str.split(' ', n=1, expand=True)[1]
  df = df.drop(columns=['GEO_ID']).rename(columns={'NAME': 'ZCTA5CE20'})

  return df.merge(gdf, on='ZCTA5CE20')

In [None]:
# Per-year export of the cleaned tables. The actual write is currently
# disabled (commented out), so this loop only prints each year.
for i in range(2011,2021):
  filename = 'Outputs/ZCTA Census Data/'+ str(i) + '_Census.csv'
  # os.path.join inserts the separator only when needed, so the path is
  # correct whether or not `drivepath` ends with a slash.
  path = os.path.join(drivepath, filename)
  #CleanTables(i).to_csv(path)
  print(i)

# Plotting

In [None]:
def GetCol(col_name,year,value_name='Median_Age'):
  """Extract one numeric column of a year's cleaned table as a GeoDataFrame.

  Parameters
  ----------
  col_name : str
    Label of the column to extract from CleanTables(year).
  year : int
    Year passed to CleanTables and stored in the 'Year' column.
  value_name : str, default 'Median_Age'
    Name given to the extracted column in the result. The default keeps the
    name the plotting cells expect.

  Returns
  -------
  geopandas.GeoDataFrame
    Indexed by 'ZCTA5CE20' with columns [value_name, 'geometry', 'Year'].
    Values that cannot be parsed as numbers become NaN, and rows containing
    any NaN are dropped.

  Raises
  ------
  KeyError
    If `col_name` is not a column of the cleaned table for `year`.
  """
  # NOTE(review): the CRS is declared as EPSG:4326 without reprojecting -
  # confirm this matches the native CRS of the boundary shapefile.
  df = gpd.GeoDataFrame(CleanTables(year), crs='EPSG:4326', geometry='geometry')
  df = (
    df[['ZCTA5CE20',col_name,'geometry']]
    .set_index('ZCTA5CE20')
    .rename(columns={col_name: value_name})
  )
  df[value_name] = pd.to_numeric(df[value_name], errors='coerce').astype(float)
  df['Year'] = year
  return df.dropna()

In [None]:
# Build one long table of median age per ZCTA for 2011-2020.
# 2011 is read with the short label directly.
yearly_frames = [GetCol('SEX AND AGE!!Median age (years)',2011)]

for i in range(2012,2021):
  try:
    temp_df = GetCol('SEX AND AGE!!Total population!!Median age (years)',i)
  except KeyError:
    # This year's table does not carry the longer label; fall back to the
    # short one. Any other failure (e.g. a missing file) is left to surface.
    temp_df = GetCol('SEX AND AGE!!Median age (years)',i)

  yearly_frames.append(temp_df)
  print(i)

# Concatenate once instead of growing the frame inside the loop.
# reset_index() keeps the ZCTA5CE20 code as a regular column (ignore_index=True
# would discard it) while still producing a unique 0..n-1 integer index.
med_age_df = pd.concat(yearly_frames).reset_index()

In [None]:
# Persist the combined median-age table as CSV in the project's Outputs folder.
filename = 'Outputs/ZCTA Census Data/med_age_df.csv'
# os.path.join inserts the separator only when needed, so the path is correct
# whether or not `drivepath` ends with a slash.
path = os.path.join(drivepath, filename)
med_age_df.to_csv(path)

In [None]:
# Animated choropleth of median age: one frame per value of "Year", polygons
# taken from the frame's own geometry column and matched to rows through the
# frame index. Figure creation and layout are chained into one expression.
fig = px.choropleth_mapbox(
    med_age_df,
    geojson=med_age_df.geometry,
    locations=med_age_df.index,
    color="Median_Age",
    animation_frame="Year",
    center={"lat": 45.070430, "lon": -84.653517},
    zoom=6,
    mapbox_style="dark",
    opacity=0.6,
    color_continuous_scale="Plasma",
    labels={"Median_Age": "Median age (years)"},
).update_layout(
    autosize=False,
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
    paper_bgcolor='rgba(0,0,0,0)',
    width=1200,
    height=1200,
    font_color='#ffffff',
)

fig.show(renderer="colab")

In [None]:
# Save the current `fig` as an HTML fragment in the project's Outputs/Plots folder.
df_path = drivepath + '/Outputs/Plots/med_age_df.html'
# full_html=False writes an embeddable fragment rather than a full page;
# include_plotlyjs='cdn' references plotly.js from a CDN instead of inlining it.
fig.write_html(df_path, full_html=False, include_plotlyjs='cdn')

# Ignore

In [None]:
# Median age per ZCTA for 2020 only, used by the single-year map below.
df_2020 = GetCol('SEX AND AGE!!Total population!!Median age (years)',2020)

In [None]:
# Static (single-year) choropleth of the 2020 median age; same styling as the
# animated map but without an animation frame and with a smaller canvas.
fig = px.choropleth_mapbox(
    df_2020,
    geojson=df_2020.geometry,
    locations=df_2020.index,
    color="Median_Age",
    center={"lat": 45.070430, "lon": -84.653517},
    zoom=5,
    mapbox_style="dark",
    opacity=0.6,
    color_continuous_scale="Plasma",
    labels={"Median_Age": "Median age (years)"},
).update_layout(
    autosize=False,
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
    paper_bgcolor='rgba(0,0,0,0)',
    width=800,
    height=500,
    font_color='#ffffff',
)

fig.show(renderer="colab")