In [18]:
#!/usr/bin/python
# import required modules and load the dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib import cm
from matplotlib.patches import Polygon
from matplotlib.colors import rgb2hex
import math
import json

### Source data: heat-wave-day counts downloaded from wonder.cdc.gov
with open('../Climate_data/heat_wave_days.csv', 'r') as csv_in:
    df_data = pd.read_csv(csv_in)
### pandas parses the 5-digit County Code as a number, which drops leading
### zeros; pull the column out and put it back as a zero-padded 5-character
### string (it ends up as the last column of the frame).
county_codes = df_data.pop('County Code')
df_data['County Code'] = county_codes.apply(lambda code: "%05d" % code)

In [19]:
# Yearly mean of N_days over all rows of df_data, plus a centered 5-year
# rolling mean of that yearly series, written to OUTFILE as CSV.
OUTFILE = "yearly_average_heat_wave_days.csv"

# One row per year: columns 'Year' and 'N_days' (the mean for that year)
df_yearly_mean = df_data.groupby('Year', as_index=False)['N_days'].mean()

# 'RM' is the centered rolling mean; with window=5 the first and last two
# years have an incomplete window and are therefore NaN
df_yearly_mean['RM'] = df_yearly_mean['N_days'].rolling(window=5, center=True).mean()

# Write through the OUTFILE constant so the output location is defined once
with open(OUTFILE, 'w') as f:
    df_yearly_mean.to_csv(f, index=False)
    


In [48]:
# Create the per-county 5-year averages used for colour mapping in the
# javascript code.
# The data are split into 6 consecutive 5-year epochs starting at base_year.
# For each epoch the mean N_days per county is computed and rounded to the
# closest integer, to facilitate lookup in the colour map array for map display.
# min_days / max_days track the smallest and largest rounded county-level
# epoch average seen across the entire dataset.
n_epochs = 6
epoch_len = 5
base_year = 1981
max_days = 0
min_days = 153  # total number of days in data period
five_yr_avg = None
for idx in range(n_epochs):
    ymin = base_year + idx * epoch_len
    ymax = ymin + epoch_len - 1
    in_epoch = df_data[(df_data['Year'] >= ymin) & (df_data['Year'] <= ymax)]
    epoch_avg = in_epoch.groupby('County Code')['N_days'].mean().to_frame()
    epoch_avg = epoch_avg.reset_index(level='County Code')
    epoch_avg['N_days'] = np.round(epoch_avg['N_days']).astype(int)
    if not epoch_avg.empty:
        # plain ints keep min_days / max_days usable with range() later on
        min_days = min(min_days, int(epoch_avg['N_days'].min()))
        max_days = max(max_days, int(epoch_avg['N_days'].max()))
    # Give every epoch its own column name ('0'..'5') *before* merging.
    # Merging several frames that all carry an 'N_days' column relies on the
    # automatic _x/_y suffixes, which collide on the fourth merge (a second
    # 'N_days_x' is produced): duplicate column names on older pandas and a
    # MergeError on current versions.
    epoch_avg = epoch_avg.rename(columns={'N_days': str(idx)})
    if five_yr_avg is None:
        five_yr_avg = epoch_avg
    else:
        # default inner join: only counties present in every epoch are kept
        five_yr_avg = five_yr_avg.merge(epoch_avg, on='County Code')
# min_days and max_days are now the minimum and maximum of all county-level 5-year averages.
# Use matplotlib to create a colour lookup with one entry for every integer
# day count from 0 through max_days, for use by the geojson map.
cmap = plt.cm.Oranges

# max_days may be a float or a numpy integer depending on how it was last
# assigned; range() needs a true integer, so coerce it once here.
n_max = int(max_days)
# Guard the normalisation against division by zero when max_days is 0;
# in that case the single entry (0 days) maps to the start of the colormap.
scale = float(n_max) if n_max > 0 else 1.0
hex_colors = [rgb2hex(cmap(n / scale)[:3]) for n in range(n_max + 1)]

# The same lookup is exported two ways: a dataframe with columns for number
# of days and colour, and a dictionary (day count -> hex colour) dumped as json.
df_cmap = pd.DataFrame(
    {'N_days': list(range(n_max + 1)), 'Color': hex_colors},
    columns=['N_days', 'Color'],
)
c_map_dict = dict(enumerate(hex_colors))
    
# Label the six epoch columns '0'..'5' and key the frame by county code
# before exporting, so the resulting json structure suits the map code
epoch_labels = [str(epoch) for epoch in range(6)]
five_yr_avg.columns = ['County Code'] + epoch_labels
five_yr_avg = five_yr_avg.set_index('County Code')
five_yr_avg.to_json('five_yr_avg.json')

# Persist the colour lookup in both forms: the dataframe as CSV (no index
# column) and the dictionary as JSON
with open('color_map.csv', 'w') as csv_out:
    df_cmap.to_csv(csv_out, index=False)

with open('color_map.json', 'w') as json_out:
    json_out.write(json.dumps(c_map_dict))