# Data Viz
### Purpose
The purpose of this notebook is to create some data visualizations to be used in the project sponsor update.

### Author: 
Ian Davis
### Date: 
2020-09-05
### Update Date: 
2020-09-05

### Inputs 
1.3-rec-connecting-fips-ecosystem-data.txt - Gzip-compressed, tab-separated file of the Christmas Bird Count circles and their matches to one or more NOAA weather stations.
- Data Dictionary can be found here: http://www.audubon.org/sites/default/files/documents/cbc_report_field_definitions_2013.pdf

### Output Files
Supporting data files:
- 1.3.1-ijd-circles_and_fips.csv
- 2.1-ijd-df_ts_circle_tmax.csv
- 2.1-ijd-df_ts_noaa_tmax.csv

Plot HTML files:
- choropleth_ff.html
- choropleth_json2.html
- circles.html
- distance.html
- elevation.html
- matches.html
- ts-map-anim_tmax.html
- ts-map_tmax.html
- ts_tmax.html

## Steps or Procedures in the notebook 
- Set runtime options
- Import data
- Plots
    - Histogram of station distances

## References
- Figure Factory: https://stackoverflow.com/questions/54734667/error-installing-geopandas-a-gdal-api-version-must-be-specified-in-anaconda
- FIPS query: https://gis.stackexchange.com/questions/294641/python-code-for-transforming-lat-long-into-fips-codes
- FIPS query: https://geo.fcc.gov/api/census/#!/block/get_block_find
- Geojson: https://geoffboeing.com/2015/10/exporting-python-data-geojson/
- Installing geopandas: https://stackoverflow.com/questions/54734667/error-installing-geopandas-a-gdal-api-version-must-be-specified-in-anaconda
- Colorscale: https://plotly.com/python/county-choropleth/
- Colors: http://www.impactlab.org/map/#usmeas=absolute&usyear=1981-2010&gmeas=absolute&gyear=1986-2005

### See data dictionary: 

http://www.audubon.org/sites/default/files/documents/cbc_report_field_definitions_2013.pdf

In [1]:
import numpy as np
import pandas as pd
import datetime
from scipy import stats
import sys
import gzip
import shutil

# for plotting
import plotly.express as px
import plotly.offline as ply
import plotly.graph_objects as go
import plotly.figure_factory as ff
import matplotlib

# for GIS
import requests
import urllib
from urllib.request import urlopen
import json

# add scripts folder to path
sys.path.insert(1, '../scripts')

# user import
from calcs import main_calcs
from calcs import haversine_formula

In [2]:
# ---- File locations (all relative to this notebook) ----
_CLOUD_DATA_DIR = '../data/Cloud_data'

# Paired circle / station dataset that the notebook loads
PATH_TO_PAIRED_DATA = _CLOUD_DATA_DIR + '/1.3-rec-connecting-fips-ecosystem-data.txt'
# Cached per-circle FIPS lookup, and cached state-by-year Tmax tables
PATH_TO_FIPS = _CLOUD_DATA_DIR + '/1.3.1-ijd-circles_and_fips.csv'
PATH_TO_TS_CIRCLE = _CLOUD_DATA_DIR + '/2.1-ijd-df_ts_circle_tmax.csv'
PATH_TO_TS_NOAA = _CLOUD_DATA_DIR + '/2.1-ijd-df_ts_noaa_tmax.csv'

# ---- User options ----
# offline_fips: True -> read FIPS codes from PATH_TO_FIPS instead of querying the API
# offline_ts:   True -> read the time-series dataframes from their .csv files
# popen:        True -> auto-open each plot HTML file when it is generated
offline_fips, offline_ts, popen = True, True, True

## Functions

In [3]:
# Function to query and get FIPS county codes.
def get_fips(lat, lon):
    """Look up the county FIPS code for a coordinate via the FCC block-find API.

    Parameters
    ----------
    lat, lon : float
        Sent to the API as the ``latitude`` / ``longitude`` query parameters.

    Returns
    -------
    str
        ``County.FIPS`` from the JSON response as a string, or ``''`` when the
        request fails, the response is not the expected JSON, or the API
        reports no county (FIPS is null).
    """
    url = 'https://geo.fcc.gov/api/census/block/find'
    params = {'latitude': lat, 'longitude': lon, 'format': 'json'}

    try:
        # Let requests encode the query string; the timeout keeps one stalled
        # call from hanging a lookup that runs once per circle.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        fips = response.json()['County']['FIPS']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network/HTTP error, non-JSON body, or a payload without County/FIPS
        fips = None

    # Progress trace: one line per looked-up coordinate
    print(lat, lon, fips)

    return '' if fips is None else str(fips)

In [4]:
# Convert dataframe to geojson (UN-USED BELOW)
def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
    """Return a GeoJSON FeatureCollection dict with one Point feature per row.

    Each feature's coordinates are ``[row[lon], row[lat]]`` and its
    ``properties`` hold ``row[prop]`` for every name in `properties`.
    """
    def _as_feature(row):
        return {
            'type': 'Feature',
            'properties': {prop: row[prop] for prop in properties},
            'geometry': {
                'type': 'Point',
                # GeoJSON order: longitude first, then latitude
                'coordinates': [row[lon], row[lat]],
            },
        }

    features = [_as_feature(row) for _, row in df.iterrows()]
    return {'type': 'FeatureCollection', 'features': features}

## Read in Data

In [5]:
# Columns to load. Only a subset is read because loading the whole dataset
# ran into memory issues (IJD).
# Trailing tags mark which side of the pairing a column describes:
# "circle" = bird-count circle, "noaa" = matched NOAA station.
fields = (
    # names / ids
    ['circle_name', 'circle_id', 'id', 'ui']
    + ['lat',               # circle
       'lon',               # circle
       'county_fips',       # circle
       'country_state']
    # when the count took place
    + ['count_year', 'count_date']
    + ['latitude',          # noaa
       'longitude',         # noaa
       'circle_elev',       # circle
       'elevation']         # noaa
    # weather recorded with the circle
    + ['min_snow', 'max_snow', 'am_rain', 'pm_rain', 'max_temp', 'min_temp']
    # temperatures recorded by the station
    + ['temp_max_value',    # noaa
       'temp_min_value']    # noaa
)

In [6]:
# Load the paired dataset, restricted to the columns listed in `fields`.
# The file is read as gzip-compressed, tab-delimited text.
read_options = dict(compression='gzip',
                    sep='\t',
                    skipinitialspace=True,
                    usecols=fields)
df_paired = pd.read_csv(PATH_TO_PAIRED_DATA, **read_options)


Columns (62) have mixed types.Specify dtype option on import or set low_memory=False.



# Data Screening & Filtering

In [7]:
# Derived columns and basic screening (calculations copied from calcs.py).

# Circle-to-station distance. haversine_formula is applied row by row, so the
# rows are still processed in four chunks; only the index is split, not the
# DataFrame itself.
coord_cols = ['lat', 'lon', 'latitude', 'longitude']
for chunk_index in np.array_split(df_paired.index.to_numpy(), 4):
    df_paired.loc[chunk_index, 'distance'] = (
        df_paired.loc[chunk_index, coord_cols].apply(haversine_formula, axis=1)
    )

# Everything below works on whole columns, so it is done once rather than
# once per chunk.

# Absolute elevation difference between circles and stations
df_paired['elev_diff'] = (df_paired['circle_elev'] - df_paired['elevation']).abs()

# Convert NOAA temperatures from tenths of a degree to degrees
# (presumably tenths of deg C: the x1.8 + 32 step yields deg F)
df_paired['noaa_tmax_value'] = df_paired['temp_max_value'] / 10.0 * 1.8 + 32.0
df_paired['noaa_tmin_value'] = df_paired['temp_min_value'] / 10.0 * 1.8 + 32.0

# Remove temperature errors: maximum temperatures above 150 are blanked out.
# Only the two max-temperature columns are screened.
df_paired.loc[df_paired['max_temp'] > 150.0, 'max_temp'] = np.nan
df_paired.loc[df_paired['noaa_tmax_value'] > 150.0, 'noaa_tmax_value'] = np.nan

In [8]:
# Create a temporary string key (rounded lat + rounded lon) to merge on.
# The key format is left unchanged: the cached FIPS .csv stores the same
# temp_key_str values and is merged back on them.
df_paired['temp_key_str'] = (df_paired['lat'].round(3).astype(str)
                             + df_paired['lon'].round(3).astype(str))
print("The number of unique Lat Lon combos in the dataset is: ")
# Print the count explicitly: a bare expression in the middle of a cell is not displayed
print(df_paired['temp_key_str'].nunique())

# One row per unique circle location
df_circle = df_paired[["lat", "lon", "temp_key_str", "circle_name", "county_fips"]]
print(df_circle.shape)
df_circle = df_circle.drop_duplicates("temp_key_str")
print(df_circle.shape)

The number of unique Lat Lon combos in the dataset is: 
(756378, 5)
(3848, 5)


In [9]:
# Make sure all state strings are uppercase.
# Upper-casing the whole column covers any lowercase code (e.g. 'us-fl'),
# not just a fixed list of known offenders.
df_paired['country_state'] = df_paired['country_state'].str.upper()

df_paired['country_state'].unique()

array(['US-HI', 'US-FL', 'US-TX', 'US-LA', 'US-AL', 'US-MS', 'US-GA',
       'US-AZ', 'US-NM', 'US-SC', 'US-CA', 'US-AR', 'US-OK', 'US-NC',
       'US-TN', 'US-NV', 'US-MO', 'US-VA', 'US-KY', 'US-UT', 'US-KS',
       'US-IL', 'US-CO', 'US-WV', 'US-IN', 'US-MD', 'US-DE', 'US-OH',
       'US-DC', 'US-NJ', 'US-PA', 'US-NE', 'US-NY', 'US-IA', 'US-CT',
       'US-WY', 'US-RI', 'US-MA', 'US-MI', 'US-ID', 'US-OR', 'US-WI',
       'US-SD', 'US-NH', 'US-VT', 'US-ME', 'US-MN', 'US-MT', 'US-WA',
       'US-ND', 'US-AK'], dtype=object)

In [10]:
# Number of distinct country_state codes (missing values counted, as unique() does)
df_paired['country_state'].nunique(dropna=False)

51

In [11]:
# Create states only column: drop the leading 'US-' country prefix.
# An anchored replace removes exactly that prefix. str.lstrip('US') strips any
# run of leading 'U'/'S' characters instead, and only worked here because the
# hyphen stopped it before it reached the state letters.
df_paired['state'] = df_paired['country_state'].str.replace(r'^US-', '', regex=True)

In [12]:
# Show the distinct state codes and how many there are
unique_states = df_paired['state'].unique()
print(unique_states)
print(len(unique_states))

['HI' 'FL' 'TX' 'LA' 'AL' 'MS' 'GA' 'AZ' 'NM' 'SC' 'CA' 'AR' 'OK' 'NC'
 'TN' 'NV' 'MO' 'VA' 'KY' 'UT' 'KS' 'IL' 'CO' 'WV' 'IN' 'MD' 'DE' 'OH'
 'DC' 'NJ' 'PA' 'NE' 'NY' 'IA' 'CT' 'WY' 'RI' 'MA' 'MI' 'ID' 'OR' 'WI'
 'SD' 'NH' 'VT' 'ME' 'MN' 'MT' 'WA' 'ND' 'AK']
51


### Get County FIPS codes

In [None]:
# Get County FIPS codes (already ran this and saved the output to a .csv file)

if not offline_fips:
    # Query the API once per unique circle location, then cache the result
    df_circle = df_circle.assign(
        county_fips=df_circle[['lat', 'lon']].apply(
            lambda row: get_fips(row['lat'], row['lon']), axis=1)
    )
    # Save to CSV
    df_circle.to_csv(PATH_TO_FIPS, index=False)
else:
    # Read the cached lookup; FIPS codes are kept as strings
    df_circle = pd.read_csv(PATH_TO_FIPS,
                            dtype={'lat': float,
                                   'lon': float,
                                   'temp_key_str': str,
                                   'circle_name': str,
                                   'county_fips': str})

# Drop existing FIPS column
df_paired = df_paired.drop(columns='county_fips')
# Merge with original dataset (left join keeps every paired row)
df_paired = pd.merge(df_paired,
                     df_circle[['temp_key_str', 'county_fips']],
                     on='temp_key_str',
                     how='left')

# Convert FIPS errors to NaN in the county_fips column only. Selecting rows
# without naming the column would blank every column of those rows.
bad_fips = df_paired['county_fips'].isin(['None', ''])
df_paired.loc[bad_fips, 'county_fips'] = np.nan

# Sanity check (should be less than 2000 NaN's for county_fips)
print('The number of county_fips Nan\'s is:')
print(df_paired['county_fips'].isna().sum())

## Make Plots

### Unique Circle Locations

In [None]:
# Scatter every unique circle on a US map; the hover text is the circle name
circle_markers = go.Scattergeo(
    lat=df_circle['lat'],
    lon=df_circle['lon'],
    text=df_circle['circle_name'],
    mode='markers',
    marker={'opacity': 0.4, 'color': "black"},
)
fig = go.Figure(data=circle_markers)
fig.update_layout(title='Unique Circle Locations', geo_scope='usa')
ply.plot(fig, filename='../plots/circles.html', auto_open=popen)

### Circle to NOAA Station Distances

In [None]:
# Distribution of circle-to-station distances (50 bins requested)
distance_hist = go.Histogram(x=df_paired['distance'], nbinsx=50)
fig = go.Figure(data=[distance_hist])
fig.update_xaxes(title_text='Distance [m]')
fig.update_yaxes(title_text='Counts')
fig.update_layout(template="simple_white",
                  title="Histogram of Distances Between Circles and Stations")
ply.plot(fig, filename='../plots/distance.html', auto_open=popen)

### Number of Matched Stations

In [None]:
# Distribution of the number of rows per 'ui' value, in unit-wide bins from 0 to 50
rows_per_ui = df_paired['ui'].value_counts()
matches_hist = go.Histogram(x=rows_per_ui,
                            xbins={'start': 0, 'end': 50, 'size': 1})
fig = go.Figure(data=[matches_hist])
fig.update_xaxes(title_text='Number of Stations Matched')
fig.update_yaxes(title_text='Counts')
fig.update_layout(template="simple_white",
                  title="Histogram of NOAA Station Matches")
ply.plot(fig, filename='../plots/matches.html', auto_open=popen)

### Elevation Differences

In [None]:
# Distribution of absolute elevation differences, in bins of 10 from 0 to 500
elevation_hist = go.Histogram(x=df_paired['elev_diff'],
                              xbins={'start': 0, 'end': 500, 'size': 10})
fig = go.Figure(data=[elevation_hist])
fig.update_xaxes(title_text='Elevation Change [m]')
fig.update_yaxes(title_text='Counts')
fig.update_layout(template="simple_white",
                  title="Histogram of Elevation Difference Between Circles and Stations")
ply.plot(fig, filename='../plots/elevation.html', auto_open=popen)

### Figure Factory Choropleth

In [None]:
# 2010 rows only, keeping the columns needed for the county maps and
# discarding rows that lack either a max temperature or a county FIPS code
is_2010 = df_paired['count_year'] == 2010
map_columns = ['lat', 'lon', 'country_state', 'max_temp', 'min_temp', 'county_fips']
df_2010 = (df_paired.loc[is_2010, map_columns]
           .dropna(axis=0, subset=['max_temp', 'county_fips']))

In [None]:
# Inputs for the figure-factory choropleth: parallel lists of county FIPS
# codes and the max temperature recorded for each 2010 row
fips = df_2010['county_fips'].to_list()
values = df_2010['max_temp'].to_list()

# Eight-step colour ramp, light blue through yellow to deep orange
colorscale = ['#00ACC1', '#26C6DA', '#B2EBF2', "#FFF9C4",
              '#FFEE58', '#FBC02D', '#FF7043', '#E64A19']

In [None]:
# County choropleth of 2010 circle max temperatures (figure factory)
choropleth_options = dict(
    scope=['usa'],
    show_state_data=True,
    show_hover=True,
    asp=2.9,
    title_text='Maximum Circle Temps - 2010',
    legend_title='Temperature [F]',
    # Bin edges for the colour classes
    binning_endpoints=[-100.0, 0.0, 10.0, 20.0, 30.0, 40.0, 50.0],
    colorscale=colorscale,
)
fig = ff.create_choropleth(fips=fips, values=values, **choropleth_options)
# Clear the layout template on the returned figure
fig.layout.template = None

ply.plot(fig, filename='../plots/choropleth_ff.html', auto_open=popen)

### Choropleth Map from GeoJSON

In [None]:
# Fetch the U.S. county boundaries (JSON) from the plotly datasets repository
COUNTIES_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(COUNTIES_GEOJSON_URL) as response:
    counties = json.loads(response.read())

# Optional (disabled): keep a local copy of the download
#with open('../data/Cloud_Data/counties.json', 'w', encoding='utf-8') as f:
#    json.dump(counties, f, ensure_ascii=False, indent=4)

In [None]:
# County choropleth of 2010 circle max temperatures drawn from the counties GeoJSON
map_options = dict(
    geojson=counties,
    locations='county_fips',
    color='max_temp',
    color_continuous_scale="Reds",
    range_color=(0, 100),
    mapbox_style="carto-positron",
    zoom=3,
    center={"lat": 37.0902, "lon": -95.7129},
    opacity=0.5,
    labels={'max_temp': 'Maximum Temperature'},
)
fig = px.choropleth_mapbox(df_2010, **map_options)
# Remove the margins around the map
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
ply.plot(fig, filename='../plots/choropleth_json2.html', auto_open=popen)

In [None]:
# Largest circle max temperature in the whole dataset
df_paired.loc[:, 'max_temp'].max()

In [None]:
# Last 20 California rows of the 2010 subset
ca_columns = ['lat', 'lon', 'county_fips', 'max_temp']
df_2010.loc[df_2010['country_state'].eq('US-CA'), ca_columns].tail(20)

In the counties JSON variable, CA is state "06".
There are 58 counties in CA (as of 2010 census), and it looks like all are included in the JSON variable.

In [None]:
# California-only view of the 2010 subset
df_2010_CA = df_2010[df_2010['country_state'].eq('US-CA')]

In [None]:
# Quick check that California has max-temperature data: histogram of the values
ca_max_temps = df_2010_CA['max_temp']
ca_max_temps.hist()

### Time-Series Chart Averaged by State

In [13]:
# Sorted array of the distinct state codes
states = np.sort(df_paired['state'].unique())
states

array(['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
       'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
       'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
       'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY'], dtype=object)

In [14]:
# Sorted array of the distinct count years
years = np.sort(df_paired['count_year'].unique())
years

array([1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911,
       1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922,
       1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933,
       1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944,
       1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955,
       1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966,
       1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977,
       1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988,
       1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
       2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018], dtype=int64)

In [15]:
# Display a sorted copy of the years array (`years` itself is left untouched)
np.sort(years, axis=-1)

array([1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911,
       1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922,
       1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933,
       1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944,
       1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955,
       1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966,
       1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977,
       1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988,
       1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
       2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018], dtype=int64)

In [16]:
# Spot check: mean circle max temperature for California in 2010
ca_2010_rows = (df_paired['state'] == 'CA') & (df_paired['count_year'] == 2010)
df_paired.loc[ca_2010_rows, 'max_temp'].mean()

61.329990884229716

In [17]:
# Create a time-series dataframe for the Circles Max Temp
# (rows = count years, columns = states, values = mean circle max_temp)
if not offline_ts:
    # A single grouped mean replaces the nested year x state loop, which
    # re-filtered all of df_paired once for every cell of the table.
    df_ts_circle_tmax = (
        df_paired.groupby(['count_year', 'state'])['max_temp']
        .mean()
        .unstack('state')
        # Same row/column layout as before; combinations with no data stay NaN
        .reindex(index=years, columns=states)
        # Unnamed axes keep the saved .csv header identical to the old file
        .rename_axis(index=None, columns=None)
    )

    df_ts_circle_tmax.to_csv(PATH_TO_TS_CIRCLE)

else:
    df_ts_circle_tmax = pd.read_csv(PATH_TO_TS_CIRCLE,
                                    index_col=0)

In [18]:
# Create a time-series dataframe for the NOAA Max Temp
# (rows = count years, columns = states, values = mean noaa_tmax_value)

if not offline_ts:
    # A single grouped mean replaces the nested year x state loop, which
    # re-filtered all of df_paired once for every cell of the table.
    df_ts_noaa_tmax = (
        df_paired.groupby(['count_year', 'state'])['noaa_tmax_value']
        .mean()
        .unstack('state')
        # Same row/column layout as before; combinations with no data stay NaN
        .reindex(index=years, columns=states)
        # Unnamed axes keep the saved .csv header identical to the old file
        .rename_axis(index=None, columns=None)
    )

    df_ts_noaa_tmax.to_csv(PATH_TO_TS_NOAA)

else:
    df_ts_noaa_tmax = pd.read_csv(PATH_TO_TS_NOAA,
                                  index_col=0)

In [19]:
# Ensure the circle time-series dataframe contains data: show its last five rows
df_ts_circle_tmax.iloc[-5:]

Unnamed: 0,AK,AL,AR,AZ,CA,CO,CT,DC,DE,FL,...,SD,TN,TX,UT,VA,VT,WA,WI,WV,WY
2014,23.916667,54.683544,50.987805,61.738903,65.168728,43.372162,37.065476,42.0,44.617647,74.549296,...,28.987578,50.507212,58.792683,32.670466,46.730479,25.376923,45.832609,23.694274,50.625,33.403846
2015,30.551562,59.431373,50.151445,51.239075,59.652495,38.98525,39.596273,39.0,52.415094,74.643347,...,31.924658,49.891247,55.592638,36.940367,51.240933,33.297101,45.288201,33.839623,44.159292,28.99095
2016,32.917241,61.068627,52.8,48.32398,55.693576,42.583204,43.487805,45.0,47.584906,72.773678,...,30.360248,53.888078,59.562101,39.067273,51.652393,34.628099,40.321298,30.254095,47.838095,35.515385
2017,27.47482,65.803738,47.622222,66.123529,54.854844,30.078493,45.538117,50.0,46.268293,77.7,...,32.772727,51.446753,66.073937,30.214592,54.16129,31.545455,34.906883,22.354776,52.4,31.204633
2018,31.463576,55.12963,47.11413,66.586957,63.814057,44.619803,31.6,48.0,35.566038,71.262662,...,31.832258,41.975831,54.557354,41.488995,40.038363,17.607333,43.062619,25.487572,37.228333,37.817427


In [20]:
# Ensure the NOAA time-series dataframe contains data: show its last five rows
df_ts_noaa_tmax.iloc[-5:]

Unnamed: 0,AK,AL,AR,AZ,CA,CO,CT,DC,DE,FL,...,SD,TN,TX,UT,VA,VT,WA,WI,WV,WY
2014,22.042727,62.6,54.876364,56.48878,63.609565,40.1375,34.82375,41.36,54.5,75.178276,...,35.162857,46.996,59.835313,34.602286,46.730698,20.4575,43.320851,23.516226,51.911429,35.126667
2015,32.395122,58.145,49.827826,51.46,55.416744,35.957429,36.341176,37.7,55.58,74.607742,...,27.176,50.396923,57.586777,38.192,50.755122,32.6525,43.16766,34.355728,43.134286,31.833846
2016,32.71561,59.7,56.604615,52.054348,52.470909,37.736857,42.99125,44.96,38.3,77.110323,...,31.742857,51.283684,60.3275,36.592195,51.378636,37.58,38.271364,33.0116,42.6,31.433333
2017,25.403529,68.135,50.994286,56.541364,52.091236,33.526197,46.892632,38.03,32.54,79.991,...,31.004706,50.065455,68.14339,37.089091,53.687568,28.053846,32.873,19.532955,50.104211,30.315714
2018,32.714194,54.02,49.004706,61.064375,60.357452,40.899672,31.43,38.48,39.38,72.775094,...,31.232,43.94,57.768868,41.016364,40.825,16.52,41.484138,25.853409,33.5975,31.2584


In [29]:
# Convert both wide (year x state) tables to long format.
# ignore_index=False keeps the year as the index of each long table.
df_ts_circle_tmax_long = pd.melt(df_ts_circle_tmax,
                                 var_name='state',
                                 value_name='circle_tmax',
                                 ignore_index=False)

# The NOAA values get their own column name; they were previously labelled
# 'circle_tmax' by copy-paste.
df_ts_noaa_tmax_long = pd.melt(df_ts_noaa_tmax,
                               var_name='state',
                               value_name='noaa_tmax',
                               ignore_index=False)

In [30]:
# Distinct states present in the long table, in order of first appearance
df_ts_circle_tmax_long['state'].drop_duplicates().to_numpy()

array(['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
       'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
       'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
       'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY'], dtype=object)

In [31]:
# Convert the index (the year) to a normal column.
# Guarded so the cell can be re-run: once the index has been reset, copying it
# again would overwrite 'year' with the positions 0..N-1.
if 'year' not in df_ts_circle_tmax_long.columns:
    df_ts_circle_tmax_long['year'] = df_ts_circle_tmax_long.index
    df_ts_circle_tmax_long = df_ts_circle_tmax_long.reset_index(drop=True)
df_ts_circle_tmax_long.tail()

Unnamed: 0,state,circle_tmax,year
6013,WY,33.403846,2014
6014,WY,28.99095,2015
6015,WY,35.515385,2016
6016,WY,31.204633,2017
6017,WY,37.817427,2018


In [37]:
# Make line plot: circle vs. NOAA state-mean Tmax for a handful of states.
# Each state has one colour; circle data is drawn solid, NOAA data dotted.
x_all = df_ts_circle_tmax.index
state_colors = {'FL': 'firebrick',
                'TX': 'royalblue',
                'WA': 'black',
                'VT': 'lightgreen'}
sources = [('circle', df_ts_circle_tmax, 'solid'),
           ('noaa', df_ts_noaa_tmax, 'dot')]

fig = go.Figure()
# Trace order: FL-circle, FL-noaa, TX-circle, TX-noaa, ...
for st, color in state_colors.items():
    for source_label, df_ts, dash in sources:
        fig.add_trace(go.Scatter(x=x_all, y=df_ts.loc[:, st],
                                 name='{}-{}'.format(st, source_label),
                                 line=dict(color=color, dash=dash)))

fig.update_layout(title="Time-Series Comparison b/w Circles and NOAA Stations: Tmax",
                  template="simple_white")
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Max Temperature [F]',
                 range=[0, 100])
ply.plot(fig, filename='../plots/ts_tmax.html', auto_open=popen)

'../plots/ts_tmax.html'

### Make Choropleth Map with Time Scale Slider

ref: https://amaral.northwestern.edu/blog/step-step-how-plot-map-slider-represent-time-evolu
ref: https://support.sisense.com/hc/en-us/community/posts/360038301533-Plotly-Choropleth-With-Slider-Map-Charts-Over-Time-

In [47]:
# colorscale (reds, white to dark red)
scl = [[0.0, '#ffffff'], [0.2, '#ff9999'], [0.4, '#ff4d4d'],
       [0.6, '#ff1a1a'], [0.8, '#cc0000'], [1.0, '#4d0000']]

# One choropleth trace per year; 'years' is the np.array from above.
# DC is removed for now.
not_dc = df_ts_circle_tmax_long['state'] != 'DC'
data_slider = []
for i, year in enumerate(years):
    df_selected_year = df_ts_circle_tmax_long[not_dc &
                                              (df_ts_circle_tmax_long['year'] == year)]

    data_one_year = dict(
        type='choropleth',
        locations=df_selected_year['state'],
        z=df_selected_year['circle_tmax'],
        locationmode='USA-states',
        colorscale=scl,
        # Mouse-hover text, built as a list rather than written into the
        # filtered slice (which raised SettingWithCopyWarning)
        text=['[degF]'] * len(df_selected_year),
        zmin=0,
        zmax=75,
        colorbar={'title': 'Temperature [degF]'},
        # Only the first year is shown initially, matching the slider's
        # starting position; otherwise all traces are drawn on top of each other
        visible=(i == 0),
    )

    data_slider.append(data_one_year)

# One slider step per trace: the step makes its own trace the only visible one
steps = []
for i, year in enumerate(years):
    visibility = [False] * len(data_slider)
    visibility[i] = True
    steps.append(dict(method='restyle',
                      args=['visible', visibility],
                      label='{}'.format(year)))  # label taken from the data, not assumed to start at 1901

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

# Layout (including slider option)
# NOTE(review): the "Play" button uses method="animate", but this figure
# defines no frames, so the button appears to have nothing to play. Confirm
# and remove, or rely on the animated px.choropleth version of this map.
layout = dict(
    geo=dict(scope='usa',
             projection={'type': 'albers usa'}),
    sliders=sliders,
    title='Circle Max Temperatures Averaged by State',
    updatemenus=[dict(type="buttons",
                      buttons=[dict(label="Play",
                                    method="animate",
                                    args=[None])])],
)

fig = dict(data=data_slider, layout=layout)

# to plot in the notebook
#ply.iplot(fig)

# to plot in a separate browser window
ply.plot(fig, filename='../plots/ts-map_tmax.html', auto_open=popen)



'../plots/ts-map_tmax.html'

### Animated Choropleth Map

ref:https://towardsdatascience.com/how-to-create-an-animated-choropleth-map-with-less-than-15-lines-of-code-2ff04921c60b

In [53]:
# Animated state choropleth: one animation frame per year of the long table
animation_options = dict(
    locations='state',
    locationmode='USA-states',
    scope="usa",
    color="circle_tmax",
    color_continuous_scale="Reds",
    range_color=(0, 75),
    animation_frame="year",
    title='Circle Max Temperatures Averaged by State',
    height=600,
    labels={'circle_tmax': 'Temperature [degF]',
            'year': 'Year '},
)
fig = px.choropleth(df_ts_circle_tmax_long, **animation_options)

fig.update_layout(legend_title_text='Temperature [degF]')
ply.plot(fig, filename='../plots/ts-map-anim_tmax.html', auto_open=popen)

'../plots/ts-map-anim_tmax.html'