<a href="https://colab.research.google.com/github/philiplindsay/storytelling-with-data/blob/master/Murder_Hornets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**FINAL VIDEO**

https://youtu.be/0_5KRSTJ8wk

**Relevant Links**

HTML data navigation site: https://www1.ncdc.noaa.gov/pub/data/normals/

Example notebook from Professor Manning: https://colab.research.google.com/drive/1Y7M5yKMPN9rOmxHlQ66MyIY6cREb_BW6?usp=sharing#scrollTo=T7QH7voOjtUW

In [0]:
#import relevant packages
import numpy as np
import pandas as pd
import plotly.express as px

In [0]:
#import NOAA 1981-2010 climate normals: monthly average temperature,
#station inventory, and annual precipitation
temp_data = 'https://www1.ncdc.noaa.gov/pub/data/normals/1981-2010/products/temperature/mly-tavg-normal.txt'
id_data = 'https://www1.ncdc.noaa.gov/pub/data/normals/1981-2010/station-inventories/allstations.txt'
rain_data = 'https://www1.ncdc.noaa.gov/pub/data/normals/1981-2010/products/precipitation/ann-prcp-normal.txt'

#each file is read as a single text column (one string per line); the lines are
#tokenised later with str.split()
#header = None is required: by default read_csv uses the first line as column
#names, which silently drops the first station record of every file
#NOTE(review): assumes the files have no header row, consistent with every row
#being parsed as a station record further down - confirm against the NOAA readme
raw_temp = pd.read_csv(temp_data, header = None)
raw_id = pd.read_csv(id_data, header = None)
raw_rain = pd.read_csv(rain_data, header = None)

#turn temperature data into a data frame
#every raw row is one text line: a station id followed by twelve monthly values,
#so split on whitespace and keep the first 13 tokens (id + 12 months)
temp = raw_temp.iloc[:, 0].str.split(expand = True).iloc[:, :13]

#set station id as index for dataset
temp = temp.set_index(0)

#delete the last character of each monthly value (it's just a flag for how good
#that piece of data is) and convert the rest to numbers
#the conversion matters: min/max over text compares lexicographically
#(e.g. '99' > '100'), which gives wrong extremes; np.str is also gone from
#recent NumPy releases, so the old astype(np.str) call raises AttributeError
#NOTE(review): values are presumably tenths of a degree Fahrenheit, and NOAA
#sentinel codes (e.g. -9999) are not filtered here - confirm against the readme
temp = temp.apply(lambda col: pd.to_numeric(col.str[:-1]))

#create new dataframe with only minimum and maximum monthly values per station
temp = pd.DataFrame({'min': temp.min(axis = 1), 'max': temp.max(axis = 1)})

#turn id data into a data frame
#keep the first five whitespace-separated tokens of every inventory line;
#token 0 is used as the station id and token 4 as the state (see below)
#NOTE: the name `id` shadows the Python builtin; it is kept because the merge
#step further down refers to it
station_fields = [
    [tokens[k] for k in range(5)]
    for tokens in (line.split() for line in raw_id.iloc[:, 0])
]
id = pd.DataFrame(station_fields, columns = np.arange(5))

#station id becomes the index, column 4 is renamed to 'State',
#and every other (unnecessary) column is dropped
id = (
    id.set_index(0)
      .rename(columns = {4: 'State'})
      .loc[:, ['State']]
)

#turn rain data into a data frame
#every raw row is one text line: a station id followed by the annual value,
#so split on whitespace and keep the first two tokens
rain = raw_rain.iloc[:, 0].str.split(expand = True).iloc[:, :2]

#set station id as index for dataset and rename the rain variable
rain = rain.set_index(0).rename(columns = {1: 'rain'})

#delete the last character of the value (it's just a flag for how good that
#piece of data is) and make it numeric
#the previous astype(np.str) step is dropped: np.str no longer exists in recent
#NumPy releases (AttributeError), and the .str accessor already yields strings
#NOTE(review): values are presumably hundredths of an inch, and NOAA sentinel
#codes (e.g. -9999) are not filtered here - confirm against the readme
rain['rain'] = pd.to_numeric(rain['rain'].str[:-1])

#merge ID, rain, and temperature data
#both merges are inner joins on the station-id index, so only stations present
#in all three tables survive
merged = pd.merge(id, temp, left_index = True, right_index = True)
merged = pd.merge(merged, rain, left_index = True, right_index = True)

#make sure the measurements are numeric before averaging: a mean over
#text (object) columns is either rejected or meaningless
measure_cols = ['min', 'max', 'rain']
merged[measure_cols] = merged[measure_cols].apply(pd.to_numeric)

#average across the State
merged = merged.groupby('State').mean()

#generate temperature range variable
merged['range'] = merged['max'] - merged['min']

#thresholds that define a "temperate" state
#NOTE(review): units are presumably those of the NOAA normals files
#(hundredths of an inch for rain, tenths of a degree Fahrenheit for
#temperature) - confirm before changing the numbers
MIN_ANNUAL_RAIN = 3200.0
MIN_COLDEST_MONTH = 266.0
MAX_TEMP_RANGE = 560.0

#generate dummy variable for temperate climate: 1 unless the state is too dry,
#too cold in its coldest month, or has too wide a temperature range
#(written as "not any failure" so a missing value never fails a check,
#matching the behaviour of the previous mask-based version)
unsuitable = (
    (merged['rain'] < MIN_ANNUAL_RAIN)
    | (merged['min'] < MIN_COLDEST_MONTH)
    | (merged['range'] > MAX_TEMP_RANGE)
)
merged['temperate'] = (~unsuitable).astype(int)

#add column equal to index for graphing
merged['State'] = merged.index

In [0]:
#map the temperate (0/1) flag for each state across the United States
#the colour scale is split at 0.5 so that 0 renders blue and 1 renders yellow
fig = px.choropleth(
    merged,
    locations = 'State',
    locationmode = 'USA-states',
    scope = 'usa',
    color = 'temperate',
    color_continuous_scale = [
        (0, "blue"), (0.5, "blue"),
        (0.5, "yellow"), (1, "yellow"),
    ],
    title = 'Environmental suitability for Asian Hornets',
)

#improve clarity of legend: no title, only the two end points are labelled,
#and the colour bar is a fixed 100 pixels long
fig.update_layout(coloraxis_colorbar = {
    'title': None,
    'tickvals': [0, 1],
    'ticktext': ["not suitable", "suitable"],
    'lenmode': "pixels",
    'len': 100,
})

fig

In [0]:
#create deaths per capita data frames
#`death` holds hornet and lightning only; `death1` holds the same two causes plus car
causes = ['hornet', 'lightning', 'car']
rates = [0.0316, 0.0152, 11.578]
death = pd.DataFrame({'death': causes[:2], 'per_capita': rates[:2]})
death1 = pd.DataFrame({'death': causes, 'per_capita': rates})

In [0]:
#bar chart of the two-cause table (hornet vs. lightning)
#both axis titles are set to a single space, so only the chart title is shown
px.bar(
    data_frame = death,
    x = 'death',
    y = 'per_capita',
    title = 'Deaths per 100,000 people',
    labels = {'death': ' ', 'per_capita': ' '},
)

In [0]:
#bar chart of the three-cause table (hornet, lightning and car)
#both axis titles are set to a single space, so only the chart title is shown
px.bar(
    data_frame = death1,
    x = 'death',
    y = 'per_capita',
    title = 'Deaths per 100,000 people',
    labels = {'death': ' ', 'per_capita': ' '},
)