In [9]:
!pip install bqplot

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as mlp
import plotly.express as px
import bqplot
%matplotlib inline

### **Inspect the columns**
Read CSV files from the original dataset: https://github.com/washingtonpost/data-2C-beyond-the-limit-usa

In [11]:
# extract data
national_data = pd.read_csv('https://raw.githubusercontent.com/washingtonpost/data-2C-beyond-the-limit-usa/main/data/processed/climdiv_national_year.csv', dtype={"fips": str})
state_data = pd.merge(pd.read_csv('https://raw.githubusercontent.com/washingtonpost/data-2C-beyond-the-limit-usa/main/data/processed/climdiv_state_year.csv', dtype={"fips": str}), pd.read_csv('https://raw.githubusercontent.com/washingtonpost/data-2C-beyond-the-limit-usa/main/data/processed/model_state.csv', dtype={"fips": str}), on ='fips')
county_data = pd.merge(pd.read_csv('https://raw.githubusercontent.com/washingtonpost/data-2C-beyond-the-limit-usa/main/data/processed/climdiv_county_year.csv', dtype={"fips": str}), pd.read_csv('https://raw.githubusercontent.com/washingtonpost/data-2C-beyond-the-limit-usa/main/data/processed/model_county.csv', encoding = "latin-1", dtype={"fips": str}), on ='fips')

In [12]:
national_data.head()

Unnamed: 0,year,temp,tempc
0,1895,50.3375,10.1875
1,1896,51.993333,11.107407
2,1897,51.556667,10.864815
3,1898,51.431667,10.79537
4,1899,51.009167,10.560648


In [13]:
state_data.head()

Unnamed: 0,fips,year,temp,tempc,Fall,Spring,Summer,Winter,max_warming_season,Annual,STUSAB,STATE_NAME,STATENS
0,1,1895,61.641667,16.467593,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
1,1,1896,64.266667,17.925926,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
2,1,1897,64.191667,17.884259,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
3,1,1898,62.983333,17.212963,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
4,1,1899,63.1,17.277778,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775


In [29]:
county_data.describe()

Unnamed: 0,year,temp,tempc,Fall,Spring,Summer,Winter,Annual
count,388375.0,388375.0,388375.0,388375.0,388375.0,388375.0,388375.0,388375.0
mean,1957.0,53.995737,12.219854,0.541821,0.827684,0.45596,1.414876,0.814849
std,36.083284,8.43161,4.684228,0.53583,0.535507,0.617663,0.713179,0.547218
min,1895.0,30.508333,-0.828704,-0.845813,-0.609171,-0.943591,-0.197573,-0.566222
25%,1926.0,47.65,8.694444,0.152437,0.434949,-0.007224,0.951139,0.400667
50%,1957.0,53.908333,12.171296,0.49703,0.831619,0.344198,1.322314,0.743877
75%,1988.0,60.533333,15.851852,0.899584,1.213813,0.871605,1.804106,1.204825
max,2019.0,78.816667,26.009259,2.52666,2.786215,2.583295,3.736705,2.53716


### **Filter the data to the state of interest (Illinois)**

In [15]:
state_data_illinois = state_data[state_data['STATE_NAME'] == 'Illinois']
state_data_illinois

Unnamed: 0,fips,year,temp,tempc,Fall,Spring,Summer,Winter,max_warming_season,Annual,STUSAB,STATE_NAME,STATENS
1250,17,1895,49.716667,9.842593,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784
1251,17,1896,52.200000,11.222222,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784
1252,17,1897,51.400000,10.777778,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784
1253,17,1898,51.300000,10.722222,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784
1254,17,1899,50.816667,10.453704,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1370,17,2015,52.875000,11.597222,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784
1371,17,2016,54.733333,12.629630,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784
1372,17,2017,54.391667,12.439815,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784
1373,17,2018,52.475000,11.375000,0.557305,1.153298,0.217481,1.308741,Winter,0.81442,IL,Illinois,1779784


#### **Simple visulization to get a big picture of data**
explore the temperature of illinois

In [30]:
from google.colab import output
output.disable_custom_widget_manager()

In [44]:
# reference: lecture_13_timeseries_and_map_continued
x_sc = bqplot.DateScale()
y_sc = bqplot.LinearScale()

# Axis
x_ax = bqplot.Axis(scale=x_sc, label='Year')
y_ax1 = bqplot.Axis(scale=y_sc, label='Spring', orientation='vertical')
y_ax2 = bqplot.Axis(scale=y_sc, label='Summer', orientation='vertical')
y_ax3 = bqplot.Axis(scale=y_sc, label='Fall', orientation='vertical')
y_ax4 = bqplot.Axis(scale=y_sc, label='Winter', orientation='vertical')

lines = bqplot.Lines(x=state_data_illinois["year"], 
                     y=state_data_illinois["Spring"], 
                     scales={'x':x_sc, 'y':y_sc})
lines = bqplot.Lines(x=state_data_illinois["year"], 
                     y=state_data_illinois["Summer"], 
                     scales={'x':x_sc, 'y':y_sc})
lines = bqplot.Lines(x=state_data_illinois["year"], 
                     y=state_data_illinois["Fall"], 
                     scales={'x':x_sc, 'y':y_sc})
lines = bqplot.Lines(x=state_data_illinois["year"], 
                     y=state_data_illinois["Winter"], 
                     scales={'x':x_sc, 'y':y_sc})
line_figure = bqplot.Figure(marks = [lines], axes = [x_ax, y_ax1])
line_figure
# sc_x = bqplot.LinearScale()
# sc_y = bqplot.LinearScale()

# patch = bqplot.Lines(
#     x=[
#         state_data_illinois["year"],
#         state_data_illinois["year"],
#         state_data_illinois["year"],
#         state_data_illinois["year"]
#     ],
#     y=[
#         state_data_illinois["Spring"],
#         state_data_illinois["Summer"],
#         state_data_illinois["Fall"],
#         state_data_illinois["Spring"],
#     ],
#     fill_colors=["green", "red", "yellow", "blue"],
#     fill="inside",
#     stroke_width=10,
#     close_path=True,
#     scales={"x": sc_x, "y": sc_y},
#     display_legend=True,
# )

# fig = bqplot.Figure(marks=[lines],  axes = [x_ax, y_ax1])
# fig

Figure(axes=[Axis(label='Year', scale=DateScale()), Axis(label='Spring', orientation='vertical', scale=LinearS…

**Observation: the season data stays the same over years, which is probably generated**\
Thus it is resonable to focus on the column tempc and temp if we are going to explore how temperature is changing over the years

In [32]:
from google.colab import output
output.enable_custom_widget_manager()

### **Explore temperatures by State per Year**


**Shooting for something like this:**

In [21]:
import PIL.Image

In [22]:
!wget = https://www.someka.net/wp-content/uploads/2016/05/USA-Geographic-Heat-Map-Generator-Excel-Template-Someka-SS3.png

--2022-12-08 23:12:48--  http://=/
Resolving = (=)... failed: Name or service not known.
wget: unable to resolve host address ‘=’
--2022-12-08 23:12:48--  https://www.someka.net/wp-content/uploads/2016/05/USA-Geographic-Heat-Map-Generator-Excel-Template-Someka-SS3.png
Resolving www.someka.net (www.someka.net)... 162.159.135.42
Connecting to www.someka.net (www.someka.net)|162.159.135.42|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 57325 (56K) [image/png]
Saving to: ‘USA-Geographic-Heat-Map-Generator-Excel-Template-Someka-SS3.png’


2022-12-08 23:12:48 (5.09 MB/s) - ‘USA-Geographic-Heat-Map-Generator-Excel-Template-Someka-SS3.png’ saved [57325/57325]

FINISHED --2022-12-08 23:12:48--
Total wall clock time: 0.3s
Downloaded: 1 files, 56K in 0.01s (5.09 MB/s)


####**explore the difference between states**
Extract states

In [24]:
mydata = state_data[0:1375]
mydata

Unnamed: 0,fips,year,temp,tempc,Fall,Spring,Summer,Winter,max_warming_season,Annual,STUSAB,STATE_NAME,STATENS
0,01,1895,61.641667,16.467593,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
1,01,1896,64.266667,17.925926,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
2,01,1897,64.191667,17.884259,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
3,01,1898,62.983333,17.212963,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
4,01,1899,63.100000,17.277778,-0.195668,-0.105862,-0.325009,0.458526,Winter,-0.035048,AL,Alabama,1779775
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1370,17,2015,52.875000,11.597222,0.557305,1.153298,0.217481,1.308741,Winter,0.814420,IL,Illinois,1779784
1371,17,2016,54.733333,12.629630,0.557305,1.153298,0.217481,1.308741,Winter,0.814420,IL,Illinois,1779784
1372,17,2017,54.391667,12.439815,0.557305,1.153298,0.217481,1.308741,Winter,0.814420,IL,Illinois,1779784
1373,17,2018,52.475000,11.375000,0.557305,1.153298,0.217481,1.308741,Winter,0.814420,IL,Illinois,1779784


In [56]:
mydata_2000 = mydata[ mydata['year']==2000]
# compare_data = mydata.groupby([mydata['year']==2000, 'STATE_NAME'])["temp"].mean()

# compare_data = compare_data.pivot(index='year', 
#                                   columns='STATE_NAME', 
#                                   values='temp')
compare_data = mydata_2000.groupby(['STATE_NAME'])["temp"].mean()
compare_data = compare_data.reset_index()
compare_data

Unnamed: 0,STATE_NAME,temp
0,Alabama,63.566667
1,Arizona,61.675
2,Arkansas,60.891667
3,California,58.8
4,Colorado,47.0
5,Connecticut,48.533333
6,Delaware,54.741667
7,Florida,70.575
8,Georgia,63.35
9,Idaho,43.933333


In [58]:

x_sc = bqplot.OrdinalScale()
y_sc = bqplot.LinearScale()

bar = bqplot.Bars(x=compare_data['STATE_NAME'], y=compare_data['temp'], scales={"x": x_sc, "y": y_sc})
ax_x = bqplot.Axis(scale=x_sc)
ax_y = bqplot.Axis(scale=y_sc, tick_format="0.2f", orientation="vertical")

bqplot.Figure(marks=[bar], axes=[ax_x, ax_y], padding_x=0.025, padding_y=0.025)


Figure(axes=[Axis(scale=OrdinalScale()), Axis(orientation='vertical', scale=LinearScale(), tick_format='0.2f')…

In [49]:
us_map_data = bqplot.topo_load('map_data/USStatesMap.json')

In [59]:
state_mean_data

STATE_NAME   fips
Alabama      01      63.195400
Arizona      04      59.651200
Arkansas     05      60.543133
California   06      57.633267
Colorado     08      44.835667
Connecticut  09      48.327733
Delaware     10      54.524667
Florida      12      70.327667
Georgia      13      63.525533
Idaho        16      42.798600
Illinois     17      51.783733
Name: temp, dtype: float64

In [61]:

us_map_data = bqplot.topo_load('map_data/USStatesMap.json')

# Scale
geo_sc = bqplot.AlbersUSA()
clr_sc = bqplot.ColorScale(scheme='Oranges')

# Axis
clr_ax = bqplot.ColorAxis(scale=clr_sc, side='top')

# Mark
# us_map = bqplot.Map(map_data = us_map_data, 
#                     scales={'projection':geo_sc, 'color':clr_sc}, 
#                     color = clr)


### TODO: explore heat map by using the data of temperature of different county
