In [1]:
# Build and clean dataset
import pandas as pd
import altair as alt
import geopandas as gpd
from multiprocessing import Pool
import json
import plotly.express as px
import plotly.graph_objects as go

Based on the NClimDiv dataset described at https://www.ncei.noaa.gov/news/noaa-offers-climate-data-counties

In [2]:
# Lookup from the month-name prefixes used in the '<name>_value' column labels
# to zero-padded two-digit month numbers ('jan' -> '01', ..., 'dec' -> '12').
# The names are listed in calendar order so the number is just the position.
month_map = dict(
    zip(
        'jan feb mar apr may june july aug sept oct nov dec'.split(),
        ('%02d' % number for number in range(1, 13)),
    )
)

In [24]:
def read_data(file_path):
    """Parse a fixed-width climate text file into a list of record dicts.

    Every non-blank line produces one dict holding, in this insertion order,
    the string fields ``state_code``, ``division_number``, ``element_code``
    and ``year`` followed by twelve float fields ``jan_value`` ...
    ``dec_value``. Whitespace-only lines (for example a trailing empty line
    at the end of the file) are skipped instead of raising.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list[dict]: One record per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a monthly slice of a non-blank line is not a valid float.
    """
    # Order matters: it fixes both the column offsets and the key order of
    # each record (and therefore the column order of any DataFrame built
    # from the result).
    month_names = ('jan', 'feb', 'mar', 'apr', 'may', 'june',
                   'july', 'aug', 'sept', 'oct', 'nov', 'dec')
    first_value_col = 11  # 0-based offset where the January slice starts
    value_width = 7       # every monthly slice is 7 characters wide

    data = []
    with open(file_path, 'r') as file:
        for line in file:
            if not line.strip():
                # Nothing to parse; float('') would raise ValueError.
                continue
            # NOTE(review): the divisional NClimDiv layout is believed to be
            # 2-char state, 2-char division, 2-char element, 4-char year with
            # 7-char values starting at 0-based column 10. If so, the slices
            # below are shifted by one and division_number picks up the first
            # character of the element code. The offsets are kept as-is
            # because later cells may depend on the current values; confirm
            # against the format readme.
            record = {
                'state_code': line[0:2].strip(),
                'division_number': line[2:5].strip(),
                'element_code': line[5:6].strip(),
                'year': line[6:11].strip(),
            }
            for position, month in enumerate(month_names):
                start = first_value_col + position * value_width
                record[month + '_value'] = float(line[start:start + value_width].strip())
            data.append(record)
    return data

In [25]:
# Load the fixed-width file into a wide frame: one row per parsed line, one
# '<month>_value' column per month.
file_path = "climdiv-tmpcdv-v1.0.0-20230504"  # Replace with the actual file path
data = read_data(file_path)
df = pd.DataFrame(data)
# NOTE(review): NClimDiv files are understood to use a sentinel (e.g. -99.90
# for temperature) for months without data; nothing in this cell masks such
# values — confirm whether a later cell does before aggregating.
# Convert the data from wide to long format: one row per (record, month).
melted = pd.melt(
    df,
    id_vars=['state_code', 'division_number', 'element_code', 'year'],
    var_name='month',
    value_name='value',
)
# Remove the literal '_value' suffix from the month column; regex=False keeps
# this a plain substring replacement regardless of the pandas default.
melted['month'] = melted['month'].str.replace('_value', '', regex=False)
# Convert the month name to its zero-padded month number using month_map.
melted['month'] = melted['month'].map(month_map)
# Create a date column (first day of the month). 'year' and 'month' are both
# strings here, so they concatenate to 'YYYY-MM'; the explicit format avoids
# relying on per-value format inference.
melted['date'] = pd.to_datetime(melted['year'] + '-' + melted['month'], format='%Y-%m')
melted.head()

Unnamed: 0,state_code,division_number,element_code,year,month,value,date
0,1,10,2,1895,1,37.7,1895-01-01
1,1,10,2,1896,1,39.7,1896-01-01
2,1,10,2,1897,1,37.3,1897-01-01
3,1,10,2,1898,1,44.7,1898-01-01
4,1,10,2,1899,1,39.5,1899-01-01


In [18]:
# Read the county -> climate-division crosswalk as raw text lines.
with open('county-to-climdivs.txt', 'r') as file:
    crosswalk_lines = file.readlines()
# The line at index 3 supplies the column names; data rows start right after.
headers = crosswalk_lines[3].split()
# Split each data row on whitespace. Whitespace-only lines are skipped because
# they would otherwise split to [] and show up as all-None rows in the frame.
crosswalk_rows = [line.split() for line in crosswalk_lines[4:] if line.strip()]
# All values stay as strings, so any leading zeros in the IDs are preserved.
county_to_climdivs = pd.DataFrame(crosswalk_rows, columns=headers)
county_to_climdivs.head()

Unnamed: 0,POSTAL_FIPS_ID,NCDC_FIPS_ID,CLIMDIV_ID
0,1033,1033,101
1,1059,1059,101
2,1077,1077,101
3,1079,1079,101
4,1083,1083,101


In [30]:
# Distinct CLIMDIV_ID values in the crosswalk, in order of first appearance.
pd.unique(county_to_climdivs['CLIMDIV_ID'])

array(['0101', '0102', '0103', '0104', '0105', '0106', '0107', '0108',
       '0201', '0202', '0203', '0204', '0205', '0206', '0207', '0301',
       '0302', '0303', '0304', '0305', '0306', '0307', '0308', '0309',
       '0401', '0402', '0403', '0404', '0405', '0406', '0407', '0501',
       '0502', '0503', '0504', '0505', '0601', '0602', '0701', '0702',
       '0801', '0802', '0803', '0804', '0805', '0901', '0902', '0903',
       '0904', '0905', '0906', '0907', '0908', '0909', '1001', '1002',
       '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010',
       '1101', '1102', '1103', '1104', '1105', '1106', '1107', '1108',
       '1109', '1201', '1202', '1203', '1204', '1205', '1206', '1207',
       '1208', '1209', '1301', '1302', '1303', '1304', '1305', '1306',
       '1307', '1308', '1309', '1401', '1402', '1403', '1404', '1405',
       '1406', '1407', '1408', '1409', '1501', '1502', '1503', '1504',
       '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608',
      