In [19]:
import pandas as pd
import numpy as np
import requests

In [20]:
# NOAA nClimDiv county-level files. Every file lives under the same directory
# and carries the same release suffix; only the element code in the file name
# differs from one variable to the next.
CLIMDIV_BASE_URL = "https://www.ncei.noaa.gov/pub/data/cirs/climdiv"
CLIMDIV_RELEASE = "v1.0.0-20240506"

# Notebook variable name -> element code used in the NOAA file name.
CLIMDIV_ELEMENTS = [
    ('avg_temp', 'tmpccy'),
    ('max_temp', 'tmaxcy'),
    ('min_temp', 'tmincy'),
    ('precipitation', 'pcpncy'),
    ('pdsi', 'pdsicy'),
]

urls = {
    name: f"{CLIMDIV_BASE_URL}/climdiv-{element}-{CLIMDIV_RELEASE}"
    for name, element in CLIMDIV_ELEMENTS
}

In [21]:
# Download every NOAA nClimDiv county file listed in `urls`, keep the rows from
# FIRST_YEAR onwards, and collapse the twelve monthly values of each row into a
# single yearly figure. The result is `dfs`: one DataFrame per variable, in the
# iteration order of `urls`, each with columns
# state_fips, county_fips, year, <yearly value>.

FIRST_YEAR = 1970        # rows for earlier years are discarded
REQUEST_TIMEOUT = 60     # seconds; without a timeout a stalled download hangs the kernel
KEY_COLS = ['state_fips', 'county_fips', 'year']
MONTH_COLS = [f'month_{i}' for i in range(1, 13)]

# variable name -> (output column, row-wise pandas reduction over the 12 months)
YEARLY_AGG = {
    'avg_temp': ('avg_temp', 'mean'),
    'max_temp': ('max_temp', 'max'),
    'min_temp': ('min_temp', 'min'),
    'precipitation': ('total_precip', 'sum'),
    'pdsi': ('avg_pdsi', 'mean'),
}

# NOAA fills months that have not been observed yet (the tail of the latest
# year) with sentinel values: -99.90 for temperature, -9.99 for precipitation
# and -99.99 for PDSI, per the nClimDiv county readme. Anything strictly below
# these cut-offs is treated as missing; real precipitation is never negative
# and real temperature/PDSI values are nowhere near -99.
MISSING_BELOW = {'precipitation': 0.0}
DEFAULT_MISSING_BELOW = -99.0


def _parse_climdiv(text, first_year=FIRST_YEAR):
    """Parse the text of one nClimDiv county file into a DataFrame.

    Each line starts with an 11-character identifier (characters 0-1 state
    code, 2-4 county code, 7-10 year) followed by whitespace-separated monthly
    values. Lines for years before `first_year`, and blank or truncated lines,
    are skipped.

    Returns a DataFrame with columns state_fips, county_fips, year and
    month_1 .. month_N (N is 12 for a well-formed line).
    """
    records = []
    for line in text.splitlines():
        if len(line.strip()) < 11:
            continue  # blank or truncated line: no complete identifier

        year = int(line[7:11])
        if year < first_year:
            continue  # filter first, so discarded rows are never float-parsed

        # NOTE(review): NOAA's county readme appears to describe the first two
        # characters as NCDC state codes rather than FIPS state codes -- confirm
        # before joining this column against FIPS-keyed data.
        record = {'state_fips': line[:2], 'county_fips': line[2:5], 'year': year}
        for month, value in enumerate(line[11:].split(), start=1):
            record[f'month_{month}'] = float(value)
        records.append(record)

    return pd.DataFrame(records)


def _to_yearly(monthly_df, var):
    """Collapse month_1..month_12 into the yearly column defined for `var`.

    Sentinel "missing" values are masked to NaN first. A year with any missing
    month yields NaN rather than a figure computed from sentinel values or from
    only part of the year. A `var` without an entry in YEARLY_AGG is returned
    unchanged, monthly columns included.
    """
    if var not in YEARLY_AGG:
        return monthly_df

    out_col, how = YEARLY_AGG[var]

    # reindex also yields NaN columns if some month column is absent entirely.
    months = monthly_df.reindex(columns=MONTH_COLS)
    months = months.mask(months < MISSING_BELOW.get(var, DEFAULT_MISSING_BELOW))

    # skipna=False: an incomplete year propagates NaN instead of a partial result.
    yearly = getattr(months, how)(axis=1, skipna=False)

    return monthly_df[KEY_COLS].assign(**{out_col: yearly})


dfs = []

for var, url in urls.items():
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail on 4xx/5xx here instead of trying to parse an HTML error page.
    response.raise_for_status()

    df = _parse_climdiv(response.text)
    if df.empty:
        raise ValueError(f"No rows from {FIRST_YEAR} onwards were parsed from {url}")

    df = _to_yearly(df, var)
    dfs.append(df)

In [22]:
# Combine the per-variable yearly frames into one wide table keyed by
# state, county and year. The outer join keeps keys that are present in
# only some of the inputs; their absent values come out as NaN.
join_keys = ['state_fips', 'county_fips', 'year']

merged_df = dfs[0]
for yearly_df in dfs[1:]:
    merged_df = merged_df.merge(yearly_df, how='outer', on=join_keys)

merged_df.head()

Unnamed: 0,state_fips,county_fips,year,avg_temp,max_temp,min_temp,total_precip,avg_pdsi
0,1,1,1970,62.666667,92.1,28.1,50.06,-0.253333
1,1,1,1971,63.241667,90.1,33.8,64.32,1.790833
2,1,1,1972,63.708333,91.0,37.6,57.44,1.745833
3,1,1,1973,63.583333,90.7,32.8,60.8,0.8025
4,1,1,1974,63.808333,90.3,35.4,59.63,0.894167


In [23]:
# Sanity check: number of rows in the merged table.
merged_df.shape[0]

172535