In [2]:
import pandas as pd 

In [3]:
# Read a file and clean it up
#Link to NOAA data https://www.ncei.noaa.gov/pub/data/cirs/climdiv/

#returns df, all data in the file
#also returns cal, only California data
def cleanup_NOAA_txt(file: str, state_code: str = "04") -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Read a NOAA climate-division county text file and split out one state.

    Every line of the file is an identifier string followed by twelve monthly
    values. The identifier is read as text (so leading zeros survive) and
    sliced into three extra columns:

    * ``State Code`` - characters 1-2
    * ``FIPS``       - characters 3-5 (county code)
    * ``Year``       - the last four characters (in the "norm" files these
      are period codes such as 0001, not calendar years)

    Identifier layout:
    https://www.ncei.noaa.gov/pub/data/cirs/climdiv/county-readme.txt

    Parameters
    ----------
    file : str
        Path to the whitespace-delimited NOAA file (no header row).
    state_code : str, default "04"
        Two-character NOAA state code to extract. "04" is California in
        NOAA's numbering.

    Returns
    -------
    tuple of (pd.DataFrame, pd.DataFrame)
        ``df``  - every row of the file, including the three derived columns.
        ``cal`` - only the rows whose ``State Code`` equals ``state_code``,
        with the ``County Data`` and ``State Code`` columns removed.
    """
    col_names = ["County Data", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    # sep=r"\s+" is the documented replacement for the deprecated
    # delim_whitespace=True and parses the file the same way.
    df = pd.read_csv(file, sep=r"\s+", names=col_names,
                     dtype={"County Data": str})

    df["State Code"] = df["County Data"].str[:2]
    df["FIPS"] = df["County Data"].str[2:5]
    df["Year"] = df["County Data"].str[-4:]

    # Keep only the requested state; the raw identifier and the (now constant)
    # state code are redundant in that subset.
    cal = df[df["State Code"] == state_code]
    cal = cal.drop(columns=["County Data", "State Code"])

    return df, cal


In [10]:
#Yearly county records of monthly max temperature and Cooling Degree Days (CDD).
#Each cleanup_NOAA_txt call gives back two frames: every row in the file (US)
#and the California-only subset (CA).

#Months outside May-September are excluded, following a CDC report that used the
#same window when studying heat related illness:
#Vaidyanathan A, Gates A, Brown C, Prezzato E, Bernstein A. Heat-Related Emergency Department Visits
#— United States, May–September 2023. MMWR Morb Mortal Wkly Rep 2024;73:324–329.
#DOI: http://dx.doi.org/10.15585/mmwr.mm7315a1
months_to_drop = ["Jan", "Feb", "Mar", "Apr","Oct", "Nov", "Dec"]

maxTempUS, maxTempCA = cleanup_NOAA_txt("../climdiv-tmaxcy-v1.0.0-20250905.txt")
cddUS, cddCA = cleanup_NOAA_txt("../climdiv-cddccy-v1.0.0-20250905.txt")

#Summer-only ("Su") California tables: same rows, cooler-month columns removed
maxTempSuCA, cddSuCA = (
    frame.drop(months_to_drop, axis=1) for frame in (maxTempCA, cddCA)
)

In [9]:
#Creates max temp and CDD dataframes of climate normals (Norm): monthly values
#averaged over the multi-decade base periods listed below, not single years.
#https://www.ncei.noaa.gov/pub/data/cirs/climdiv/normals-readme.txt

#In the normals files the last four characters of the identifier (the "Year"
#column produced by cleanup_NOAA_txt) are a period code:
# 0001  means 1901-1930
# 0002  means 1911-1940
# 0003  means 1921-1950
# 0004  means 1931-1960
# 0005  means 1941-1970
# 0006  means 1951-1980
# 0007  means 1961-1990
# 0008  means 1971-2000
# 0009  means 1981-2010
# 0010  means 1991-2020
# 0031  means 1901-2000
# 0032  means 1895-2020

maxNormTempUS, maxNormTempCA = cleanup_NOAA_txt("../climdiv-norm-tmaxcy-v1.0.0-20250905.txt")
cddNormUS, cddNormCA = cleanup_NOAA_txt("../climdiv-norm-cddccy-v1.0.0-20250905.txt")

#Drop cooler months, DOI: http://dx.doi.org/10.15585/mmwr.mm7315a1
#The list is defined before its first use so this cell runs on a fresh kernel
#without relying on a variable left behind by another cell.
months_to_drop = ["Jan", "Feb", "Mar", "Apr","Oct", "Nov", "Dec"]
maxNormTempSuCA = maxNormTempCA.drop(columns=months_to_drop)
#CDD counterpart of the summer-only normals table
cddNormSuCA = cddNormCA.drop(columns=months_to_drop)

In [14]:
#Dataset for 2023 CA max Temp and CDD
#Aligns with the 2023 heat illness data we have
#"Year" is stored as text, hence the comparison against the string "2023".
#Each mask is built from the same frame it filters, so the selection does not
#depend on index alignment with a different dataframe.
maxTempSu2023CA = maxTempSuCA.loc[maxTempSuCA["Year"] == "2023"].drop(columns=["Year"])
cddSu2023CA = cddSuCA.loc[cddSuCA["Year"] == "2023"].drop(columns=["Year"])


In [19]:
#Spot-check three randomly chosen rows of the 2023 California summer CDD table
cddSu2023CA.sample(n=3)

Unnamed: 0,May,Jun,Jul,Aug,Sep,FIPS
22136,0.0,15.0,126.0,172.0,26.0,23
27245,140.0,255.0,509.0,477.0,226.0,101
27769,7.0,14.0,192.0,106.0,18.0,109


In [20]:
#Number of rows per county code in the US-wide CDD normals table.
#FIPS here is only the 3-character county part of the identifier, so rows from
#different states that share a county code are counted together.
unique_categories = cddNormUS.FIPS.value_counts()
print(unique_categories)

#Distinct "Year" values; in the normals file these are period codes
#(0001, 0002, ...), so this counts normal periods rather than calendar years.
numYears = cddNormUS.Year.nunique()

#Second number: row count of the 2023 California summer CDD table
print(numYears, cddSu2023CA.shape[0], sep="\n")

003    576
005    576
001    564
009    564
007    552
      ... 
383     12
381     12
379     12
377     12
078     12
Name: FIPS, Length: 301, dtype: int64
12
58
