In [24]:
import json
import pandas as pd
import numpy as np
from metpy.calc import dewpoint_from_relative_humidity
from metpy.units import units
import os

def convert_dewpoint(row):
    return dewpoint_from_relative_humidity(row['Dry Bulb Temperature [°C]'] * units.degC, row['Relative Humidity [%]'] * units.percent).m

output_file = 'TMY3_aggregates.csv'
prev_calculated = set(pd.read_csv(output_file)['gisjoin'].to_list())

with open('county_lookup.json')as file:
    data = json.load(file)
    # i = 0
    for state in data:
        results = []
        if state in ['AK', 'HI', 'PR']:
            continue
        for county_info in data[state]:
            gisjoin = county_info['gisjoin']
            if gisjoin in prev_calculated:
                continue
            county = county_info['name']
            print(f'state: {state}, gisjoin: {gisjoin}, county: {county}')
            url = f'https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2024/resstock_dataset_2024.1/resstock_tmy3/weather/state={state}/{gisjoin}_TMY3.csv'
            df = pd.read_csv(url)
            df['Dewpoint [°C]'] = df.apply(convert_dewpoint, axis=1)
            # print(df)
            hdd = sum([max(0,18.3 - temp) for temp in df['Dry Bulb Temperature [°C]']])/24
            cdd = sum([max(0,temp - 18.3) for temp in df['Dry Bulb Temperature [°C]']])/24
            county_results = [
                state,
                county,
                gisjoin,
                np.mean(df['Dry Bulb Temperature [°C]']),
                max(df['Dry Bulb Temperature [°C]']),
                min(df['Dry Bulb Temperature [°C]']),
                hdd,
                cdd,
                np.mean(df['Relative Humidity [%]']),
                np.mean(df['Dewpoint [°C]']),
                max(df['Dewpoint [°C]']),
                min(df['Dewpoint [°C]']),
                np.mean(df['Wind Speed [m/s]']),
                max(df['Wind Speed [m/s]']),
                np.mean(df['Global Horizontal Radiation [W/m2]']),
                max(df['Global Horizontal Radiation [W/m2]']),
                np.mean(df['Direct Normal Radiation [W/m2]']),
                max(df['Direct Normal Radiation [W/m2]']),
                np.mean(df['Diffuse Horizontal Radiation [W/m2]']),
                max(df['Diffuse Horizontal Radiation [W/m2]']),
            ]
            results.append(county_results)
            # i += 1
            # if i > 1:
            # break
        output_df = pd.DataFrame(results, columns=['state', 'county', 'gisjoin', 'avg temp', 'max temp', 'min temp', 'hdd', 'cdd', 'avg rh', 'average dp', 'max dp', 'min dp', 'avg wind speed', 'max wind speed', 'avg ghr', 'max ghr', 'avg dnr', 'max dnr', 'avg dhr', 'max dhr']).set_index('gisjoin')
        output_df.to_csv(output_file, mode='a', header=not os.path.exists(output_file))


In [53]:
# Remove G, 3rd character after G (0) and add a 1 to the end
import glob
from epw import epw


# prev_calculated = set(pd.read_csv(output_file)['gisjoin'].to_list())

with open('county_lookup.json')as file:
    data = json.load(file)
    for state in data:
        # results = []
        if state in ['AK', 'HI', 'PR']:
            continue
        for county_info in data[state]:
            gisjoin = county_info['gisjoin']
            county = county_info['name']
            county_code = gisjoin[1:3] + gisjoin[4:] + '1'
            for file in glob.glob(f'Data/fTMY3/{state}/**/*{county_code}*', recursive=True):
                year_range = file[-13:-4].replace("_","-")
                print(f'state: {state}, gisjoin: {gisjoin}, county: {county}, year range: {year_range}')
                e = epw()
                e.read(file)
                df = e.dataframe
                hdd = sum([max(0,18.3 - temp) for temp in df['Dry Bulb Temperature']])/24
                cdd = sum([max(0,temp - 18.3) for temp in df['Dry Bulb Temperature']])/24
                county_results = [[
                    state,
                    county,
                    gisjoin,
                    year_range,
                    np.mean(df['Dry Bulb Temperature']),
                    max(df['Dry Bulb Temperature']),
                    min(df['Dry Bulb Temperature']),
                    hdd,
                    cdd,
                    np.mean(df['Relative Humidity']),
                    np.mean(df['Dew Point Temperature']),
                    max(df['Dew Point Temperature']),
                    min(df['Dew Point Temperature']),
                    np.mean(df['Wind Speed']),
                    max(df['Wind Speed']),
                    np.mean(df['Global Horizontal Radiation']),
                    max(df['Global Horizontal Radiation']),
                    np.mean(df['Direct Normal Radiation']),
                    max(df['Direct Normal Radiation']),
                    np.mean(df['Diffuse Horizontal Radiation']),
                    max(df['Diffuse Horizontal Radiation']),
                ]]
                output_df = pd.DataFrame(county_results, columns=['state', 'county', 'gisjoin', 'year range', 'avg temp', 'max temp', 'min temp', 'hdd', 'cdd', 'avg rh', 'average dp', 'max dp', 'min dp', 'avg wind speed', 'max wind speed', 'avg ghr', 'max ghr', 'avg dnr', 'max dnr', 'avg dhr', 'max dhr'])
                folderpath = f'fTMY3_aggregates/{state}/{gisjoin}'
                os.makedirs(folderpath, exist_ok=True)
                output_df.to_csv(folderpath + f'/{year_range}.csv')
                # results.append(county_results)


state: AL, gisjoin: G0100010, county: Autauga County, year range: 2080-2099
state: AL, gisjoin: G0100010, county: Autauga County, year range: 2060-2079
state: AL, gisjoin: G0100010, county: Autauga County, year range: 1980-1999
state: AL, gisjoin: G0100010, county: Autauga County, year range: 2000-2019
state: AL, gisjoin: G0100010, county: Autauga County, year range: 2020-2039
state: AL, gisjoin: G0100010, county: Autauga County, year range: 2040-2059
state: AL, gisjoin: G0100030, county: Baldwin County, year range: 2000-2019
state: AL, gisjoin: G0100030, county: Baldwin County, year range: 1980-1999
state: AL, gisjoin: G0100030, county: Baldwin County, year range: 2040-2059
state: AL, gisjoin: G0100030, county: Baldwin County, year range: 2060-2079
state: AL, gisjoin: G0100030, county: Baldwin County, year range: 2080-2099
state: AL, gisjoin: G0100030, county: Baldwin County, year range: 2020-2039
state: AL, gisjoin: G0100050, county: Barbour County, year range: 2040-2059
state: AL, g