In [2]:
# compares results from dwfs (2022) to dwind model runs (2023)

import os
import numpy as np
import pandas as pd

# NOTE: a `global` statement at module (cell) level has no effect in Python.
# `states` is assigned in a later cell and is read inside get_dwfs_results().
global states

In [52]:
def get_dwfs_results(app, scenario, state):
    """Load the dwfs (2022) run output for one state with lookup weights attached.

    Args:
        app: application name; used lowercase in the run directory name and
            uppercased in the pickle file name.
        scenario: scenario name used in the run directory and file names.
        state: key into the notebook-level ``states`` mapping; the mapped
            value is matched against the ``state_abbr`` column.

    Returns:
        DataFrame of the state's dwfs rows, with the run's own ``weight``
        column dropped and the columns of the weights lookup CSV left-joined
        on ``gid``.
    """
    run_dir = os.path.join('/projects/dwind/dwfs_2022/', f'{app}_{scenario}', 'dwfs')

    # Read the pickled run output and keep only the requested state's rows.
    output_path = os.path.join(run_dir, 'output', f'run_{app.upper()}_{scenario}.pkl')
    results = pd.read_pickle(output_path)
    state_abbr = states[state]
    in_state = results['state_abbr'] == state_abbr
    results = results[in_state]

    # Swap the weights stored in the run output for the ones in the lookup CSV.
    weights_path = os.path.join(
        run_dir, 'analysis', 'weights', 'lkup_gid_to_weights_100k.csv'
    )
    weights = pd.read_csv(weights_path)

    return results.drop(columns='weight').merge(weights, on='gid', how='left')


def get_dwind_results(app, scenario, state):
    """Read the pickled dwind (2023) run output for one state/app/scenario.

    The file is expected at
    ``/projects/dwind/runs_2023/<state>/<app>/<state>_<app>_<scenario>.pkl``.

    Returns:
        Whatever object the pickle holds (used as a DataFrame by callers).
    """
    pickle_name = f'{state}_{app}_{scenario}.pkl'
    pickle_path = os.path.join('/projects/dwind/runs_2023', state, app, pickle_name)
    return pd.read_pickle(pickle_path)


def merge_results(app, scenario, state):
    """Inner-join the dwind and dwfs results for one run on ``gid``.

    Columns present in both frames get pandas' default ``_x`` (dwind) and
    ``_y`` (dwfs) suffixes.
    """
    dwind_results = get_dwind_results(app, scenario, state)
    dwfs_results = get_dwfs_results(app, scenario, state)

    # Only gids found in both result sets survive the join.
    return pd.merge(dwind_results, dwfs_results, on='gid', how='inner')

In [53]:
def _print_stat_comparison(col, stat, dwind_value, dwfs_value):
    """Print one dwind-vs-dwfs statistic and its percent difference vs. dwind."""
    # Divide as numpy floats with warnings silenced: a zero or NaN dwind
    # baseline then yields inf/NaN instead of raising or warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        pct = np.float64(abs(dwind_value - dwfs_value)) / np.float64(dwind_value) * 100

    print(col, stat)
    print('dwind:', dwind_value)
    print('dwfs:', dwfs_value)
    # round() raises on NaN/inf (empty frame, all-NaN column, zero baseline),
    # so report those cases as 'n/a' rather than crashing the whole run.
    print('diff:', f'{round(pct)}%' if np.isfinite(pct) else 'n/a')


def compare_cols(df, col, diff=False, sum=False, med=False):
    """Compare one column between the dwind and dwfs halves of a merged frame.

    Args:
        df: merged frame containing ``gid``, ``{col}_x`` (dwind) and
            ``{col}_y`` (dwfs). It is not modified.
        col: base column name, without the merge suffix.
        diff: if True, add per-row ``diff`` (absolute difference) and
            ``diff_pct`` (difference as a fraction of the dwind value)
            columns to the returned frame.
        sum: if True, print the NaN-ignoring sum of each side and the percent
            difference relative to dwind.
        med: if True, print the NaN-ignoring median of each side and the
            percent difference relative to dwind.

    Returns:
        DataFrame with ``gid``, ``{col}_dwind``, ``{col}_dwfs`` and, when
        ``diff`` is True, the ``diff`` and ``diff_pct`` columns. Earlier
        versions returned nothing, which made the ``diff`` columns unreachable.

    Raises:
        KeyError: if ``gid`` or either suffixed column is missing from ``df``.
    """
    dwind_col = f'{col}_dwind'
    dwfs_col = f'{col}_dwfs'

    df = df.rename(columns={f'{col}_x': dwind_col, f'{col}_y': dwfs_col})
    # Explicit copy so the columns added below never alias another frame.
    df = df[['gid', dwind_col, dwfs_col]].copy()

    if diff:
        abs_diff = (df[dwind_col] - df[dwfs_col]).abs()
        df['diff'] = abs_diff
        df['diff_pct'] = abs_diff / df[dwind_col]

    if sum:
        _print_stat_comparison(
            col,
            'sum',
            np.nansum(df[dwind_col].values),
            np.nansum(df[dwfs_col].values),
        )

    if med:
        _print_stat_comparison(
            col,
            'median',
            np.nanmedian(df[dwind_col].values),
            np.nanmedian(df[dwfs_col].values),
        )

    return df


def compare_actual(app, scenario, state):
    """Print column-by-column dwind-vs-dwfs comparisons for one run.

    Works on the inner merge of both result sets (rows whose ``gid`` appears
    in both), and delegates the printing to ``compare_cols``.
    """
    merged = merge_results(app, scenario, state)

    # (column, compare sums?, compare medians?) in reporting order.
    # Currently disabled: pgid, hdf_index, rev_gid_wind, turbine_class,
    # turbine_height_m, max_demand_kw (median), turbine_instances (sum),
    # wind_size_kw (sum), wind_aep (sum), wind_aep_fom (sum),
    # wind_pysam_outputs_fom.
    comparison_plan = [
        ('load_kwh', True, False),
        ('wind_size_kw_btm', True, False),
        ('wind_size_kw_techpot', True, False),
        ('wind_naep', True, False),
        ('wind_cf', False, True),
        ('wind_breakeven_cost_btm', False, True),
        ('wind_breakeven_cost_fom', False, False),
    ]

    for column, want_sum, want_median in comparison_plan:
        if want_sum:
            compare_cols(merged, column, sum=True)

        if want_median:
            compare_cols(merged, column, med=True)

    # Optional export of the merged frame (disabled):
    # merged.to_csv(f'compare_actual_{state}_{app}_{scenario}.csv', index=False)

In [132]:
def compare_totals_by_col(df_dwind, df_dwfs, col, sum=False, med=False):
    """Print aggregate dwind-vs-dwfs statistics for a single column.

    Args:
        df_dwind: dwind results containing ``col``.
        df_dwfs: dwfs results containing ``col`` and ``weight``. It is not
            modified.
        col: name of the column to compare.
        sum: if True, print the NaN-ignoring sum of the dwind column next to
            the NaN-ignoring sum of the dwfs column multiplied by ``weight``.
        med: if True, print the NaN-ignoring (unweighted) medians.

    Each report ends with the percent difference relative to the dwind value,
    or ``n/a`` when that percentage is not finite (zero or NaN baseline,
    empty input).
    """
    def _report(stat, dwind_value, dwfs_value):
        # Divide as numpy floats with warnings silenced so a zero/NaN baseline
        # yields inf/NaN rather than an exception or a warning.
        with np.errstate(divide='ignore', invalid='ignore'):
            pct = np.float64(abs(dwind_value - dwfs_value)) / np.float64(dwind_value) * 100

        print(col, stat)
        print('dwind:', dwind_value)
        print('dwfs:', dwfs_value)
        # round() raises on NaN/inf, so those cases are reported as 'n/a'.
        print('diff:', f'{round(pct)}%' if np.isfinite(pct) else 'n/a')
        print('\n')

    if sum:
        # Weight into a temporary Series. Writing the product back into
        # df_dwfs[col] would leave the caller's frame scaled, so a later
        # median (or a second call) would see already-weighted values.
        dwfs_weighted = df_dwfs[col] * df_dwfs['weight']
        _report(
            'sum',
            np.nansum(df_dwind[col].values),
            np.nansum(dwfs_weighted.values),
        )

    if med:
        _report(
            'median',
            np.nanmedian(df_dwind[col].values),
            np.nanmedian(df_dwfs[col].values),
        )

    # Other statistics tried earlier and left disabled: the 80th percentile
    # (np.nanpercentile(values, 80)) and the mean (np.mean(values)).

In [139]:
def compare_aggregates(app, scenario, state):
    """Print aggregate dwind-vs-dwfs comparisons for one state/app/scenario.

    Both result sets are loaded independently (no join) and restricted to
    rows whose ``wind_breakeven_cost_{app}`` is present and greater than -1.
    The printing is delegated to ``compare_totals_by_col``.
    """
    breakeven_col = f'wind_breakeven_cost_{app}'

    def _with_valid_breakeven(frame):
        # Drop missing breakeven costs first, then values at or below -1.
        frame = frame[frame[breakeven_col].notna()]
        return frame[frame[breakeven_col] > -1]

    dwind_results = _with_valid_breakeven(get_dwind_results(app, scenario, state))
    dwfs_results = _with_valid_breakeven(get_dwfs_results(app, scenario, state))

    # (column, compare sums?, compare medians?) in reporting order.
    # Currently disabled: pgid, hdf_index, rev_gid_wind, turbine_class,
    # turbine_height_m, load_kwh (sum), max_demand_kw (median),
    # turbine_instances (sum), wind_naep (sum), wind_aep (sum),
    # wind_cf (median), wind_pysam_outputs_fom.
    comparison_plan = [
        (f'wind_size_kw_{app}', True, False),
        (f'wind_breakeven_cost_{app}', False, True),
    ]

    for column, want_sum, want_median in comparison_plan:
        if want_sum:
            compare_totals_by_col(dwind_results, dwfs_results, column, sum=True)

        if want_median:
            compare_totals_by_col(dwind_results, dwfs_results, column, med=True)

In [136]:
# Completed states: the key is the state name used in the dwind run paths,
# the value is the abbreviation matched against the dwfs `state_abbr` column.
# Currently disabled: wisconsin, iowa, illinois, indiana, michigan, ohio,
# newyork, kentucky, california.
states = dict(minnesota='MN')

# Completed scenarios ('baseline_2022' is currently disabled).
scenarios = ['baseline_2035']

In [140]:
# fom
# Run the aggregate dwind-vs-dwfs comparison for the 'fom' application over
# every configured state and scenario; the report is printed as cell output.
for state in states:
    for scenario in scenarios:
        # compare_actual('fom', scenario, state)
        compare_aggregates('fom', scenario, state)
        
        

wind_size_kw_fom sum
dwind: 263161505.0
dwfs: 460721404.3616895
diff: 75%


wind_breakeven_cost_fom median
dwind: 1379.3136363957437
dwfs: 761.9151258693616
diff: 45%




In [141]:
# btm
# Run the aggregate dwind-vs-dwfs comparison for the 'btm' application over
# every configured state and scenario; the report is printed as cell output.
for state in states:
    for scenario in scenarios:
        # compare_actual('btm', scenario, state)
        compare_aggregates('btm', scenario, state)

wind_size_kw_btm sum
dwind: 52249385.0
dwfs: 2660923.9309899053
diff: 95%


wind_breakeven_cost_btm median
dwind: 4011.0552508085257
dwfs: 6018.089551565368
diff: 50%




In [None]:
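# MN fom summary: less technical potential in dwind (summed wind_size_kw_fom: 2.63e8 vs 4.61e8 in dwfs), higher median breakeven cost in dwind (1379 vs 762)
# MN btm summary: more technical potential in dwind (summed wind_size_kw_btm: 5.22e7 vs 2.66e6 in dwfs), lower median breakeven cost in dwind (4011 vs 6018)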