In [None]:
import json
import numpy as np
import pandas as pd
from pprint import pprint
import gzip
import os

from powersimdata.scenario.scenario import Scenario
from postreise.analyze.transmission.utilization import generate_cong_stats, get_utilization

from data_processing.azure_blob_uploaders import BlobUtil
from pathlib import Path


## Loadzone2state

In [None]:

# TODO: this will cause issues if two loadzones have the same name
# Lookup from load zone name to the state it belongs to. It is written as
# per-state groups so that all zones of a state sit next to each other; the
# comprehension flattens the groups into a plain {loadzone: state} dict.
loadzone2state = {
    loadzone: state
    for state, loadzones in (
        ("California", (
            "Bay Area",
            "Central California",
            "Northern California",
            "Southeast California",
            "Southwest California",
        )),
        ("Florida", ("Florida Panhandle", "Florida North", "Florida South")),
        ("Georgia", ("Georgia North", "Georgia South")),
        ("Illinois", ("Chicago North Illinois", "Illinois Downstate")),
        ("Michigan", ("Michigan Northern", "Michigan Southern")),
        ("Minnesota", ("Minnesota Northern", "Minnesota Southern")),
        ("Missouri", ("Missouri East", "Missouri West")),
        ("Montana", ("Montana Eastern", "Montana Western")),
        ("New York", ("New York City", "Upstate New York")),
        ("North Carolina", ("Western North Carolina",)),
        ("New Mexico", ("New Mexico Eastern", "New Mexico Western")),
        ("Ohio", ("Ohio River", "Ohio Lake Erie")),
        ("Pennsylvania", ("Pennsylvania Eastern", "Pennsylvania Western")),
        ("Texas", (
            "Coast",
            "East",
            "East Texas",
            "El Paso",
            "Far West",
            "North",
            "North Central",
            "South",
            "South Central",
            "Texas Panhandle",
            "West",
        )),
        ("Virginia", ("Virginia Mountains", "Virginia Tidewater")),
    )
    for loadzone in loadzones
}

# Save dataframes to a json file

In [None]:

def save_dfs_to_json_file(dataframes, path):
    """Flatten several data frames into one list of row records and dump it as JSON.

    :param dataframes: iterable of pandas data frames. Their rows are appended
        to the output in iteration order.
    :param path: destination file; it is opened in write mode, so an existing
        file is overwritten.
    """
    print("Getting dataframe records...")
    all_rows = []
    for frame in dataframes:
        # One dict per row, keyed by column name
        all_rows.extend(frame.to_dict('records'))
        print(".")

    print("Done getting records")
    print("Saving to file:", path)
    with open(path, 'w') as out_file:
        json.dump(all_rows, out_file)

    print(f"Done! file saved at {path}")

# Process Scenario Data

# Plant

We need to upload all rollups for the data because
....but not in python (╯°□°）╯︵ ┻━┻

At some point we may want more rollups, such as week and month,
but for now we do not need them.

The rollups we create are listed below (the code additionally produces a USA-level rollup).
Note that plant x day is not stored in Cosmos.
```python
{
    'by_plant': {
        'by_day': {
            'with_curtailment': None,
            'non_renewable': None
        },
        'by_year': {
            'with_curtailment': None,
            'non_renewable': None
        }
    },
    'by_zone': {
        'by_day': {
            'with_curtailment': None,
            'non_renewable': None
        },
        'by_year': {
            'with_curtailment': None,
            'non_renewable': None
        }
    },
    'by_interconnect': {
        'by_day': {
            'with_curtailment': None,
            'non_renewable': None
        },
        'by_year': {
            'with_curtailment': None,
            'non_renewable': None
        }
    },
}
```

In [None]:
# PLANT x DAY
# This makes the rest of the rollups easy!

def get_plant_by_day(scenario_id, grid, pg):
    """Roll generation up to one row per plant per day, split by resource class.

    :param scenario_id: value written to the ``scenario_id`` column of every row.
    :param grid: object whose ``plant`` data frame is indexed by plant id and
        has ``zone_name``, ``type`` and ``interconnect`` columns.
    :param pandas.DataFrame pg: generation with a datetime index named ``UTC``
        and one column per plant id.
    :return: (*list*) -- ``[pg_renewable, pg_nonrenewable]``. Both frames have
        the columns plant_id, timestamp, resource_type, zone, interconnect,
        scenario_id, LOC_ROLLUP, TIME_ROLLUP and generation. The first frame
        holds solar and onshore/offshore wind plants, the second all others.
    """
    print("\ngetting PLANT x DAY \n")

    # Roll up data by day ('D' is the canonical pandas alias for calendar day)
    daily = pg.resample('D').sum()

    # Turn timestamps into strings
    daily.index = daily.index.strftime('%Y-%m-%d')

    # Unpivot data so cols are now timestamp, plant id, generation value
    # (e.g. ~900k rows for 2.5k plants * 366 days)
    rows = daily.reset_index().melt(id_vars='UTC')
    rows.columns = ['timestamp', 'plant_id', 'generation']

    # Add extra data
    rows['scenario_id'] = scenario_id
    rows['LOC_ROLLUP'] = 'PLANT'
    rows['TIME_ROLLUP'] = 'DAY'
    rows = rows.join(grid.plant[['zone_name', 'type', 'interconnect']], on='plant_id')
    rows = rows.rename(columns={'type': 'resource_type', 'zone_name': 'zone'})

    # Replace loadzone with state name
    rows['zone'] = rows['zone'].replace(loadzone2state)

    # order cols nicely
    rows = rows[['plant_id', 'timestamp', 'resource_type', 'zone', 'interconnect',
                 'scenario_id', 'LOC_ROLLUP', 'TIME_ROLLUP', 'generation']]

    # Separate renewables and non-renewables so curtailment can be added to
    # wind and solar. 'wind_offshore' is the label get_curtailment_by_day
    # selects on; the previous filter only tested 'offshore_wind', which sent
    # offshore wind plants to the non-renewable frame and dropped their
    # curtailment. The old spelling is still accepted.
    renewable_types = ('solar', 'wind', 'wind_offshore', 'offshore_wind')
    is_renewable = rows['resource_type'].isin(renewable_types)
    pg_renewable = rows.loc[is_renewable]
    pg_nonrenewable = rows.loc[~is_renewable]

    return [pg_renewable, pg_nonrenewable]

## Curtailment

In [None]:
def get_curtailment_by_day(scenario_id, grid, pg, pg_renewables, wind, solar):
    """Attach daily curtailment to the daily renewable generation rows.

    Curtailment is computed per plant and day as the daily sum of the
    available profile (``wind`` or ``solar``) minus the daily sum of ``pg``.

    :param scenario_id: not used by this function; kept for a uniform signature.
    :param grid: object whose ``plant`` data frame is indexed by plant id and
        has a ``type`` column.
    :param pandas.DataFrame pg: generation, datetime index named ``UTC``, one
        column per plant id.
    :param pandas.DataFrame pg_renewables: long-format frame with at least
        ``timestamp`` (``YYYY-MM-DD`` strings) and ``plant_id`` columns.
    :param pandas.DataFrame wind: available wind profile, same layout as ``pg``;
        it is indexed with both ``wind`` and ``wind_offshore`` plant ids.
    :param pandas.DataFrame solar: available solar profile, same layout as ``pg``.
    :return: (*pandas.DataFrame*) -- ``pg_renewables`` left-joined with a
        ``curtailment`` column on ``timestamp`` and ``plant_id``.
    """
    print("\ngetting CURTAILMENT x DAY \n")

    # 'D' is the canonical pandas alias for calendar-day frequency
    pg_by_day = pg.resample('D').sum()
    wind_by_day = wind.resample('D').sum()
    solar_by_day = solar.resample('D').sum()

    def curtailed_for(resource_type, available_by_day):
        # available minus generated, restricted to plants of one resource type
        plant_ids = grid.plant.loc[grid.plant['type'] == resource_type].index
        return available_by_day[plant_ids] - pg_by_day[plant_ids]

    curtailed_wind = curtailed_for('wind', wind_by_day)
    curtailed_offshore_wind = curtailed_for('wind_offshore', wind_by_day)
    curtailed_solar = curtailed_for('solar', solar_by_day)

    # Merge the three blocks of plant columns side by side on the day index
    curtailed = pd.merge(curtailed_wind, curtailed_solar, left_index=True, right_index=True)
    curtailed = pd.merge(curtailed, curtailed_offshore_wind, left_index=True, right_index=True)

    # Turn timestamps into strings
    curtailed.index = curtailed.index.strftime('%Y-%m-%d')

    # Unpivot data so cols are now timestamp, plant id, value
    curtailed = curtailed.reset_index().melt(id_vars='UTC')
    curtailed.columns = ['timestamp', 'plant_id', 'curtailment']

    # join curtailed and pg_renewables
    print("curtailed df shape", curtailed.shape, "pg renewables shape", pg_renewables.shape)
    pg_and_curtailment = pd.merge(pg_renewables, curtailed, how='left', on=['timestamp', 'plant_id'])

    return pg_and_curtailment


## Plant rollups

In [None]:
# All rollups at once
def create_list_of_pg_rollups(pg_and_curtailment, pg_nonrenewable, year='2016'):
    """Build every location x time rollup from the plant-by-day frames.

    Produces, in this order, PLANT x YEAR, ZONE x DAY, ZONE x YEAR,
    INTERCONNECT x DAY, INTERCONNECT x YEAR, USA x DAY and USA x YEAR; each
    combination yields one frame for ``pg_and_curtailment`` followed by one
    for ``pg_nonrenewable``. PLANT x DAY is skipped because the inputs already
    are that rollup.

    :param pandas.DataFrame pg_and_curtailment: plant-by-day rows with the
        columns timestamp, plant_id, zone, interconnect, scenario_id,
        LOC_ROLLUP, TIME_ROLLUP, resource_type, generation and curtailment.
    :param pandas.DataFrame pg_nonrenewable: same layout without curtailment.
    :param str year: label written to ``timestamp`` for YEAR rollups. Defaults
        to ``'2016'``, the value that used to be hard-coded.
    :return: (*list*) -- the rolled-up data frames; the inputs are not modified.
    """
    # Location columns that no longer apply at each rollup level
    dropped_by_loc = (
        ('PLANT', []),
        ('ZONE', ['plant_id']),
        ('INTERCONNECT', ['plant_id', 'zone']),
        ('USA', ['plant_id', 'zone', 'interconnect']),
    )
    key_cols = [
        'timestamp',
        'plant_id', 'zone', 'interconnect', 'scenario_id',
        'LOC_ROLLUP', 'TIME_ROLLUP',
        'resource_type'
    ]

    pg_dfs = []
    for loc_rollup, dropped in dropped_by_loc:
        groupby_cols = [col for col in key_cols if col not in dropped]

        for time_rollup in ['DAY', 'YEAR']:
            # Skip plant x day, we already have it
            if loc_rollup == 'PLANT' and time_rollup == 'DAY':
                continue

            print(f"getting {loc_rollup} x {time_rollup}")

            for source in (pg_and_curtailment, pg_nonrenewable):
                # drop/assign return new frames, so the caller's data is untouched
                pg = source.drop(columns=dropped).assign(
                    LOC_ROLLUP=loc_rollup, TIME_ROLLUP=time_rollup
                )
                if time_rollup == 'YEAR':
                    # Collapse all days into a single yearly bucket
                    pg = pg.assign(timestamp=year)

                pg = pg.groupby(groupby_cols).sum().reset_index()

                # Round generation and curtailment values. Python's round() is
                # kept so the values match what earlier exports produced.
                for value_col in ('generation', 'curtailment'):
                    if value_col in pg.columns:
                        pg[value_col] = pg[value_col].map(lambda x: round(x, 2))

                pg_dfs.append(pg)

    print()
    print("Done!")
    return pg_dfs

# Branch

In [None]:
# Branch does not need rollups like Plant because for now we're only looking at data for the whole year

def process_branch_data(scenario_id, scenario, grid, pf):
    """Build one row per transmission line with utilization and congestion stats.

    :param scenario_id: value written to the ``scenario_id`` column.
    :param scenario: not used by this function.
    :param grid: object whose ``branch`` data frame has ``rateA``,
        ``branch_device_type``, ``from_zone_name``, ``to_zone_name`` and
        ``interconnect`` columns. The join below addresses its index as
        ``branch_id``.
    :param pf: power flow data handed to ``get_utilization`` and
        ``generate_cong_stats``.
    :return: (*pandas.DataFrame*) -- rows for branches that are lines with a
        non-zero ``rateA``, with from_zone, to_zone, interconnect,
        median_utilization, risk, bind, branch_id, scenario_id, LOC_ROLLUP
        and TIME_ROLLUP columns.
    """
    print(f"start pf processing for {scenario_id}")
    has_rating = grid.branch['rateA'] != 0
    is_line = grid.branch['branch_device_type'] == 'Line'
    branch = grid.branch.loc[has_rating & is_line]

    print("getting utilization data")

    # UTILIZATION DATA
    median_util = (
        get_utilization(branch, pf, median=True)
        .T
        .rename(columns={0: 'median_utilization'})
    )

    print("getting congestion stats")

    # RISK DATA (congestion)
    risk_and_bind = generate_cong_stats(pf, grid.branch)[['risk', 'bind']]

    # Combine, then replace any NANs with 0 for binding and risk
    # TODO: check with team
    stats = pd.concat([median_util, risk_and_bind], axis=1).fillna(0)

    # Set index as col so we keep the branch id when writing records to dict
    stats['branch_id'] = stats.index

    result = (
        branch[['from_zone_name', 'to_zone_name', 'interconnect']]
        .join(stats, on='branch_id')
        .rename(columns={'from_zone_name': 'from_zone', 'to_zone_name': 'to_zone'})
    )

    # Replace loadzone with state name
    zone_cols = ['from_zone', 'to_zone']
    result[zone_cols] = result[zone_cols].replace(loadzone2state)

    result['scenario_id'] = scenario_id
    result['LOC_ROLLUP'] = 'BRANCH'
    result['TIME_ROLLUP'] = 'YEAR'
    return result


# Process pg and pf

In [None]:
def process_pg_and_pfdata(scenario_id, path):
    """Load one scenario, build all generation rollups and save them as JSON.

    :param scenario_id: scenario identifier; converted with ``str()`` before
        it is passed to ``Scenario``.
    :param str path: prefix concatenated verbatim in front of the output file
        name ``pg_data_<scenario_id>.json`` (no separator is inserted).
    """
    print(f"starting scenario {scenario_id}\n")
    scenario = Scenario(str(scenario_id))
    grid = scenario.state.get_grid()

    print("\nstarting pg processing\n")
    pg = scenario.state.get_pg()
    wind = scenario.state.get_wind()
    solar = scenario.state.get_solar()

    renewable_by_day, nonrenewable_by_day = get_plant_by_day(scenario_id, grid, pg)
    renewable_with_curtailment = get_curtailment_by_day(
        scenario_id, grid, pg, renewable_by_day, wind, solar
    )
    rollups = create_list_of_pg_rollups(renewable_with_curtailment, nonrenewable_by_day)

    print("\npg processing finished, saving to file\n")

    pg_file = path + f'pg_data_{scenario_id}.json'
    save_dfs_to_json_file(rollups, pg_file)
    print(f"pg file save complete for {scenario_id}\n")

    # Branch (pf) export is currently disabled; the intended steps are kept
    # here for reference:
    #     pf = scenario.state.get_pf()
    #     processed_pf = process_branch_data(scenario_id, scenario, grid, pf)
    #     print(f"\nfinished processing pf for scenario {scenario_id}\n")
    #     print("saving file")
    #     save_dfs_to_json_file([processed_pf], path + f'pf_data_{scenario_id}.json')
    #     print(f"pf file save complete for scenario {scenario_id}")
        

# Run everything

In [None]:
%%time

# Prefix put in front of every output file name ('' writes into the working directory)
path = ''

# Scenarios to export, processed in this order
scenario_ids = [
    544, 556, 573, 585, 594, 612, 823, 824,
    1097, 1098, 1099, 1149, 1151, 1152, 1176, 1177,
    1204, 1205, 1206, 1242, 1244, 1245, 1257, 1258,
    1270, 1705, 1724, 1723,
]

for scenario_id in scenario_ids:
    process_pg_and_pfdata(scenario_id, path)

print("\nfinished!!")


# Grid Plant Data
```python
grid_plant = [
    {
        capacity: 123,
        resource_type: 'wind',
        coords: [-45.4, 90.1],                  # [lon, lat]
        zone: 'Washington',
        interconnect: 'Western',
        generation: 512034,                 # Generation for year in MWh
        curtailment: 152                    # Curtailment for year in MWh
    },
    ...
]
```

# Grid Branch Data
```python
grid_branch = [
    {
        capacity: 123,
        coords: [[-45, 90], [-46, 91]],     # [[from_lon, from_lat], [to_lon, to_lat]]
        zone: ['Washington', 'Washington'], # [from_zone, to_zone]
        interconnect: 'Western',
    },
    ...
]
```

# New grid functions

In [None]:
# TODO: fix SettingWithCopyWarning

def combine_cols(df, cols, new_col_name):
    """Pack several columns into one list-valued column and drop the originals.

    :param pandas.DataFrame df: input frame; it is copied, not modified.
    :param list cols: names of the columns to pack, in the order the values
        should appear inside each row's list.
    :param str new_col_name: name of the list-valued column to add.
    :return: (*pandas.DataFrame*) -- copy of ``df`` with ``new_col_name``
        added and ``cols`` removed.
    """
    packed_rows = df[cols].values.tolist()
    combined = df.copy()
    combined[new_col_name] = packed_rows
    return combined.drop(columns=cols)

def get_pref_suf(string, prefix, suffix):
    """Return ``string`` with ``prefix`` put in front and ``suffix`` appended."""
    return "{}{}{}".format(prefix, string, suffix)

def bus2coords(
    df,
    grid,
    bus_col_name='bus_id',
    col_prefix="",
    col_suffix="",
    drop_bus_col=True, # Should be true if we are coordinate rounding
    coordinate_rounding=0,
    groupby_cols=None, # for coord rounding
    agg_method=None    # for coord rounding
):
    """Replace a bus id column with a ``[lon, lat]`` coords column.

    Also adds the zone (as a state name where the load zone is listed in
    ``loadzone2state``) and the interconnect of the bus. Optionally rounds the
    coordinates and combines the rows that then share a location.

    :param pandas.DataFrame df: frame with a ``bus_col_name`` column.
    :param grid: object with a ``bus`` data frame (``lon``, ``lat``,
        ``zone_id`` and ``interconnect`` columns, indexed by bus id) and an
        ``id2zone`` mapping from zone id to zone name.
    :param str bus_col_name: column of ``df`` holding the bus ids.
    :param str col_prefix: prefix applied to every added column name.
    :param str col_suffix: suffix applied to every added column name.
    :param bool drop_bus_col: drop ``bus_col_name`` from the result.
    :param int coordinate_rounding: number of decimals to round lat/lon to
        before grouping; ``0`` disables rounding and grouping.
    :param list groupby_cols: extra grouping columns (only used when rounding).
        ``None`` means no extra columns.
    :param dict agg_method: column -> aggregation passed to ``.agg`` (only used
        when rounding). ``None`` is treated as an empty dict.
    :return: (*pandas.DataFrame*) -- new frame with zone, interconnect and
        coords columns (prefixed/suffixed as requested).
    """
    # None sentinels replace the former mutable [] / {} defaults
    groupby_cols = list(groupby_cols) if groupby_cols else []
    agg_method = agg_method if agg_method is not None else {}

    # Take an explicit copy so the column assignment below works on an
    # independent frame and not on a slice of grid.bus (this is the pattern
    # pandas recommends against SettingWithCopyWarning).
    bus = (
        grid.bus[["lon", "lat", "zone_id", "interconnect"]]
        .rename(columns={'zone_id': 'zone'})
        .copy()
    )

    # Get zone info: zone id -> load zone name -> state name
    bus['zone'] = bus['zone'].replace(grid.id2zone).replace(loadzone2state)

    # We need to update column names before joining tables
    bus = bus.add_prefix(col_prefix).add_suffix(col_suffix)

    # Get new column names for later
    lat_col = get_pref_suf("lat", col_prefix, col_suffix)
    lon_col = get_pref_suf("lon", col_prefix, col_suffix)
    interconnect_col = get_pref_suf("interconnect", col_prefix, col_suffix)
    zone_col = get_pref_suf("zone", col_prefix, col_suffix)
    coords_col = get_pref_suf("coords", col_prefix, col_suffix)

    # Add location info to original df (df[bus_col_name] matched to bus index)
    new_df = df.join(bus, on=bus_col_name)
    if drop_bus_col:
        new_df = new_df.drop(columns=bus_col_name)

    # If we are coordinate_rounding, combine rows by location and groupby_cols
    # Aggregate other cols by agg_method
    if coordinate_rounding:
        new_df[lat_col] = new_df[lat_col].round(coordinate_rounding)
        new_df[lon_col] = new_df[lon_col].round(coordinate_rounding)

        location_cols = [lat_col, lon_col, interconnect_col, zone_col]
        new_df = new_df.groupby(location_cols + groupby_cols).agg(agg_method)
        new_df = new_df.reset_index()

    # combine lat and lon into coords col
    new_df = combine_cols(new_df, [lon_col, lat_col], coords_col)
    return new_df

def get_grid_plant(grid, pg, wind, solar):
    """Summarize plants by rounded location and resource type for one year.

    :param grid: object with a ``plant`` data frame (``Pmax``, ``type`` and
        ``bus_id`` columns, indexed by plant id) plus whatever ``bus2coords``
        reads from it.
    :param pandas.DataFrame pg: generation, datetime index, one column per
        plant id.
    :param pandas.DataFrame wind: available wind profile, same layout.
    :param pandas.DataFrame solar: available solar profile, same layout.
    :return: (*pandas.DataFrame*) -- one row per (location rounded to one
        decimal, interconnect, zone, resource_type) with summed generation,
        capacity and curtailment and a ``coords`` column.
    :raises ValueError: if any input spans more than one calendar year, since
        exactly one yearly total per plant is expected.
    """
    # get plant df
    plant = grid.plant[['Pmax', 'type', 'bus_id']].rename(columns={
        'type': 'resource_type',
        'Pmax': 'capacity'
    })

    def yearly_total(frame, column_name):
        # Sum per calendar year and put plants on the rows. Grouping on
        # index.year does not depend on the pandas offset-alias spelling
        # ('y', 'Y', 'YE'), which differs between pandas releases.
        total = frame.groupby(frame.index.year).sum().T
        total.columns = [column_name]
        return total

    # get generation and curtailment
    pg_year = yearly_total(pg, 'generation')
    wind_year = yearly_total(wind, 'available')
    solar_year = yearly_total(solar, 'available')

    # curtailment = available - generated, for wind and solar plants only
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    wind_and_solar = pd.concat([wind_year, solar_year]).join(pg_year)
    wind_and_solar['curtailment'] = wind_and_solar['available'] - wind_and_solar['generation']

    # combine grid plant, pg, and curtailment; plants without curtailment get 0
    pg_and_curtailment = pg_year.join(wind_and_solar[['curtailment']])
    plant = plant.join(pg_and_curtailment).fillna(value=0).reset_index()

    # combine plants by location. Sum capacity, generation, and curtailment
    plant = bus2coords(
        plant,
        grid,
        coordinate_rounding=1,
        groupby_cols=["resource_type"],
        agg_method={"generation": "sum", "capacity": "sum", "curtailment": "sum"}
    )

    print(plant)
    return plant

# Storage is kept separate from plant because the ids overlap.
# We are also not graphing yearly generation for storage: it constantly
# charges and discharges, so generation for the year should be close to
# zero. There are some inefficiencies, so technically generation is negative.
def get_grid_storage(grid):
    """Return storage capacity per unit with bus coordinates attached.

    :param grid: object whose ``storage['gen']`` data frame has ``bus_id`` and
        ``Pmax`` columns, plus whatever ``bus2coords`` reads from it.
    :return: (*pandas.DataFrame*) -- the result of ``bus2coords`` applied to
        the ``bus_id`` and ``capacity`` (renamed ``Pmax``) columns.
    """
    units = grid.storage['gen'][['bus_id', 'Pmax']].rename(columns={'Pmax': 'capacity'})
    return bus2coords(units, grid)

def get_grid_branch(grid, pf):
    """Build one row per transmission line with coordinates and congestion stats.

    :param grid: object whose ``branch`` data frame has ``rateA``,
        ``branch_device_type``, ``from_bus_id``, ``to_bus_id`` and
        ``interconnect`` columns and is indexed by branch id, plus whatever
        ``bus2coords`` reads from it.
    :param pf: power flow data handed to ``get_utilization`` and
        ``generate_cong_stats``.
    :return: (*pandas.DataFrame*) -- rows for branches that are lines with a
        non-zero ``rateA``, with branch_id, capacity, interconnect, coords
        (``[[from_lon, from_lat], [to_lon, to_lat]]``), zone
        (``[from_zone, to_zone]``), median_utilization, risk and bind columns.
    """
    is_rated_line = (grid.branch['rateA'] != 0) & (grid.branch['branch_device_type'] == 'Line')
    branch = grid.branch.loc[is_rated_line]

    branch = branch[['rateA', 'from_bus_id', 'to_bus_id', 'interconnect']]
    branch = branch.rename(columns={'rateA': 'capacity'})

    branch = bus2coords(branch, grid, bus_col_name='from_bus_id', col_prefix="from_")
    branch = bus2coords(branch, grid, bus_col_name='to_bus_id', col_prefix="to_")
    branch = combine_cols(branch, ['from_coords', 'to_coords'], 'coords')
    branch = combine_cols(branch, ['from_zone', 'to_zone'], 'zone')
    branch = branch.drop(columns=['from_interconnect', 'to_interconnect'])

    print("getting util")
    # UTILIZATION DATA
    util_median = get_utilization(grid.branch, pf, median=True).T
    util_median = util_median.rename(columns={0: 'median_utilization'})

    print("getting congestion stats")

    # RISK DATA (congestion)
    congestion_stats = generate_cong_stats(pf, grid.branch)[['risk', 'bind']]

    # Combine; replace any NANs with 0 for binding and risk
    util_and_cong = pd.concat([util_median, congestion_stats], axis=1).fillna(0)

    # Join index-to-index so every line gets the stats of its own branch id.
    # DataFrame.join always matches against the OTHER frame's index, so
    # resetting the stats index first (as was done before) made branch ids
    # match against row positions instead of branch ids.
    branch_with_util_and_cong = branch.join(util_and_cong)

    # Turn the index into a branch_id column so the id survives to_dict('records')
    branch_with_util_and_cong = (
        branch_with_util_and_cong.rename_axis('branch_id').reset_index()
    )

    print()
    print(branch_with_util_and_cong)

    return branch_with_util_and_cong

# TODO: do we need other data?
def get_grid_dcline(grid):
    """Return DC line capacity with endpoint coordinates and interconnects.

    :param grid: object whose ``dcline`` data frame has ``Pmax``,
        ``from_bus_id`` and ``to_bus_id`` columns, plus whatever
        ``bus2coords`` reads from it.
    :return: (*pandas.DataFrame*) -- capacity, coords (from/to coordinate
        pairs) and interconnect (from/to pair) per DC line; the from/to zone
        columns are dropped.
    """
    lines = grid.dcline[['Pmax', "from_bus_id", "to_bus_id"]].rename(columns={'Pmax': 'capacity'})

    # Resolve both endpoints to location columns prefixed from_ / to_
    for bus_col, prefix in (('from_bus_id', "from_"), ('to_bus_id', "to_")):
        lines = bus2coords(lines, grid, bus_col_name=bus_col, col_prefix=prefix)

    lines = combine_cols(lines, ['from_coords', 'to_coords'], 'coords')
    lines = combine_cols(lines, ['from_interconnect', 'to_interconnect'], 'interconnect')
    return lines.drop(columns=['from_zone', 'to_zone'])

def get_grid_lmp(grid, lmp):
    """Return the mean LMP and voltage level per bus with coordinates attached.

    :param grid: object whose ``bus`` data frame has a ``baseKV`` column, plus
        whatever ``bus2coords`` reads from it.
    :param pandas.DataFrame lmp: frame whose column-wise mean is taken; its
        columns are joined against the ``grid.bus`` index.
    :return: (*pandas.DataFrame*) -- lmp_mean, voltage_level and bus_id columns
        plus the location columns added by ``bus2coords``.
    """
    mean_by_bus = lmp.mean().to_frame('lmp_mean')
    with_voltage = mean_by_bus.join(grid.bus[['baseKV']]).rename(
        columns={'baseKV': 'voltage_level'}
    )

    # copy the index (bus_id) to a column so we can call bus2coords
    with_voltage['bus_id'] = with_voltage.index
    return bus2coords(with_voltage, grid, drop_bus_col=False)
    
def get_plant_records(s, grid):
    """Return the grid plant summary as a list of rounded row dicts.

    :param s: scenario object whose ``state`` provides ``get_pg``,
        ``get_wind`` and ``get_solar``.
    :param grid: grid object passed through to ``get_grid_plant``.
    :return: (*list*) -- one dict per row of ``get_grid_plant``, with
        capacity, generation and curtailment rounded to two decimals.
    """
    pg = s.state.get_pg()
    wind = s.state.get_wind()
    solar = s.state.get_solar()

    records = get_grid_plant(grid, pg, wind, solar).to_dict('records')
    for record in records:
        for field in ("capacity", "generation", "curtailment"):
            record[field] = round(record[field], 2)

    return records


In [None]:
# Converting to records creates trailing numbers due to floating
# point arithmetic so we round after converting to records
# Other note: our local saved files have the .json.gzip extension,
# but the blob storage files are just .json. The reason for this is
# because it's important to know the local files are gzipped.
# Deck.gl, however, only works with the .json extension and lets the
# browser handle un-zipping the files
def create_and_upload_grid_files_for_scenario(
        scenario_id,
        data_types,
        blob_client,
        path,
        version):
    """Create the requested grid data files for one scenario and upload them.

    :param str scenario_id: scenario identifier passed to ``Scenario``; also
        used as the last folder of the local and blob paths.
    :param list data_types: which files to produce; any of ``"plant"``,
        ``"emissions"``, ``"storage"``, ``"branch"``, ``"dcline"``, ``"lmp"``.
    :param blob_client: object providing ``upload_dict_as_json_gzip(records,
        local_file, blob_name)``.
    :param str path: local root folder, without a trailing slash.
    :param str version: version folder placed between the root and the
        scenario id, e.g. ``"v1"``.
    """
    print(f"Creating grid files for {scenario_id}")

    s = Scenario(scenario_id)
    # Go through s.state like every other data access in this notebook
    grid = s.state.get_grid()

    blob_path = f"{version}/{scenario_id}"

    # Create local path if it doesn't exist
    local_save_path = f"{path}/{version}/{scenario_id}"
    Path(local_save_path).mkdir(parents=True, exist_ok=True)

    def upload(name, records):
        # Local copy is <name>.json.gzip, blob is <name>.json (see note above)
        print(f"Uploading {name} data")
        blob_client.upload_dict_as_json_gzip(
            records,
            f"{local_save_path}/{name}.json.gzip",
            f"{blob_path}/{name}.json")

    def round_fields(records, decimals_by_field):
        # Round in place after to_dict('records') to avoid float noise in the JSON
        for record in records:
            for field, decimals in decimals_by_field.items():
                record[field] = round(record[field], decimals)
        return records

    plant_records = None
    if "plant" in data_types:
        print("\nCreating plant data...")
        plant_records = get_plant_records(s, grid)
        upload("plant", plant_records)

    if "emissions" in data_types:
        if plant_records is not None:
            print("\nUsing existing plant data for emissions...")
        else:
            print("\nCreating emissions data...")
            plant_records = get_plant_records(s, grid)

        # Emissions are only reported for coal and natural gas plants
        emissions_records = [
            plant for plant in plant_records
            if plant["resource_type"] in ("coal", "ng")
        ]
        upload("emissions", emissions_records)

    if "storage" in data_types:
        print("\nCreating storage data...")
        storage_records = round_fields(
            get_grid_storage(grid).to_dict('records'),
            {"capacity": 2})
        upload("storage", storage_records)

    if "branch" in data_types:
        print("\nCreating branch data...")
        pf = s.state.get_pf()
        branch_records = round_fields(
            get_grid_branch(grid, pf).to_dict('records'),
            {"capacity": 2, "median_utilization": 3, "risk": 2, "bind": 2})
        upload("branch", branch_records)

    if "dcline" in data_types:
        print("\nCreating dcline data...")
        dcline_records = round_fields(
            get_grid_dcline(grid).to_dict('records'),
            {"capacity": 2})
        # The upload message is printed once, when the upload actually starts
        upload("dcline", dcline_records)

    if "lmp" in data_types:
        print("\nCreating lmp data...")
        lmp = s.state.get_lmp()
        lmp_records = round_fields(
            get_grid_lmp(grid, lmp).to_dict('records'),
            {"lmp_mean": 2, "voltage_level": 2})
        upload("lmp", lmp_records)


# Run new grid code

In [None]:
# Scenario ids to export (list of strings)
scenario_ids = ["824", "1270"]
# Any subset of ["plant", "emissions", "storage", "branch", "dcline", "lmp"]
data_types = ["plant", "emissions", "storage", "branch", "dcline", "lmp"]
# Version folder, e.g. "v1"
version = "v2"
# Local output root; don't include a slash at the end
local_path = "./grid-data"
conn_str = os.environ.get("BLOB_STORAGE_CONN_STR")

for scenario_id in scenario_ids:
    create_and_upload_grid_files_for_scenario(
        scenario_id=scenario_id,
        data_types=data_types,
        blob_client=BlobUtil(conn_str, 'grid-data'),
        path=local_path,
        version=version,
    )