## Prepare distance to infrastructure data

`cerf` provides two different options for calculating the distance to the nearest suitable transmission infrastructure:

1. Get the distance to the nearest substation and gas pipeline within the utility zone in which the power plant is being sited.

2. Get the distance to the nearest substation and gas pipeline regardless of whether or not they are within the utility zone in which the power plant is being sited.


## Explore some of the package data

##### View a data frame of the sample LMP data

In [None]:
# build an LMP data frame with cerf's generator helper
lmp_df = cerf.generate_random_lmp_dataframe()

# show the first rows of the data frame as the cell output
lmp_df.head()


##### View the available suitability raster files contained in the package

In [None]:
# ask cerf for the suitability files that ship with the package
suitability_file_list = cerf.list_available_suitability_files()

# show the result as the cell output
suitability_file_list


##### View the sample configuration file for year 2010 as a dictionary

In [None]:
# load the sample configuration that cerf provides for year 2010
config_file = cerf.load_sample_config(yr=2010)

# show the loaded configuration as the cell output
config_file


##### Get the file name containing the sample utility zones raster

In [None]:
# get the file name of the sample utility zones raster from cerf
zones_file = cerf.sample_utility_zones_raster_file()

# show the file name as the cell output
zones_file


In [3]:
import logging
import pkg_resources

import numpy as np
import pandas as pd

import cerf


In [4]:
# resolve the on-disk path of the illustrative LMP archive stored in the cerf package data
f = pkg_resources.resource_filename('cerf', 'data/illustrative_lmp_8760-per-zone_dollars-per-mwh.zip')

In [58]:
# show the resolved file path as the cell output
f

'/Users/d3y010/repos/github/cerf/cerf/data/illustrative_lmp_8760-per-zone_dollars-per-mwh.zip'

In [59]:
# pool of candidate offsets, stepping by 0.1 from 0.1 toward 100.0
rng = np.arange(0.1, 100.0, 0.1)

# read the LMP table from the path stored in `f`
df = pd.read_csv(f)

# add an independently drawn offset to every zone column, labelled '1' through '57'
for zone_number in range(1, 58):
    zone_label = str(zone_number)
    df[zone_label] = df[zone_label] + np.random.choice(rng)

# write the shifted table to CSV without the index column
df.to_csv('/Users/d3y010/repos/github/cerf/cerf/data/illustrative_lmp_8760-per-zone_dollars-per-mwh.csv', index=False)

In [55]:
# show the first rows of the data frame as the cell output
df.head()

Unnamed: 0,hour,1,2,3,4,5,6,7,8,9,...,48,49,50,51,52,53,54,55,56,57
0,1,114.421922,194.776176,119.763664,236.492993,296.551151,380.2001,225.272673,505.472072,40.917417,...,110.203804,121.140641,229.07998,149.031732,50.487688,178.611512,577.054454,143.450751,225.524324,134.104204
1,2,240.518018,71.723123,120.524424,252.278779,185.336937,180.632533,280.237638,579.893493,122.318819,...,450.063664,86.145646,171.832733,120.883584,217.855055,252.405305,227.831231,276.258559,89.348148,158.638739
2,3,116.514014,83.895295,218.282182,371.667167,221.853453,144.686587,286.513914,525.960561,221.217718,...,221.845445,177.627127,98.229129,227.58028,83.961161,136.95986,496.004404,127.094394,202.130931,102.152252
3,4,178.516016,151.032432,222.466366,249.235736,83.394995,76.027928,129.797197,241.220821,506.421922,...,282.516116,106.495996,273.964865,230.242943,169.546747,234.527427,124.557958,161.899199,217.155956,140.570671
4,5,275.7002,130.491892,231.405305,164.791291,78.83043,135.747648,269.967367,279.258859,274.118619,...,187.230831,207.296797,197.128028,117.840541,169.356557,381.765666,476.721121,101.989289,201.56036,158.638739


In [57]:
# show the minimum value of each column as the cell output
df.min()

hour      1.0
1        62.5
2        58.6
3        86.1
4        73.5
5        48.4
6        28.1
7       102.6
8        90.4
9        33.5
10       46.1
11       58.0
12       94.5
13       35.1
14       92.6
15       13.2
16       41.5
17       87.1
18       96.5
19       58.1
20       73.8
21       22.3
22       70.8
23       50.2
24      103.7
25       64.3
26       24.6
27       28.2
28       74.2
29       80.6
30       13.2
31       10.2
32       20.2
33       99.4
34       74.5
35      109.0
36       29.2
37       91.4
38       86.1
39       73.4
40       65.1
41       73.6
42       37.5
43       81.4
44       53.3
45       86.9
46       14.1
47       25.4
48      106.4
49       40.5
50       89.1
51       97.3
52       32.8
53      107.1
54       96.6
55       72.7
56       61.2
57       79.9
dtype: float64

In [None]:
# Build a 3D array of locational marginal prices (LMP): one 2D layer per
# technology, where every cell of the zones array is replaced by the mean LMP
# of its zone for the hours selected by the capacity factor of the technology.

# number of technologies
n_technologies = len(self.technology_dict)

# one 2D layer per technology, each with the same rows/columns as the zones array
lmp_arr = np.zeros(shape=(n_technologies, self.zones_arr.shape[0], self.zones_arr.shape[1]))

for index, i in enumerate(self.technology_order):

    # row bounds used below to slice the descending-sorted LMP table; they are
    # derived from the capacity factor of the technology by get_cf_bin
    start_index, through_index = self.get_cf_bin(self.technology_dict[i]['capacity_factor'])

    # get the LMP file for the technology from the configuration file
    lmp_file = self.technology_dict[i].get('utility_zone_lmp_file', None)

    # use illustrative default if none provided; a missing key yields the None
    # object while a configuration entry may carry the literal string "None",
    # so both are treated as "not provided" instead of reaching pd.read_csv
    if lmp_file is None or lmp_file == "None":

        # default illustrative LMP file
        lmp_file = pkg_resources.resource_filename('cerf', 'data/illustrative_lmp_8760-per-zone_dollars-per-mwh.zip')

        # generate a random multiplier to change the data up to positive 30 percent per tech
        modify_percent = np.random.choice(np.arange(1.01, 1.3, 0.01))

        # apply to lmp data frame; the 'hour' column is scaled too, which is
        # harmless because it is dropped before any value is used
        lmp_df = pd.read_csv(lmp_file) * modify_percent

        logging.info(f"Using LMP from default illustrative package data for {self.technology_dict[i]['tech_name']}")

    else:
        logging.info(f"Using LMP file for {self.technology_dict[i]['tech_name']}:  {lmp_file}")
        lmp_df = pd.read_csv(lmp_file)

    # work on a copy without the 'hour' column so only zone columns remain
    df_sorted = lmp_df.drop('hour', axis=1)

    # sort by descending lmp for each zone; every column is sorted independently,
    # so rows no longer correspond to a shared hour afterwards
    for zone_col in df_sorted.columns:
        df_sorted[zone_col] = df_sorted[zone_col].sort_values(ascending=False).values

    # create a dictionary of LMP values for each power zone based on tech capacity factor
    lmp_dict = df_sorted.iloc[start_index:through_index].mean(axis=0).to_dict()

    # column labels are cast to int so they can be looked up with the values
    # held in the zones array
    lmp_dict = {int(k): v for k, v in lmp_dict.items()}

    # add in no data
    lmp_dict[self.lmp_zone_dict['utility_zone_raster_nodata_value']] = np.nan

    # create LMP array for the current technology by mapping each zone value to its LMP
    lmp_arr[index] = np.vectorize(lmp_dict.get)(self.zones_arr)

In [2]:
import pandas as pd


In [7]:

import pandas as pd
import pickle_utils

# path of the pickle file to load (hard-coded, machine-specific location)
f = '/Users/d3y010/Downloads/new_tas_values.pkl'


# NOTE(review): the recorded output of this cell is an AttributeError about
# `new_block` in pandas.core.internals.blocks -- presumably the file was
# pickled with a different pandas version than the one installed here; confirm.
# NOTE(review): pickle_utils.load presumably unpickles the file; only load
# pickle files that come from a trusted source.
df = pickle_utils.load(f, compression='zip')


AttributeError: Can't get attribute 'new_block' on <module 'pandas.core.internals.blocks' from '/Users/d3y010/.pyenv/versions/py3.9.4_cerf/lib/python3.9/site-packages/pandas/core/internals/blocks.py'>