# EnergyAnalysis Class
Calculate the solar supply, demand, storage left, and storage demand with given data of area (default as California)
...

## Import packages

In [2]:
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import h5pyd

ModuleNotFoundError: No module named 'h5pyd'

## Class implementation

### Energy Supply Analysis

In [3]:
class EnergySupplyAnalysis:
    '''
    The class for energy supply analysis for given capacity and GHI data

    Parameter
    ---------
    capacity: int
        Utility-scale solar power capacity
        unit: Mega Watts
    
    solarDataFile: str
        Solar data file path
    '''
    def __init__(self, f: h5pyd._hl.files.File, capacity=17500) -> None:
        assert isinstance(capacity, float) or isinstance(capacity, int)
        assert isinstance(f, h5pyd._hl.files.File)
        
        self.capacity = capacity
        dset = f['ghi']
        factor = f['ghi'].attrs['psm_scale_factor']
        self.__solarData = pd.DataFrame()
        self.__solarData['Date'] = pd.to_datetime(f['time_index'][...].astype(str))
        self.__solarData['GHI'] = dset / factor
        self.__solarData['Supply'] = self.__solarData['ghi'] * capacity / 1000.
    
    def getEnergySupply(self) -> pd.DataFrame:
        return self.__solarData

NameError: name 'h5pyd' is not defined

In [4]:
class EnergySupplyAnalysisCSV:
    '''
    The class for energy supply analysis for given capacity and GHI data

    Parameter
    ---------
    capacity: int
        Utility-scale solar power capacity
        unit: Mega Watts
    
    solarDataFile: str
        Solar data file path
    '''
    def __init__(self, capacity=17500, solarDataFile: str='../raw_data/midCalifornia_GHI.csv') -> None:
        assert isinstance(capacity, float) or isinstance(capacity, int)
        assert isinstance(solarDataFile, str)

        self.solarData = pd.read_csv(solarDataFile)
        self.capacity = capacity
        self.__energySupply()

    def __energySupply(self) -> pd.DataFrame:
        '''
        Calculate the supply with given solar and solar panel capacity data

        Returns
        -------
        pandas.DataFrame
            | Year | Month | Day | Hour | Minute | GHI | Supply |
            |------|-------|-----|------|--------|-----|--------|
        
            Year: int
            Month: int
            Day: int
            Hour: int
            Minute: int
            GHI: int
            Supply: float
        '''
        self.__supplyDF = self.solarData
        self.__supplyDF['Supply'] = self.__supplyDF['GHI'] * self.capacity / 1000
    
    def getEnergySupply(self) -> pd.DataFrame:
        return self.__supplyDF

### Energy Demand Analysis

In [5]:
class EnergyDemandAnalysis:
    '''
    The class for energy demand analysis for given electrical data

    Parameter
    ---------
    energyDataFile: str
        Electrical demand data file path
    '''
    def __init__(self, energyDataFile: str='../raw_data/CAISOactualLoad.csv') -> None:
        assert isinstance(energyDataFile, str)

        self.demandData = pd.read_csv(energyDataFile)
        self.__energyDemand()

    def __energyDemand(self) -> pd.DataFrame:
        '''
        Calculate the demand with electrical data

        Returns
        -------
        pandas.DataFrame
            | Date | Demand |
            |------|--------|
        
            Date: datetime
            Demand: float
        '''

        data = self.demandData[self.demandData['zone'] == 'CA ISO']
        data['date'] = pd.to_datetime(data['Date'])
        self.__demandDF = data[['date', 'load']]
        self.__demandDF = self.__demandDF.rename(columns={"date": "Date", "load": "Demand"})
    
    def getEnergyDemand(self) -> pd.DataFrame:
        return self.__demandDF

In [6]:
#Energy storage analysis:


class StorageAnalysis:
    """
        This class takes in a pandas dataframe in the form of 
            | date | hour | supply |
            |------|------|--------|
            and
            | date | hour | demand |
            |------|------|--------|

            and creates a dataframe in the form of:
            
            | date | hour | storage demand | storage supplied | storage Left | Curtailed supply |
            |------|------|----------------|------------------|--------------|------------------|

            where storage demand is the need for power that is not met by solar capacity,
            storage supplied is the amount of energy supplied that hour, which should equal demand unless storageLeft goes to zero.
            storageLeft is a the amount of energy left in the storage devices.
            
    """
    def __init__(self, the_supply: pd.DataFrame, the_demand: pd.DataFrame, the_storageMWh: int):
        self.supply = the_supply
        self.demand = the_demand
        self.storage_max = the_storageMWh

        self.storage_left: pd.Series[int]
        self.storage_demand: pd.Series[int]
        self.storage_supplied: pd.Series[int]
        self.curtailed_supply: pd.Series[int]

        self.set_storage_series()
        

    def set_storage_series(self):
        the_supply: pd.Series[float] = self.supply["Supply"]
        #print(the_supply)
        the_demand: pd.Series[float] = self.demand["Demand"] 
        #print(the_demand)
        #net_generation: pd.Series[float] = the_supply - the_demand 
        #print(net_generation)
        storage_demand_list: list[int] = [the_demand.iat[0] - the_supply.iat[0]]
        storage_supplied_list: list[int] = [min(self.storage_max, the_demand.iat[0] - the_supply.iat[0])]
        storage_left_list: list[int] = [self.storage_max - storage_supplied_list[0]]
        curtailed_supply_list: list[int] = [max(0, storage_supplied_list[0] - storage_demand_list[0])]

        #Try doing this with something faster than for loop
        for i in range(1,len(the_supply)):
            net_storage_Demand: int = the_demand.iat[i] - the_supply.iat[i]

            #getting storage demand, negative implies oversupply:
            storage_demand_list.append(net_storage_Demand)

            #getting storage left
            storage: int = storage_left_list[i-1] - net_storage_Demand
            if storage < 0:
                storage = 0
            elif storage > self.storage_max:
                storage = self.storage_max
            storage_left_list.append(storage)

            #getting storage supplied:
            if net_storage_Demand > 0:
                storage_supplied_list.append(min(storage_left_list[i-1], net_storage_Demand))
            else:
                storage_supplied_list.append(max(storage_left_list[i-1] - self.storage_max, net_storage_Demand))

            #getting curtailed supply:
            curtailed_supply_list.append(max(0, storage_supplied_list[i] - storage_demand_list[i]))
            


        self.storage_left = pd.Series(storage_left_list)
        self.storage_demand = pd.Series(storage_demand_list)
        self.storage_supplied = pd.Series(storage_supplied_list)
        self.curtailed_supply = pd.Series(curtailed_supply_list)

        
    def get_storage_data(self):
        return_dict = {"Storage Demand" : self.storage_demand,
                       "Storage Supplied" : self.storage_supplied,
                       "Storage Left" : self.storage_left,
                       "Curtailed Supply" : self.curtailed_supply}
        
        return_dates = self.supply[["Year","Month","Day","Hour","Minute"]]
        
        return pd.concat([return_dates, pd.DataFrame(return_dict)], axis=1)
        # print(self.storage_left)
        # print(self.storage_demand)
        # print(self.storage_supplied)
        # print(self.curtailed_supply)
    
    

#https://assessingsolar.org/notebooks/solar_power_modeling.html

## Test

In [None]:
f = h5pyd.File("/nrel/nsrdb/v3/nsrdb_2012.h5", 'r')

In [None]:
test = EnergySupplyAnalysisCSV()

In [None]:
test.getEnergySupply()

Unnamed: 0,Year,Month,Day,Hour,Minute,GHI,Supply
0,2018,1,1,0,30,25,437.5
1,2018,1,1,1,30,0,0.0
2,2018,1,1,2,30,0,0.0
3,2018,1,1,3,30,0,0.0
4,2018,1,1,4,30,0,0.0
...,...,...,...,...,...,...,...
26275,2020,12,31,19,30,509,8907.5
26276,2020,12,31,20,30,517,9047.5
26277,2020,12,31,21,30,464,8120.0
26278,2020,12,31,22,30,356,6230.0


In [7]:
test_supply = EnergySupplyAnalysisCSV(100000)
test_demand = EnergyDemandAnalysis()
test_storage = StorageAnalysis(test_supply.getEnergySupply(), test_demand.getEnergyDemand(), 500000)
#print(test_storage.demand)
#print(test_storage.supply)
#pd.set_option("display.max_rows",100)
test_storage.get_storage_data()
    

#https://assessingsolar.org/notebooks/solar_power_modeling.html

  data['date'] = pd.to_datetime(data['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['date'] = pd.to_datetime(data['Date'])


Unnamed: 0,Year,Month,Day,Hour,Minute,Storage Demand,Storage Supplied,Storage Left,Curtailed Supply
0,2018,1,1,0,30,18702.0,18702.0,481298.0,0.0
1,2018,1,1,1,30,20559.0,20559.0,460739.0,0.0
2,2018,1,1,2,30,20017.0,20017.0,440722.0,0.0
3,2018,1,1,3,30,19629.0,19629.0,421093.0,0.0
4,2018,1,1,4,30,19613.0,19613.0,401480.0,0.0
...,...,...,...,...,...,...,...,...,...
26275,2020,12,31,19,30,-29621.0,-29621.0,63328.0,0.0
26276,2020,12,31,20,30,-29211.0,-29211.0,92539.0,0.0
26277,2020,12,31,21,30,-21907.0,-21907.0,114446.0,0.0
26278,2020,12,31,22,30,-9006.0,-9006.0,123452.0,0.0
