In [1]:
# In the root of the repo run "pip install --editable ." so that the
# project's `src` package (imported below) is importable from this notebook.

# OPTIONAL: Load the "autoreload" extension so that edited modules are picked
# up without restarting the kernel
%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

# Normalization of Power Signal

Author: Clemens Jochum

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from src.data.import_data import import_df_from_zip_pkl, import_df_info_from_zip
from src.features.performance_index import normalize_power_signal
from src.data.make_dataset import downsample_dataframe

In [3]:
from warnings import filterwarnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and numerical
# warnings raised by the libraries used below are hidden as well — consider
# narrowing it to the specific categories that are known to be noise.
filterwarnings('ignore')

# Optional matplotlib defaults, currently disabled. The last line is a
# fragment of an rcParams dict, not a runnable statement.
#plt.rcParams["figure.figsize"] = [10, 5]
#plt.rcParams['figure.dpi'] = 250
#'font.size': 10, 'savefig.dpi': 72, 'figure.subplot.bottom': 0.125

### Import Datasets

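Select the type of synthetic data (`data_key`) and the number of samples to process, and build the path to the corresponding zip archive. The individual datasets are read from that archive one at a time in the processing loop further down.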

In [9]:
# Type of synthetic data to process. The processing loop in this notebook
# distinguishes 'basic' / 'soil' (no clear-sky reference) from
# 'soil_weather' / 'weather' (clear-sky reference required).
data_key = 'soil_weather'

# Number of dataset indices (0 .. n_samples - 1) read from the archive.
n_samples = 100

path_to_data_raw = '../data/raw/'
# Strip the trailing separator before joining so the resulting path does not
# contain a doubled slash ('../data/raw//synthetic_...').
path_to_dataset = '{0:s}/synthetic_{1:s}_extra.zip'.format(path_to_data_raw.rstrip('/'), data_key)

In [10]:
# Clear-sky reference series loaded from a local, gzip-compressed pickle.
# NOTE(review): pickle files execute arbitrary code on load — only read
# files that were produced by this project.
poa_reference_02 = pd.read_pickle(
    '../data/raw/clear_sky_CO.pkl',
    compression='gzip',
)

# Full time span the reference has to cover, at one-minute resolution.
# 'Etc/GMT+7' follows the POSIX sign convention, i.e. a fixed UTC-07:00 offset.
t_begin = pd.Timestamp('2015-01-01 00:00:00', tz='Etc/GMT+7')
t_final = pd.Timestamp('2019-12-31 23:59:00', tz='Etc/GMT+7')

# Minute stamps before the first and after the last reference stamp.
# The slices drop the end point that each range shares with the loaded
# series (assuming the series is aligned to the same minute grid), so the
# three pieces do not overlap when concatenated.
reference_first = poa_reference_02.index[0]
reference_last = poa_reference_02.index[-1]
datetime_01 = pd.date_range(start=t_begin, end=reference_first, freq='min')[:-1]
datetime_03 = pd.date_range(start=reference_last, end=t_final, freq='min')[1:]

# Zero-valued padding on both sides of the loaded reference.
poa_reference_01 = pd.Series(0., index=datetime_01, name='POAcs')
poa_reference_03 = pd.Series(0., index=datetime_03, name='POAcs')

# Padded reference covering t_begin .. t_final.
poa_reference_co = pd.concat([poa_reference_01, poa_reference_02, poa_reference_03])


Unnamed: 0_level_0,Power,POA,Tamb,Wind,Degradation,Soiling,Faults,minute_of_day
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-01-01 00:00:00-07:00,-1.0,0.0,11.9,3.6,1.0,0.99796,1,0
2015-01-01 00:01:00-07:00,-1.0,0.0,11.9,3.6,1.0,0.99796,1,1
2015-01-01 00:02:00-07:00,-1.0,0.0,11.9,3.1,1.0,0.99796,1,2
2015-01-01 00:03:00-07:00,-1.0,0.0,12.0,2.8,1.0,0.99796,1,3
2015-01-01 00:04:00-07:00,-1.0,0.0,12.0,1.7,1.0,0.99796,1,4


In [None]:
# Settings that depend only on `data_key` are resolved once, before the loop.
outlier_threshold = 0.0

if data_key in ('basic', 'soil'):
    # No clear-sky handling and no external reference for these data types.
    clearsky = False
    poa_reference = None
elif data_key in ('soil_weather', 'weather'):
    # Use the padded clear-sky reference built earlier in this notebook.
    clearsky = True
    poa_reference = poa_reference_co
else:
    # Fail early instead of running with an unbound (or stale, left over
    # from an earlier kernel run) `poa_reference`.
    raise ValueError(
        "Unknown data_key {!r}; expected one of "
        "'basic', 'soil', 'soil_weather', 'weather'.".format(data_key)
    )


def _normalize_with_clipping(df, clipping):
    """Run `normalize_power_signal` on `df` with the given clipping mode.

    All other arguments are the notebook-level settings resolved above, so
    the three clipping variants differ only in the `clipping` argument.
    """
    return normalize_power_signal(
        df,
        poa_reference,
        clearsky=clearsky,
        nighttime=True,
        clipping=clipping,
        verbose=True,
        outlier_threshold=outlier_threshold,
    )


# NOTE(review): with verbose=True every call prints three progress lines, so
# a full run emits 9 * n_samples lines. The captured log reports ~0.1 % of
# data remaining — confirm whether that is a fraction labelled as percent.
for index in range(n_samples):
    df = import_df_from_zip_pkl(path_to_dataset, index)

    # Normalized power signal for each clipping strategy, computed on the
    # full-resolution frame before it is downsampled.
    p_norm_daily_clipping_basic = _normalize_with_clipping(df, 'basic')
    p_norm_daily_clipping_flexible = _normalize_with_clipping(df, 'flexible')
    p_norm_daily_clipping_universal = _normalize_with_clipping(df, 'universal')

    # Attach the three results to the downsampled frame and store it as one
    # gzip-compressed pickle per sample (1-based, zero-padded file index).
    df_pkl = downsample_dataframe(df)
    df_pkl['PI_clipping_basic'] = p_norm_daily_clipping_basic
    df_pkl['PI_clipping_flexible'] = p_norm_daily_clipping_flexible
    df_pkl['PI_clipping_universal'] = p_norm_daily_clipping_universal
    df_pkl.to_pickle(
        '../data/synthetic_{:s}_pi_daily_{:s}.pkl'.format(data_key, str(index+1).zfill(3)),
        compression='gzip',
        protocol=3,
    )

0.13 % of data remaining after clearsky detection.
0.11 % of data remaining after clipping removal.
0.11 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.09 % of data remaining after clipping removal.
0.09 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.03 % of data remaining after clipping removal.
0.03 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.11 % of data remaining after clipping removal.
0.11 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.09 % of data remaining after clipping removal.
0.09 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.03 % of data remaining after clipping removal.
0.03 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.11 % of data remaining after clipping rem

0.13 % of data remaining after clearsky detection.
0.09 % of data remaining after clipping removal.
0.09 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.03 % of data remaining after clipping removal.
0.03 % of data remaining after night-time removal.
0.14 % of data remaining after clearsky detection.
0.11 % of data remaining after clipping removal.
0.11 % of data remaining after night-time removal.
0.14 % of data remaining after clearsky detection.
0.10 % of data remaining after clipping removal.
0.10 % of data remaining after night-time removal.
0.14 % of data remaining after clearsky detection.
0.03 % of data remaining after clipping removal.
0.03 % of data remaining after night-time removal.
0.14 % of data remaining after clearsky detection.
0.11 % of data remaining after clipping removal.
0.11 % of data remaining after night-time removal.
0.14 % of data remaining after clearsky detection.
0.10 % of data remaining after clipping rem

0.13 % of data remaining after clearsky detection.
0.03 % of data remaining after clipping removal.
0.03 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.11 % of data remaining after clipping removal.
0.11 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.09 % of data remaining after clipping removal.
0.09 % of data remaining after night-time removal.
0.13 % of data remaining after clearsky detection.
0.03 % of data remaining after clipping removal.
0.03 % of data remaining after night-time removal.


In [None]:
# Sanity check: reload the first file written by the normalization loop and
# plot the three normalized signals it contains. The path and column names
# mirror what the loop writes ('../data/synthetic_<data_key>_pi_daily_<NNN>.pkl'
# with columns 'PI_clipping_basic' / '_flexible' / '_universal').
path_to_pi_sample = '../data/synthetic_{:s}_pi_daily_{:s}.pkl'.format(data_key, '001')
df_pi = pd.read_pickle(path_to_pi_sample, compression='gzip')

pi_columns = ['PI_clipping_basic', 'PI_clipping_flexible', 'PI_clipping_universal']
ax = df_pi[pi_columns].plot()
ax.set(title='Normalized power signal ({:s}, sample 001)'.format(data_key), ylabel='normalized power');