# Step 1: Power system setup

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

import MCCoordinator
import StorageSampler

def storage_system(**kwargs):
    """
    Instantiate StorageSystem object using GB system data

    Reads the demand/wind time series and the battery table from ``../data/UKdata``
    (paths are relative to the current working directory), groups both series by
    calendar year and hands them to ``StorageSampler.StorageSystem``.

    :param kwargs: additional arguments to be supplied to the StorageSystem constructor
    :return: StorageSystem object
    """
    # `infer_datetime_format` is intentionally not passed: it is deprecated in
    # pandas 2.x and only produces a warning there.
    data = pd.read_csv('../data/UKdata/20161213_uk_wind_solar_demand_temperature.csv',
                       parse_dates=['UTC Time', 'Local Time'], dayfirst=True, index_col=0)

    # Demand is restricted to 2006-2015; the wind series keeps every available year.
    demand_data = data['demand_net'].dropna()['2006':'2015']
    # NOTE(review): the factor 10000 presumably scales a per-unit wind trace to an
    # assumed installed capacity in MW - confirm against the data description.
    wind_data = 10000 * data['wind_merra1'].dropna()

    # One array per calendar year, truncated to the first 8760 values (365 * 24),
    # so longer years are cut to a common length.
    demand_samples = {year: series.values[:8760]
                      for year, series in demand_data.groupby(demand_data.index.year)}
    wind_samples = {year: series.values[:8760]
                    for year, series in wind_data.groupby(wind_data.index.year)}

    # Only the first 27 rows of the battery table are used, with ratings tripled.
    # NOTE(review): the reason for the factor 3 and the 27-row cut is not visible here.
    dataframe = pd.read_csv('../data/UKdata/battery_data.csv')
    store_power_list=3*dataframe['Power (MW)'][0:27]
    store_energy_list=3*dataframe['Energy (MWh)'][0:27]

    return StorageSampler.StorageSystem(demand_samples=demand_samples,
                                        wind_samples=wind_samples,
                                        store_power_list=store_power_list,
                                        store_energy_list=store_energy_list,
                                        **kwargs)


# Build the power system model with the constructor's default arguments.
system = storage_system()

# Attach the project's machine-learning helper to the system object.
# NOTE(review): the meaning of train_size=500 is defined in MachineLearning, not shown here.
import MachineLearning
system.AI_model = MachineLearning.MachineLearning(train_size=500)

  data = pd.read_csv('../data/UKdata/20161213_uk_wind_solar_demand_temperature.csv',


Generator set: [1200, 600, 600, 250, 250, 120, 60, 20, 20, 10, 10, 10]
Optimising number of generator sets to LOLE target of 3 hours
20 generator sets. Base LOLE=2.92 h; determined load offset of 36.15 MW to reach LOLE target.



In [8]:
def pool_generation(system, pool_size):
    """
    Draw a pool of daily margin traces from the system.

    :param system: object providing ``generate_daily_margin_trace()``
    :param pool_size: number of daily traces to draw
    :return: array of shape (pool_size, 24), one trace per row
    """
    hours_per_day = 24
    traces = np.zeros((pool_size, hours_per_day))

    # Fill the pre-allocated rows one by one; each row triggers exactly one draw.
    for row in traces:
        row[...] = system.generate_daily_margin_trace()
    return traces

def load_data(file_name):
    '''
    Read a comma-separated numeric file into memory.
    Parameters:
        file_name: string
            path of the file to read
    Returns:
        data: ndarray
            C-contiguous array holding the parsed content of the file
    '''
    parsed = np.genfromtxt(file_name, delimiter=',')
    return np.ascontiguousarray(parsed)

# Set up active learning framework for surrogate modeling

In [9]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error

from modAL.models import ActiveLearner

# Active learning strategy
def random_forest_regression_std(regressor, X, n_instances=1):
    """
    Query strategy: select the pool samples on which the forest's trees disagree most.

    :param regressor: learner exposing the fitted forest as ``regressor.estimator``;
        every tree in ``estimator.estimators_`` is asked for a prediction
    :param X: candidate pool, indexed by row
    :param n_instances: number of samples to select (values <= 0 select nothing,
        values above the pool size select the whole pool)
    :return: tuple ``(query_idx, X[query_idx])``; indices are ordered by increasing
        disagreement, so the most uncertain sample comes last
    """
    # Shape: (n_trees, n_samples)
    all_tree_preds = np.array([tree.predict(X) for tree in regressor.estimator.estimators_])

    # Variance across trees. It ranks samples exactly like the standard deviation
    # would, so the square root is not needed for selection.
    spread = np.var(all_tree_preds, axis=0)

    # Clamp the request: a plain `[-n_instances:]` slice returns *everything* for
    # n_instances == 0 because `-0` is `0`.
    n_keep = min(max(int(n_instances), 0), spread.shape[0])
    ranked = np.argsort(spread)
    query_idx = ranked[ranked.shape[0] - n_keep:]  # the n_keep largest spreads
    return query_idx, X[query_idx]

def random_forest_regression_std_analysis(estimator, X, n_instances=1):
    """
    Diagnostic variant of the disagreement query that also reports the spread values.

    Unlike ``random_forest_regression_std`` this takes the fitted forest directly
    (an object with ``estimators_``), not a learner wrapping it.

    :param estimator: fitted forest whose ``estimators_`` are queried one by one
    :param X: candidate pool, indexed by row
    :param n_instances: number of samples to select (values <= 0 select nothing,
        values above the pool size select the whole pool)
    :return: tuple ``(query_idx, X[query_idx], spread[query_idx])``. Despite the
        function name, the third element is the *variance* of the tree predictions
        (``np.var``), not the standard deviation.
    """
    # Shape: (n_trees, n_samples)
    all_tree_preds = np.array([tree.predict(X) for tree in estimator.estimators_])

    # Variance across trees for every pool sample
    spread = np.var(all_tree_preds, axis=0)

    # Clamp the request: a plain `[-n_instances:]` slice returns *everything* for
    # n_instances == 0 because `-0` is `0`.
    n_keep = min(max(int(n_instances), 0), spread.shape[0])
    ranked = np.argsort(spread)
    query_idx = ranked[ranked.shape[0] - n_keep:]  # the n_keep largest spreads
    return query_idx, X[query_idx], spread[query_idx]

# Random sampling strategy
def random_sampling(regressor, X, n_instances=1):
    """
    Baseline query strategy: pick pool rows uniformly at random, without repeats.

    :param regressor: unused; present so the signature matches the other query strategies
    :param X: candidate pool, indexed by row
    :param n_instances: number of rows to draw
    :return: tuple ``(query_idx, X[query_idx])``
    """
    pool_rows = X.shape[0]
    # Draws from numpy's global random state
    chosen = np.random.choice(pool_rows, size=n_instances, replace=False)
    picked = X[chosen]
    return chosen, picked

In [10]:
# LOL and ENS surrogate model for a year
def surrogate_model_year_prediction(estimator, margin_year):
    """
    Aggregate the daily surrogate predictions into one value per year.

    The hourly margins are regrouped into years of 365 days x 24 hours, the
    estimator predicts one value per day, and the daily values are summed per year.

    :param estimator: model with ``predict`` accepting a (365, 24) array and
        returning one value per day
    :param margin_year: hourly margins; either a single year (8760 values) or an
        array whose total size is a multiple of 8760, e.g. shape (n_years, 8760)
    :return: list with one summed prediction per year
    """
    margin_daily = np.asarray(margin_year).reshape(-1, 365, 24)
    # Iterate over the reshaped years rather than margin_year.shape[0]: the two
    # differ for 1-D input and for rows that hold more than one year.
    return [np.sum(estimator.predict(year_days)) for year_days in margin_daily]

In [11]:
def surrogate_model_training(system, training_strategy, n_train_random=1825, n_initial=1825, n_queries=20, temporal_pool_size=3650, n_instances=91):
    """
    Train the LOL and ENS surrogate models and measure how long it takes.

    :param system: object providing ``generate_daily_margin_trace()`` (via
        ``pool_generation``) and ``run_optimal_policy(trace)``
    :param training_strategy: ``'Random'`` fits two plain random forests on one
        randomly generated training set; ``'AL'`` builds two ActiveLearner objects
        and extends their training sets through repeated queries
    :param n_train_random: training set size for the ``'Random'`` strategy
    :param n_initial: initial training set size for the ``'AL'`` strategy
    :param n_queries: number of query rounds (``'AL'`` only)
    :param temporal_pool_size: size of the fresh candidate pool drawn each round (``'AL'`` only)
    :param n_instances: samples selected from the pool per round (``'AL'`` only)
    :return: tuple ``(lol_learner, ens_learner, elapsed_seconds)``
    :raises ValueError: if ``training_strategy`` is neither ``'Random'`` nor ``'AL'``
    """
    def _label(samples):
        # One policy run per daily trace; column 0 is used as the LOL target and
        # column 1 as the ENS target.
        return np.array([system.run_optimal_policy(day) for day in samples])

    started = time.time()

    if training_strategy == 'Random':
        # Single-shot training on a randomly generated set
        features = pool_generation(system, n_train_random)
        targets = _label(features)

        lol_learner = RandomForestRegressor().fit(features, targets[:, 0])
        ens_learner = RandomForestRegressor().fit(features, targets[:, 1])

    elif training_strategy == 'AL':
        features = pool_generation(system, n_initial)
        targets = _label(features)

        # Both learners start from the same initial set
        lol_learner = ActiveLearner(
            estimator=RandomForestRegressor(),
            query_strategy=random_forest_regression_std,
            X_training=features, y_training=targets[:, 0]
        )
        ens_learner = ActiveLearner(
            estimator=RandomForestRegressor(),
            query_strategy=random_forest_regression_std,
            X_training=features, y_training=targets[:, 1]
        )

        for _ in range(n_queries):
            candidates = pool_generation(system, pool_size=temporal_pool_size)

            # The ENS learner alone decides which samples are labelled; the LOL
            # learner is taught on the same selection.
            _, selected = ens_learner.query(candidates, n_instances=n_instances)
            new_targets = _label(selected)

            lol_learner.teach(selected, new_targets[:, 0])
            ens_learner.teach(selected, new_targets[:, 1])
    else:
        raise ValueError('Unknown training strategy: {}'.format(training_strategy))

    elapsed = time.time() - started
    return lol_learner, ens_learner, elapsed

# Result section 1: Surrogate model performance assessment in terms of RMSE and Correlation

In [12]:
# Load the daily test set: margin traces plus the matching ENS and LOL reference values
X_test = load_data("../data/AIdata/daily_margin_test.csv")
ens_test = load_data("../data/AIdata/ens_test_daily.csv")
lol_test = load_data("../data/AIdata/lol_test_daily.csv")

# Load the yearly test set for calculating the correlation between the yearly
# reference values and the summed surrogate predictions
X_test_yearly = load_data("../data/AIdata/yearly_margin_test.csv")
ens_test_yearly = load_data("../data/AIdata/ens_test_yearly.csv")
lol_test_yearly = load_data("../data/AIdata/lol_test_yearly.csv")

## Active learning hyperparameters

In [13]:
n_simulations = 10 # Independent repetitions; results are averaged over them

n_initial = 365 * 2 # Initial training set, 2 years
n_queries = 35 # Query rounds; 35 x 91 = 3185 added daily samples (about 8.7 years)
temporal_pool_size = 3650 # Candidate pool drawn per query round, 10 years of daily traces
n_instances = 91 # Samples selected per query round, 1 season = 91 days

## Section 1.1 Comparing based on the size of training set

### Section 1.1.1 Active learning

In [14]:
# Active-learning experiment: for every simulation, train on an initial random set,
# then run n_queries query rounds and record RMSE, yearly correlation and cumulative
# training time after each round. Column 0 of every result array is the state after
# the initial fit; column i+1 is the state after query round i.
accuracy_scores_lol_AL = np.zeros((n_simulations, n_queries+1))
accuracy_scores_ens_AL = np.zeros((n_simulations, n_queries+1))
correlation_scores_lol_AL = np.zeros((n_simulations, n_queries+1))
correlation_scores_ens_AL = np.zeros((n_simulations, n_queries+1))
time_train_AL = np.zeros((n_simulations, n_queries+1))

for n in range(n_simulations):
    # Timed section: sample generation, labelling and the initial fit of both learners
    time_start_initial = time.time()
    X_train = pool_generation(system, n_initial)

    # One policy run per daily trace; column 0 is the LOL target, column 1 the ENS target
    y_train = np.array([system.run_optimal_policy(X_train[i]) for i in range(X_train.shape[0])])
    lol_train = y_train[:,0]
    ens_train = y_train[:,1]

    # Initialize the learner
    lol_learner_AL = ActiveLearner(
        estimator=RandomForestRegressor(),
        query_strategy=random_forest_regression_std,
        X_training=X_train, y_training=lol_train
    )

    ens_learner_AL = ActiveLearner(
        estimator=RandomForestRegressor(),
        query_strategy=random_forest_regression_std,
        X_training=X_train, y_training=ens_train
    )
    time_end_initial = time.time()
    time_train_AL[n,0] = time_end_initial - time_start_initial

    # Evaluation is deliberately outside the timed sections
    accuracy_scores_lol_AL[n, 0] = root_mean_squared_error(lol_test, lol_learner_AL.predict(X_test))
    accuracy_scores_ens_AL[n, 0] = root_mean_squared_error(ens_test, ens_learner_AL.predict(X_test))

    # Calculate the correlation scores
    # NOTE(review): the saved output of this cell contains numpy warnings raised inside
    # np.corrcoef's normalisation; that usually means one input had zero variance or NaN,
    # in which case the stored value is NaN and propagates into the means below - TODO confirm.
    correlation_scores_lol_AL[n, 0] = np.corrcoef(lol_test_yearly, surrogate_model_year_prediction(lol_learner_AL, X_test_yearly))[0,1]
    correlation_scores_ens_AL[n, 0] = np.corrcoef(ens_test_yearly, surrogate_model_year_prediction(ens_learner_AL, X_test_yearly))[0,1]

    for i in range(n_queries):
        # Timed section: pool generation, query, labelling and re-training
        time_start_al = time.time()
        X_pool_temporal = pool_generation(system, pool_size=temporal_pool_size)
        
        # The ENS learner alone selects the samples; the LOL learner is taught on the same ones
        query_idx, query_inst = ens_learner_AL.query(X_pool_temporal, n_instances=n_instances)
        # query_idx, query_inst, std = random_forest_regression_std_analysis(ens_learner_AL.estimator, X_pool_temporal, n_instances=n_instances)
        # print(std)

        y_new = np.array([system.run_optimal_policy(query_inst[j]) for j in range(query_inst.shape[0])])
        
        lol_learner_AL.teach(query_inst, y_new[:,0])
        ens_learner_AL.teach(query_inst, y_new[:,1])
        time_end_al = time.time()
        # Cumulative training time up to and including this round
        time_train_AL[n,i+1] = time_train_AL[n,i] + time_end_al - time_start_al

        # Update the accuracy scores
        accuracy_scores_lol_AL[n,i+1] = root_mean_squared_error(lol_test, lol_learner_AL.predict(X_test))
        accuracy_scores_ens_AL[n,i+1] = root_mean_squared_error(ens_test, ens_learner_AL.predict(X_test))
        
        # Calculate the correlation scores
        correlation_scores_lol_AL[n, i+1] = np.corrcoef(lol_test_yearly, surrogate_model_year_prediction(lol_learner_AL, X_test_yearly))[0,1]
        correlation_scores_ens_AL[n, i+1] = np.corrcoef(ens_test_yearly, surrogate_model_year_prediction(ens_learner_AL, X_test_yearly))[0,1]

# Average every metric over the simulations (one value per training-set size)
accuracy_scores_lol_AL_mean = np.mean(accuracy_scores_lol_AL, axis=0)
accuracy_scores_ens_AL_mean = np.mean(accuracy_scores_ens_AL, axis=0)
correlation_scores_lol_AL_mean = np.mean(correlation_scores_lol_AL, axis=0)
correlation_scores_ens_AL_mean = np.mean(correlation_scores_ens_AL, axis=0)
time_train_AL_mean = np.mean(time_train_AL, axis=0)

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


### Section 1.1.2 Baseline: adding data points randomly

In [15]:
# Random-sampling baseline: same procedure as the active-learning experiment, but the
# samples added in each round are drawn uniformly at random from the candidate pool.
# Column 0 of every result array is the state after the initial fit; column i+1 is
# the state after round i. Training time is not recorded for this baseline.
accuracy_scores_lol_random = np.zeros((n_simulations, n_queries+1))
accuracy_scores_ens_random = np.zeros((n_simulations, n_queries+1))
correlation_scores_lol_random = np.zeros((n_simulations, n_queries+1))
correlation_scores_ens_random = np.zeros((n_simulations, n_queries+1))

for n in range(n_simulations):

    X_train = pool_generation(system, n_initial)

    # One policy run per daily trace; column 0 is the LOL target, column 1 the ENS target
    y_train = np.array([system.run_optimal_policy(X_train[i]) for i in range(X_train.shape[0])])
    lol_train = y_train[:,0]
    ens_train = y_train[:,1]

    # Initialize the learner
    lol_learner_random = ActiveLearner(
        estimator=RandomForestRegressor(),
        query_strategy=random_sampling,
        X_training=X_train, y_training=lol_train
    )

    ens_learner_random = ActiveLearner(
        estimator=RandomForestRegressor(),
        query_strategy=random_sampling,
        X_training=X_train, y_training=ens_train
    )

    accuracy_scores_lol_random[n, 0] = root_mean_squared_error(lol_test, lol_learner_random.predict(X_test))
    accuracy_scores_ens_random[n, 0] = root_mean_squared_error(ens_test, ens_learner_random.predict(X_test))
    
    # Calculate the correlation scores
    # NOTE(review): the saved output of this cell contains numpy warnings raised inside
    # np.corrcoef's normalisation; that usually means one input had zero variance or NaN,
    # in which case the stored value is NaN and propagates into the means below - TODO confirm.
    correlation_scores_lol_random[n, 0] = np.corrcoef(lol_test_yearly, surrogate_model_year_prediction(lol_learner_random, X_test_yearly))[0,1]
    correlation_scores_ens_random[n, 0] = np.corrcoef(ens_test_yearly, surrogate_model_year_prediction(ens_learner_random, X_test_yearly))[0,1]

    for i in range(n_queries):
        X_pool_temporal = pool_generation(system, pool_size=temporal_pool_size)
        
        # Random selection; both learners are taught on the same samples
        query_idx, query_inst = ens_learner_random.query(X_pool_temporal, n_instances=n_instances)

        y_new = np.array([system.run_optimal_policy(query_inst[j]) for j in range(query_inst.shape[0])])
        
        lol_learner_random.teach(query_inst, y_new[:,0])
        ens_learner_random.teach(query_inst, y_new[:,1])

        # Update the accuracy scores
        accuracy_scores_lol_random[n,i+1] = root_mean_squared_error(lol_test, lol_learner_random.predict(X_test))
        accuracy_scores_ens_random[n,i+1] = root_mean_squared_error(ens_test, ens_learner_random.predict(X_test))
        # Calculate the correlation scores
        correlation_scores_lol_random[n, i+1] = np.corrcoef(lol_test_yearly, surrogate_model_year_prediction(lol_learner_random, X_test_yearly))[0,1]
        correlation_scores_ens_random[n, i+1] = np.corrcoef(ens_test_yearly, surrogate_model_year_prediction(ens_learner_random, X_test_yearly))[0,1]

# Average every metric over the simulations (one value per training-set size)
accuracy_scores_lol_random_mean = np.mean(accuracy_scores_lol_random, axis=0)
accuracy_scores_ens_random_mean = np.mean(accuracy_scores_ens_random, axis=0)
correlation_scores_lol_random_mean = np.mean(correlation_scores_lol_random, axis=0)
correlation_scores_ens_random_mean = np.mean(correlation_scores_ens_random, axis=0)

  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


### Section 1.1.3 Result figure generation

In [None]:
# Figure: mean RMSE versus training-set size, active learning against random sampling.
# LOL curves use the left y-axis, ENS curves the right y-axis.
# Create figure and axis
fig, ax1 = plt.subplots()

# Training-set size after the initial fit and after each query round (n_queries+1 points)
iteration_index = range(n_initial, n_initial+(n_queries+1)*n_instances, n_instances)
# Plotting accuracy scores for LOL
ax1.plot(iteration_index, accuracy_scores_lol_AL_mean, color='blue', label='LOL with Active learning')
ax1.plot(iteration_index, accuracy_scores_lol_random_mean, color='navy', label='LOL with random sampling')
ax1.set_xlabel('Number of training samples')
ax1.set_ylabel('RMSE of Loss-of-load [h/day]', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second y-axis
ax2 = ax1.twinx()

# Plot on second y-axis
ax2.plot(iteration_index, accuracy_scores_ens_AL_mean, color='red', label='ENS with Active learning')
ax2.plot(iteration_index, accuracy_scores_ens_random_mean, color='tab:orange', label='ENS with random sampling')
ax2.set_ylabel('RMSE of Energy not served [MWh/day]', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Merge the handles of both axes so a single legend lists all four curves
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')

plt.tight_layout()

# Saving is disabled for this figure
# plt.savefig('LOL_ENS.png', dpi=300)

# Show the plot
plt.show()

In [None]:
# Figure: mean yearly correlation versus training-set size, active learning against
# random sampling. LOL curves use the left y-axis, ENS curves the right y-axis.
# Create figure and axis
fig, ax1 = plt.subplots()

# Training-set size after the initial fit and after each query round (n_queries+1 points)
iteration_index = range(n_initial, n_initial+(n_queries+1)*n_instances, n_instances)
# Plot correlation scores for LOL
ax1.plot(iteration_index, correlation_scores_lol_AL_mean, color='blue', label='LOL with Active learning')
ax1.plot(iteration_index, correlation_scores_lol_random_mean, color='navy', label='LOL with random sampling')
ax1.set_xlabel('Number of training samples')
# Raw string: '\h' is not a valid escape sequence in a normal string literal
ax1.set_ylabel(r'Correlation of LOL estimator $\hat{f}^{LOL}(z)$ and $f^{LOL}(z)$', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second y-axis
ax2 = ax1.twinx()

# Plot correlation scores for ENS on the second y-axis
ax2.plot(iteration_index, correlation_scores_ens_AL_mean, color='red', label='ENS with Active learning')
ax2.plot(iteration_index, correlation_scores_ens_random_mean, color='tab:orange', label='ENS with random sampling')
ax2.set_ylabel(r'Correlation of ENS estimator $\hat{f}^{ENS}(z)$ and $f^{ENS}(z)$', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Merge the handles of both axes so a single legend lists all four curves
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')

plt.tight_layout()

plt.savefig('correlation_LOL_ENS.png', dpi=300)

# Show the plot
plt.show()

## Section 1.2 Comparing based on training time

### Section 1.2.1 Random generation of training set

In [16]:
# Training-time experiment for the random strategy: for each step a completely new
# random training set of increasing size is generated, labelled and fitted from
# scratch, and the wall-clock time of that whole procedure is recorded.
# number of simulations
n_simulations = 10

n_train_step = 20 # Number of training steps

accuracy_scores_lol_random_time = np.zeros((n_simulations, n_train_step))
accuracy_scores_ens_random_time = np.zeros((n_simulations, n_train_step))
correlation_scores_lol_random_time = np.zeros((n_simulations, n_train_step))
correlation_scores_ens_random_time = np.zeros((n_simulations, n_train_step))

time_train_random_time = np.zeros((n_simulations, n_train_step))

for n in range(n_simulations):
    for i in range(n_train_step):
        # Timed section: sample generation, labelling and fitting both forests
        time_start_train = time.time()
        # Generate the training set
        # Size grows in 5-year steps: 1825 daily samples at i=0 up to 36500 at i=19
        n_train = 365 * 5 + 365 * 5 * i

        X_train = pool_generation(system, n_train)

        # The comprehension's `i` is local to the comprehension and does not
        # overwrite the step index `i` of the enclosing loop.
        y_train = np.array([system.run_optimal_policy(X_train[i]) for i in range(X_train.shape[0])])
        lol_train = y_train[:,0]
        ens_train = y_train[:,1]

        # train the learner
        lol_learner_random = RandomForestRegressor().fit(X_train, lol_train)
        ens_learner_random = RandomForestRegressor().fit(X_train, ens_train)
        time_end_train = time.time()
        # Per-step time (not cumulative): every step trains from scratch
        time_train_random_time[n, i] = time_end_train - time_start_train

        accuracy_scores_lol_random_time[n, i] = root_mean_squared_error(lol_test, lol_learner_random.predict(X_test))
        accuracy_scores_ens_random_time[n, i] = root_mean_squared_error(ens_test, ens_learner_random.predict(X_test))
        
        # Calculate the correlation scores
        correlation_scores_lol_random_time[n, i] = np.corrcoef(lol_test_yearly, surrogate_model_year_prediction(lol_learner_random, X_test_yearly))[0,1]
        correlation_scores_ens_random_time[n, i] = np.corrcoef(ens_test_yearly, surrogate_model_year_prediction(ens_learner_random, X_test_yearly))[0,1]
        
# Average every metric over the simulations (one value per training-set size)
accuracy_scores_lol_random_mean_time = np.mean(accuracy_scores_lol_random_time, axis=0)
accuracy_scores_ens_random_mean_time = np.mean(accuracy_scores_ens_random_time, axis=0)
correlation_scores_lol_random_mean_time = np.mean(correlation_scores_lol_random_time, axis=0)
correlation_scores_ens_random_mean_time = np.mean(correlation_scores_ens_random_time, axis=0)

time_train_random_mean_time = np.mean(time_train_random_time, axis=0)

### Section 1.2.2 Figure generation

In [None]:
# Figure: mean RMSE versus mean training time, active learning against randomly
# generated training sets. LOL curves use the left y-axis, ENS curves the right y-axis.
# Create figure and axis
fig, ax1 = plt.subplots()

# iteration_index_AL = range(n_initial, n_initial+(n_queries+1)*n_instances, n_instances)
# Plotting accuracy scores for LOL
# The [1:] slices drop the active-learning point recorded after the initial fit
ax1.plot(time_train_AL_mean[1:], accuracy_scores_lol_AL_mean[1:], color='blue', label='LOL with Active learning')
ax1.plot(time_train_random_mean_time, accuracy_scores_lol_random_mean_time, color='navy', label='LOL with random sampling')
ax1.set_xlabel('Training time [s]')
ax1.set_ylabel('RMSE of Loss-of-load [h/day]', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second y-axis
ax2 = ax1.twinx()

# Plot on second y-axis
ax2.plot(time_train_AL_mean[1:], accuracy_scores_ens_AL_mean[1:], color='red', label='ENS with Active learning')
ax2.plot(time_train_random_mean_time, accuracy_scores_ens_random_mean_time, color='tab:orange', label='ENS with random sampling')
ax2.set_ylabel('RMSE of Energy not served [MWh/day]', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Merge the handles of both axes so a single legend lists all four curves
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')

plt.tight_layout()

plt.savefig('LOL_ENS_time.png', dpi=300)

# Show the plot
plt.show()

In [None]:
# Figure: mean yearly correlation versus mean training time, active learning against
# randomly generated training sets. LOL curves use the left y-axis, ENS curves the
# right y-axis.
# Create figure and axis
fig, ax1 = plt.subplots()


# Plot correlation scores for LOL
# The [1:] slices drop the active-learning point recorded after the initial fit
ax1.plot(time_train_AL_mean[1:], correlation_scores_lol_AL_mean[1:], color='blue', label='LOL with Active learning')
# Label uses 'LOL' (not 'LOLE') to match the legends of the other figures
ax1.plot(time_train_random_mean_time, correlation_scores_lol_random_mean_time, color='navy', label='LOL with random sampling')
ax1.set_xlabel('Training time [s]')
# Raw string: '\h' is not a valid escape sequence in a normal string literal
ax1.set_ylabel(r'Correlation of LOL estimator $\hat{f}^{LOL}(z)$ and $f^{LOL}(z)$', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second y-axis
ax2 = ax1.twinx()

# Plot correlation scores for ENS on the second y-axis
ax2.plot(time_train_AL_mean[1:], correlation_scores_ens_AL_mean[1:], color='red', label='ENS with Active learning')
ax2.plot(time_train_random_mean_time, correlation_scores_ens_random_mean_time, color='tab:orange', label='ENS with random sampling')
ax2.set_ylabel(r'Correlation of ENS estimator $\hat{f}^{ENS}(z)$ and $f^{ENS}(z)$', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Merge the handles of both axes so a single legend lists all four curves
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')

plt.tight_layout()

plt.savefig('correlation_LOL_ENS_time.png', dpi=300)

# Show the plot
plt.show()