In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

import MCCoordinator
import StorageSampler

def storage_system(**kwargs):
    """
    Instantiate StorageSystem object using GB system data.

    Reads the GB demand/wind time series and the battery table from CSV files
    under ``../data/UKdata`` (relative to the current working directory), splits
    the time series into one array per calendar year and passes everything to
    ``StorageSampler.StorageSystem``.

    :param kwargs: additional arguments to be supplied to the StorageSystem constructor
    :return: StorageSystem object
    """
    import pandas as pd

    # NOTE(review): `infer_datetime_format` is deprecated since pandas 2.0 (it has no
    # effect there and emits a warning); it is kept so older pandas behaves as before.
    data = pd.read_csv('../data/UKdata/20161213_uk_wind_solar_demand_temperature.csv',
                       parse_dates=['UTC Time', 'Local Time'], infer_datetime_format=True, dayfirst=True, index_col=0)

    # Demand is restricted to 2006-2015; the wind series is NOT year-filtered here.
    demand_data = data['demand_net'].dropna()['2006':'2015']
    # Presumably 'wind_merra1' is a capacity factor scaled to 10000 MW of wind - TODO confirm.
    wind_data = 10000 * data['wind_merra1'].dropna()

    # One array per calendar year, cut to the first 8760 values (365 * 24) so that
    # leap years have the same length as ordinary years.
    demand_samples = {year: series.values[:8760]
                      for year, series in demand_data.groupby(demand_data.index.year)}
    wind_samples = {year: series.values[:8760]
                    for year, series in wind_data.groupby(wind_data.index.year)}

    # First 27 rows of the battery table, with power and energy ratings tripled.
    battery_data = pd.read_csv('../data/UKdata/battery_data.csv')
    store_power_list = 3 * battery_data['Power (MW)'][0:27]
    store_energy_list = 3 * battery_data['Energy (MWh)'][0:27]

    return StorageSampler.StorageSystem(demand_samples=demand_samples,
                                        wind_samples=wind_samples,
                                        store_power_list=store_power_list,
                                        store_energy_list=store_energy_list,
                                        **kwargs)


# Build the storage system once; the experiment cells below all operate on this object.
system = storage_system()

# Attach the surrogate-model container; the experiment cells assign its
# `lol_model` / `ens_model` attributes after each training run.
import MachineLearning
system.AI_model = MachineLearning.MachineLearning(train_size=500)

In [None]:
def pool_generation(system, pool_size):
    """
    Draw ``pool_size`` daily margin traces from ``system``.

    :param system: object exposing ``generate_daily_margin_trace()``; each call
                   must yield values that fit a row of length 24
    :param pool_size: number of traces (rows) to draw
    :return: float ndarray of shape (pool_size, 24), one trace per row
    """
    traces = np.empty((pool_size, 24))
    for row in traces:
        # `row` is a view into `traces`, so the assignment fills the pool in place.
        row[:] = system.generate_daily_margin_trace()
    return traces

def load_data(file_name):
    '''
    Read a comma-separated numeric file into a C-contiguous array.
    Parameters:
        file_name: string
            location of the file; handed to np.genfromtxt unchanged
    Returns:
        data: ndarray
            parsed content of the file, guaranteed C-contiguous
    '''
    parsed = np.genfromtxt(file_name, delimiter=',')
    return np.ascontiguousarray(parsed)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error

from modAL.models import ActiveLearner

# Active learning strategy
def random_forest_regression_std(regressor, X, n_instances=1):
    """
    Query strategy: select the pool rows on which the forest's trees disagree most.

    Disagreement is measured by the variance of the individual tree predictions
    (ranking by variance gives the same order as ranking by standard deviation).

    :param regressor: learner whose ``estimator.estimators_`` holds the fitted trees
    :param X: candidate pool; must support indexing with an integer index array
    :param n_instances: number of rows to select; values <= 0 select nothing and
                        values above the pool size select the whole pool
    :return: tuple (query_idx, X[query_idx]), ordered by increasing variance
    """
    all_tree_preds = np.array([tree.predict(X) for tree in regressor.estimator.estimators_])  # Shape: (n_trees, n_samples)

    variance = np.var(all_tree_preds, axis=0)
    ranking = np.argsort(variance)

    # A plain `ranking[-n_instances:]` would return the WHOLE pool for
    # n_instances == 0 (because -0 == 0), so compute the start index explicitly.
    first = max(ranking.shape[0] - n_instances, 0)
    query_idx = ranking[first:]
    return query_idx, X[query_idx]

def random_forest_regression_std_analysis(estimator, X, n_instances=1):
    """
    Rank pool rows by tree disagreement and also report the disagreement values.

    Takes the fitted forest directly (reads ``estimator.estimators_``).

    :param estimator: fitted ensemble exposing ``estimators_``
    :param X: candidate pool; must support indexing with an integer index array
    :param n_instances: number of rows to select; values <= 0 select nothing and
                        values above the pool size select the whole pool
    :return: tuple (query_idx, X[query_idx], spread[query_idx]) ordered by
             increasing spread. NOTE: despite the function name, ``spread`` is
             the VARIANCE of the tree predictions, not the standard deviation.
    """
    all_tree_preds = np.array([tree.predict(X) for tree in estimator.estimators_])  # Shape: (n_trees, n_samples)

    variance = np.var(all_tree_preds, axis=0)
    ranking = np.argsort(variance)

    # A plain `ranking[-n_instances:]` would return the WHOLE pool for
    # n_instances == 0 (because -0 == 0), so compute the start index explicitly.
    first = max(ranking.shape[0] - n_instances, 0)
    query_idx = ranking[first:]
    return query_idx, X[query_idx], variance[query_idx]

# Random sampling strategy
def random_sampling(regressor, X, n_instances=1):
    """
    Query strategy: pick ``n_instances`` distinct pool rows uniformly at random.

    :param regressor: unused; present so the signature matches the other query strategies
    :param X: candidate pool with a ``shape`` attribute, indexable by an index array
    :param n_instances: number of rows to draw (without replacement)
    :return: tuple (query_idx, X[query_idx])
    """
    pool_size = X.shape[0]
    chosen = np.random.choice(pool_size, size=n_instances, replace=False)
    return chosen, X[chosen]

In [None]:
# LOL and ENS surrogate model for a year
def surrogate_model_year_prediction(estimator, margin_year):
    """
    Predict a yearly total for each year of margin data with a daily surrogate model.

    The input is reshaped to (n_years, 365, 24); for every year the estimator
    predicts one value per day and the 365 daily predictions are summed.

    :param estimator: fitted model whose ``predict`` accepts a (365, 24) array
    :param margin_year: ndarray holding a whole number of years of hourly margins,
                        e.g. shape (n_years, 8760), (n_years * 365, 24) or (8760,)
    :return: list with one summed prediction per year
    """
    margin_daily = margin_year.reshape(-1, 365, 24)
    # Iterate over the reshaped years rather than range(margin_year.shape[0]):
    # the latter only matches the number of years when the input's first axis
    # already is the year axis, and raises IndexError for e.g. (365, 24) input.
    return [np.sum(estimator.predict(year_days)) for year_days in margin_daily]

In [None]:
def surrogate_model_training(system, training_strategy, n_train_random=1825, n_initial=1825, n_queries=20, temporal_pool_size=3650, n_instances=91):
    """
    Train the LOL and ENS surrogate models and time the whole procedure.

    Two strategies are supported:

    * ``'Random'``: draw ``n_train_random`` daily traces, label each one with
      ``system.run_optimal_policy`` and fit one RandomForestRegressor per target.
    * ``'AL'``: start from ``n_initial`` labelled traces; then, for ``n_queries``
      rounds, draw a fresh pool of ``temporal_pool_size`` traces, let the ENS
      learner select ``n_instances`` of them, label those and teach both learners.

    :param system: object providing ``generate_daily_margin_trace`` (via
                   pool_generation) and ``run_optimal_policy``
    :param training_strategy: ``'Random'`` or ``'AL'``
    :param n_train_random: training-set size for the 'Random' strategy
    :param n_initial: initial training-set size for the 'AL' strategy
    :param n_queries: number of query rounds for the 'AL' strategy
    :param temporal_pool_size: number of candidate traces generated per query round
    :param n_instances: number of traces selected and labelled per query round
    :return: tuple (lol_learner, ens_learner, elapsed_seconds)
    :raises ValueError: if ``training_strategy`` is neither 'Random' nor 'AL'
    """
    def label(days):
        # One exact-model run per trace; column 0 is used as the LOL target and
        # column 1 as the ENS target below.
        return np.array([system.run_optimal_policy(day) for day in days])

    started = time.time()

    if training_strategy == 'Random':
        features = pool_generation(system, n_train_random)
        targets = label(features)

        # LOL model is fitted before the ENS model (same order as the random draws require).
        lol_learner = RandomForestRegressor().fit(features, targets[:, 0])
        ens_learner = RandomForestRegressor().fit(features, targets[:, 1])

    elif training_strategy == 'AL':
        features = pool_generation(system, n_initial)
        targets = label(features)

        # Both learners share the same uncertainty-based query strategy and the
        # same initial inputs; only the target column differs.
        lol_learner = ActiveLearner(
            estimator=RandomForestRegressor(),
            query_strategy=random_forest_regression_std,
            X_training=features, y_training=targets[:, 0]
        )
        ens_learner = ActiveLearner(
            estimator=RandomForestRegressor(),
            query_strategy=random_forest_regression_std,
            X_training=features, y_training=targets[:, 1]
        )

        for _ in range(n_queries):
            candidates = pool_generation(system, pool_size=temporal_pool_size)

            # Only the ENS learner decides which candidates get labelled; the
            # LOL learner is taught on the same selection.
            _, query_inst = ens_learner.query(candidates, n_instances=n_instances)
            fresh_targets = label(query_inst)

            lol_learner.teach(query_inst, fresh_targets[:, 0])
            ens_learner.teach(query_inst, fresh_targets[:, 1])

    else:
        raise ValueError('Unknown training strategy: {}'.format(training_strategy))

    elapsed = time.time() - started
    return lol_learner, ens_learner, elapsed

In [None]:
def run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib, verbose):
    '''
    Run an MLMC simulation and return the coordinator that holds its results.
    Parameters:
        system: object
            passed to MCCoordinator as `factory`.
        samples: int
            initial number of samples in each level for sigma estimation
            (passed to `explore`).
        n_run: int
            number of `run_recommended` calls.
        time_seconds: int
            duration of each `run_recommended` call.
        ml_hierarchy: {'OptimalNStore', 'GreedyNStore', 'AIGreedyNStore', 'Greedy1Store', 'AvgStore', 'NoStore', 'AIModel'}
            set of models for the MLMC structure.
        use_joblib: bool
            forwarded to MCCoordinator (together with joblib_n_jobs=-1):
            if True, use all cores; otherwise run on a single core.
        verbose: bool
            if True, print details; otherwise print a summary of results.
    Returns:
        the MCCoordinator instance after all runs have completed.
    '''
    coordinator_options = dict(
        factory=system,
        ml_hierarchy=ml_hierarchy,
        use_expectations=True,
        use_joblib=use_joblib,
        joblib_n_jobs=-1,
        joblib_batch_size=5,
    )
    coordinator = MCCoordinator.MCCoordinator(**coordinator_options)

    # Exploration first, then n_run time-boxed runs targeting EENS.
    coordinator.explore(n_samples=samples)
    for _ in range(n_run):
        coordinator.run_recommended(time_seconds=time_seconds, verbose=verbose, optimization_target='EENS')

    coordinator.verbose_result()
    return coordinator


def round_to_n(x, n):
    """
    Round x to n significant digits.

    Non-finite inputs (NaN, +inf, -inf) are returned unchanged and zero is
    returned as 0.0.

    :param x: scalar to be converted
    :param n: number of significant digits
    :return: scalar
    """
    import math

    # NaN and +/-inf have no decimal exponent; without this guard
    # math.floor(math.log10(inf)) raises OverflowError.
    if not np.isfinite(x): return x
    # log10(0) is undefined, so zero is handled separately.
    if x == 0: return 0.0

    # Decimal exponent of the leading digit, e.g. 3 for 1234.5 and -2 for 0.0123.
    exponent = int(math.floor(math.log10(abs(x))))
    return np.round(x, (n - 1) - exponent)


def generate_mcc_results(mcc):
    """
    Collect the summary results of an MLMC run as a flat list.

    Reads ``mcc.ml_stats``, ``mcc.wall_clock_time`` and ``mcc.output_labels``.

    :param mcc: coordinator object holding the MLMC statistics
    :return: list ``[wall_clock_time, mean_0, stderr_0, speed_0, mean_1, ...]``
             with one (mean, stderr, speed) triple per output label, where
             ``speed = mean**2 / (stderr**2 * wall_clock_time)``. Values are
             rounded to 3 (time, speed), 4 (mean) and 2 (stderr) significant digits.
    """
    # Silence divide-by-zero / invalid-value warnings only inside this block;
    # np.errstate restores the previous numpy error settings on exit, whereas a
    # bare np.seterr call would leave them changed for the rest of the session.
    with np.errstate(divide='ignore', invalid='ignore'):
        terms = list(mcc.ml_stats.values())

        # Overall MLMC estimate and its standard error (quadratic summation of term errors).
        mean_result = sum([term.mean for term in terms])
        stderr_result = np.sqrt(sum([term.stderr ** 2 for term in terms]))

        # Report results, alongside an estimated 'computational speed'.
        results = [round_to_n(mcc.wall_clock_time, 3)]
        for result_index, _ in enumerate(mcc.output_labels):
            mean = mean_result[result_index]
            stderr = stderr_result[result_index]
            results.append(round_to_n(mean, 4))
            results.append(round_to_n(stderr, 2))
            results.append(round_to_n(mean ** 2 / (stderr ** 2 * mcc.wall_clock_time), 3))

    return results

# Active learning

In [None]:
# Number of independent repetitions (train surrogates + run MLMC); averaged at the end
n_run_MLMC = 10

# MLMC settings forwarded to run_MLMC
samples = 20  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call
training_strategy = 'AL' # 'AL' or 'Random'

######################################################################################################
# Hyperparameters for active learning
n_initial = 365 * 2 # Initial training set: 730 daily traces (2 x 365)
n_queries = 10 # Number of query rounds
temporal_pool_size = 3650 # Candidate traces generated per query round (10 x 365)
n_instances = 91 # Traces selected and labelled per query round (~1 season = 91 days)

#####################################################################################################
# Hyperparameters for random sampling
# Training-set size for the 'Random' strategy (34675 = 95 x 365 traces);
# not used by surrogate_model_training when training_strategy == 'AL'.
n_train_random = 365 * 5 + 365 * 5 * 18

# Estimator: Surr , Architecture: Exact|Surrogate models
ml_hierarchy = ['OptimalNStore', 'AIModel']

# One row per repetition. Column 0 is the training time; columns 1-7 come from generate_mcc_results:
# ["Training time", "Simulation time", "mean_LOL", "stderr_LOL", "speed_LOL", "mean_ENS", "stderr_ENS", "speed_ENS"]
# (the LOL/ENS column order assumes mcc.output_labels lists LOL first, then ENS - TODO confirm)
results = np.zeros((n_run_MLMC, 8))

print('Training strategy: ', training_strategy)
print('n_train_random: ', n_train_random)

for i in range(n_run_MLMC):
    # Train fresh surrogates, plug them into the system's AI model, then run MLMC with them.
    lol_learner, ens_learner, results[i,0] = surrogate_model_training(system, training_strategy, n_train_random, n_initial, n_queries, temporal_pool_size, n_instances)
    system.AI_model.lol_model = lol_learner
    system.AI_model.ens_model = ens_learner

    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    results[i,1:] = generate_mcc_results(mcc)

    # print('MLMC results: ', results[i,:])

# Keep a copy: `results` is re-created by the following experiment cells.
results_al = results.copy()
avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)

In [None]:
# Number of independent repetitions (train surrogates + run MLMC); averaged at the end
n_run_MLMC = 10

# MLMC settings forwarded to run_MLMC
samples = 20  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call
training_strategy = 'AL' # 'AL' or 'Random'

######################################################################################################
# Hyperparameters for active learning
n_initial = 365 * 2 # Initial training set: 730 daily traces (2 x 365)
n_queries = 20 # Number of query rounds
temporal_pool_size = 3650 # Candidate traces generated per query round (10 x 365)
n_instances = 91 # Traces selected and labelled per query round (~1 season = 91 days)

################################################################################################
# Hyperparameters for random sampling
# Training-set size for the 'Random' strategy; not used by surrogate_model_training
# when training_strategy == 'AL'.
n_train_random = 2735


# Estimator: Surr , Architecture: Exact|Surrogate models
ml_hierarchy = ['OptimalNStore', 'AIModel']

# One row per repetition. Column 0 is the training time; columns 1-7 come from generate_mcc_results:
# ["Training time", "Simulation time", "mean_LOL", "stderr_LOL", "speed_LOL", "mean_ENS", "stderr_ENS", "speed_ENS"]
# (the LOL/ENS column order assumes mcc.output_labels lists LOL first, then ENS - TODO confirm)
results = np.zeros((n_run_MLMC, 8))

print('Training strategy: ', training_strategy)
print('n_train_random: ', n_train_random)

for i in range(n_run_MLMC):
    # Train fresh surrogates, plug them into the system's AI model, then run MLMC with them.
    lol_learner, ens_learner, results[i,0] = surrogate_model_training(system, training_strategy, n_train_random, n_initial, n_queries, temporal_pool_size, n_instances)
    system.AI_model.lol_model = lol_learner
    system.AI_model.ens_model = ens_learner

    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    results[i,1:] = generate_mcc_results(mcc)

    # print('MLMC results: ', results[i,:])

# NOTE: `results` is not copied under a separate name here, so the next
# experiment cell overwrites it; only the printed average survives.
avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)

In [None]:
# Number of independent repetitions (train surrogates + run MLMC); averaged at the end
n_run_MLMC = 10

# MLMC settings forwarded to run_MLMC
samples = 20  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call
training_strategy = 'AL' # 'AL' or 'Random'

######################################################################################################
# Hyperparameters for active learning
n_initial = 365 * 2 # Initial training set: 730 daily traces (2 x 365)
n_queries = 30 # Number of query rounds
temporal_pool_size = 3650 # Candidate traces generated per query round (10 x 365)
n_instances = 91 # Traces selected and labelled per query round (~1 season = 91 days)

################################################################################################
# Hyperparameters for random sampling
# Training-set size for the 'Random' strategy; not used by surrogate_model_training
# when training_strategy == 'AL'.
n_train_random = 2735


# Estimator: Surr , Architecture: Exact|Surrogate models
ml_hierarchy = ['OptimalNStore', 'AIModel']

# One row per repetition. Column 0 is the training time; columns 1-7 come from generate_mcc_results:
# ["Training time", "Simulation time", "mean_LOL", "stderr_LOL", "speed_LOL", "mean_ENS", "stderr_ENS", "speed_ENS"]
# (the LOL/ENS column order assumes mcc.output_labels lists LOL first, then ENS - TODO confirm)
results = np.zeros((n_run_MLMC, 8))

print('Training strategy: ', training_strategy)
print('n_train_random: ', n_train_random)

for i in range(n_run_MLMC):
    # Train fresh surrogates, plug them into the system's AI model, then run MLMC with them.
    lol_learner, ens_learner, results[i,0] = surrogate_model_training(system, training_strategy, n_train_random, n_initial, n_queries, temporal_pool_size, n_instances)
    system.AI_model.lol_model = lol_learner
    system.AI_model.ens_model = ens_learner

    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    results[i,1:] = generate_mcc_results(mcc)

    # print('MLMC results: ', results[i,:])

# NOTE: `results` is not copied under a separate name here, so the next
# experiment cell overwrites it; only the printed average survives.
avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)

In [None]:
# Number of independent repetitions (train surrogates + run MLMC); averaged at the end
n_run_MLMC = 10

# MLMC settings forwarded to run_MLMC
samples = 20  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call
training_strategy = 'AL' # 'AL' or 'Random'

######################################################################################################
# Hyperparameters for active learning
n_initial = 365 * 2 # Initial training set: 730 daily traces (2 x 365)
n_queries = 5 # Number of query rounds
temporal_pool_size = 3650 # Candidate traces generated per query round (10 x 365)
n_instances = 91 # Traces selected and labelled per query round (~1 season = 91 days)

################################################################################################
# Hyperparameters for random sampling
# Training-set size for the 'Random' strategy; not used by surrogate_model_training
# when training_strategy == 'AL'.
n_train_random = 2735


# Estimator: Surr , Architecture: Exact|Surrogate models
ml_hierarchy = ['OptimalNStore', 'AIModel']

# One row per repetition. Column 0 is the training time; columns 1-7 come from generate_mcc_results:
# ["Training time", "Simulation time", "mean_LOL", "stderr_LOL", "speed_LOL", "mean_ENS", "stderr_ENS", "speed_ENS"]
# (the LOL/ENS column order assumes mcc.output_labels lists LOL first, then ENS - TODO confirm)
results = np.zeros((n_run_MLMC, 8))

print('Training strategy: ', training_strategy)
print('n_train_random: ', n_train_random)

for i in range(n_run_MLMC):
    # Train fresh surrogates, plug them into the system's AI model, then run MLMC with them.
    lol_learner, ens_learner, results[i,0] = surrogate_model_training(system, training_strategy, n_train_random, n_initial, n_queries, temporal_pool_size, n_instances)
    system.AI_model.lol_model = lol_learner
    system.AI_model.ens_model = ens_learner

    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    results[i,1:] = generate_mcc_results(mcc)

    # print('MLMC results: ', results[i,:])

# NOTE: `results` is not copied under a separate name here, so the next
# experiment cell overwrites it; only the printed average survives.
avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)

# Random sampling

In [None]:
# Number of independent repetitions (train surrogates + run MLMC); averaged at the end
n_run_MLMC = 5

# MLMC settings forwarded to run_MLMC
samples = 20  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call
training_strategy = 'Random' # 'AL' or 'Random'

######################################################################################################
# Hyperparameters for active learning
# (passed through, but not used by surrogate_model_training when training_strategy == 'Random')
n_initial = 365 * 2 # Initial training set: 730 daily traces (2 x 365)
n_queries = 20 # Number of query rounds
temporal_pool_size = 3650 # Candidate traces generated per query round (10 x 365)
n_instances = 91 # Traces selected and labelled per query round (~1 season = 91 days)

################################################################################################
# Hyperparameters for random sampling
n_train_random = 365 * 5 + 365 * 5 * 3 # Training-set size: 7300 daily traces (20 x 365)


# Estimator: Surr , Architecture: Exact|Surrogate models
ml_hierarchy = ['OptimalNStore', 'AIModel']

# One row per repetition. Column 0 is the training time; columns 1-7 come from generate_mcc_results:
# ["Training time", "Simulation time", "mean_LOL", "stderr_LOL", "speed_LOL", "mean_ENS", "stderr_ENS", "speed_ENS"]
# (the LOL/ENS column order assumes mcc.output_labels lists LOL first, then ENS - TODO confirm)
results = np.zeros((n_run_MLMC, 8))

print('Training strategy: ', training_strategy)
print('n_train_random: ', n_train_random)

for i in range(n_run_MLMC):
    # Train fresh surrogates, plug them into the system's AI model, then run MLMC with them.
    lol_learner, ens_learner, results[i,0] = surrogate_model_training(system, training_strategy, n_train_random, n_initial, n_queries, temporal_pool_size, n_instances)
    system.AI_model.lol_model = lol_learner
    system.AI_model.ens_model = ens_learner

    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    results[i,1:] = generate_mcc_results(mcc)

    # print('MLMC results: ', results[i,:])

# NOTE: `results` is not copied under a separate name here, so the next
# experiment cell overwrites it; only the printed average survives.
avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)

# Random

In [None]:
# Number of independent repetitions (train surrogates + run MLMC); averaged at the end
n_run_MLMC = 10

# MLMC settings forwarded to run_MLMC
samples = 20  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call
training_strategy = 'Random' # 'AL' or 'Random'

######################################################################################################
# Hyperparameters for active learning
# (passed through, but not used by surrogate_model_training when training_strategy == 'Random')
n_initial = 365 * 2 # Initial training set: 730 daily traces (2 x 365)
n_queries = 20 # Number of query rounds
temporal_pool_size = 3650 # Candidate traces generated per query round (10 x 365)
n_instances = 91 # Traces selected and labelled per query round (~1 season = 91 days)

################################################################################################
# Hyperparameters for random sampling
n_train_random = 365 * 5 + 365 * 5 * 7 # Training-set size: 14600 daily traces (40 x 365)


# Estimator: Surr , Architecture: Exact|Surrogate models
ml_hierarchy = ['OptimalNStore', 'AIModel']

# One row per repetition. Column 0 is the training time; columns 1-7 come from generate_mcc_results:
# ["Training time", "Simulation time", "mean_LOL", "stderr_LOL", "speed_LOL", "mean_ENS", "stderr_ENS", "speed_ENS"]
# (the LOL/ENS column order assumes mcc.output_labels lists LOL first, then ENS - TODO confirm)
results = np.zeros((n_run_MLMC, 8))

print('Training strategy: ', training_strategy)
print('n_train_random: ', n_train_random)

for i in range(n_run_MLMC):
    # Train fresh surrogates, plug them into the system's AI model, then run MLMC with them.
    lol_learner, ens_learner, results[i,0] = surrogate_model_training(system, training_strategy, n_train_random, n_initial, n_queries, temporal_pool_size, n_instances)
    system.AI_model.lol_model = lol_learner
    system.AI_model.ens_model = ens_learner

    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    results[i,1:] = generate_mcc_results(mcc)

    # print('MLMC results: ', results[i,:])

# NOTE: `results` is not copied under a separate name here, so the next
# experiment cell overwrites it; only the printed average survives.
avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)

In [None]:
# Number of independent repetitions (train surrogates + run MLMC); averaged at the end
n_run_MLMC = 5

# MLMC settings forwarded to run_MLMC
samples = 20  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call
training_strategy = 'Random' # 'AL' or 'Random'

######################################################################################################
# Hyperparameters for active learning
# (passed through, but not used by surrogate_model_training when training_strategy == 'Random')
n_initial = 365 * 2 # Initial training set: 730 daily traces (2 x 365)
n_queries = 20 # Number of query rounds
temporal_pool_size = 3650 # Candidate traces generated per query round (10 x 365)
n_instances = 91 # Traces selected and labelled per query round (~1 season = 91 days)

################################################################################################
# Hyperparameters for random sampling
n_train_random = 365 * 5 + 365 * 5 * 15 # Training-set size: 29200 daily traces (80 x 365)


# Estimator: Surr , Architecture: Exact|Surrogate models
ml_hierarchy = ['OptimalNStore', 'AIModel']

# One row per repetition. Column 0 is the training time; columns 1-7 come from generate_mcc_results:
# ["Training time", "Simulation time", "mean_LOL", "stderr_LOL", "speed_LOL", "mean_ENS", "stderr_ENS", "speed_ENS"]
# (the LOL/ENS column order assumes mcc.output_labels lists LOL first, then ENS - TODO confirm)
results = np.zeros((n_run_MLMC, 8))

print('Training strategy: ', training_strategy)
print('n_train_random: ', n_train_random)

for i in range(n_run_MLMC):
    # Train fresh surrogates, plug them into the system's AI model, then run MLMC with them.
    lol_learner, ens_learner, results[i,0] = surrogate_model_training(system, training_strategy, n_train_random, n_initial, n_queries, temporal_pool_size, n_instances)
    system.AI_model.lol_model = lol_learner
    system.AI_model.ens_model = ens_learner

    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    results[i,1:] = generate_mcc_results(mcc)

    # print('MLMC results: ', results[i,:])

# NOTE: `results` is not copied under a separate name here, so the next
# experiment cell overwrites it; only the printed average survives.
avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)

In [None]:
# Number of independent repetitions (train surrogates + run MLMC); averaged at the end
n_run_MLMC = 5

# MLMC settings forwarded to run_MLMC
samples = 20  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call
training_strategy = 'Random' # 'AL' or 'Random'

######################################################################################################
# Hyperparameters for active learning
# (passed through, but not used by surrogate_model_training when training_strategy == 'Random')
n_initial = 365 * 2 # Initial training set: 730 daily traces (2 x 365)
n_queries = 20 # Number of query rounds
temporal_pool_size = 3650 # Candidate traces generated per query round (10 x 365)
n_instances = 91 # Traces selected and labelled per query round (~1 season = 91 days)

################################################################################################
# Hyperparameters for random sampling
n_train_random = 365 * 5 + 365 * 4 * 1 # Training-set size: 3285 daily traces (9 x 365)


# Estimator: Surr , Architecture: Exact|Surrogate models
ml_hierarchy = ['OptimalNStore', 'AIModel']

# One row per repetition. Column 0 is the training time; columns 1-7 come from generate_mcc_results:
# ["Training time", "Simulation time", "mean_LOL", "stderr_LOL", "speed_LOL", "mean_ENS", "stderr_ENS", "speed_ENS"]
# (the LOL/ENS column order assumes mcc.output_labels lists LOL first, then ENS - TODO confirm)
results = np.zeros((n_run_MLMC, 8))

print('Training strategy: ', training_strategy)
print('n_train_random: ', n_train_random)

for i in range(n_run_MLMC):
    # Train fresh surrogates, plug them into the system's AI model, then run MLMC with them.
    lol_learner, ens_learner, results[i,0] = surrogate_model_training(system, training_strategy, n_train_random, n_initial, n_queries, temporal_pool_size, n_instances)
    system.AI_model.lol_model = lol_learner
    system.AI_model.ens_model = ens_learner

    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    results[i,1:] = generate_mcc_results(mcc)

    # print('MLMC results: ', results[i,:])

# NOTE: `results` is not copied under a separate name here, so the next
# experiment cell overwrites it; only the printed average survives.
avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)

# Exact model

In [None]:
# Number of independent MLMC repetitions; averaged at the end
n_run_MLMC = 10

# MLMC settings forwarded to run_MLMC
samples = 25  # initial samples per level for the exploration step
n_run = 40  # number of run_recommended calls per repetition
time_seconds = 50  # duration of each run_recommended call

# Estimator: MC , Architecture: Exact
# Single-level hierarchy: only the exact model, no surrogate is trained or used.
ml_hierarchy = ['OptimalNStore']

# One row per repetition. Column 0 is the wall time of the whole run_MLMC call
# measured here; columns 1-7 are the values returned by generate_mcc_results.
results = np.zeros((n_run_MLMC, 8))
for i in range(n_run_MLMC):
    time_start = time.time()
    mcc = run_MLMC(system, samples, n_run, time_seconds, ml_hierarchy, use_joblib=False, verbose=False)
    time_end = time.time()
    results[i,0] = time_end - time_start
    results[i,1:] = generate_mcc_results(mcc)

avg_results = np.mean(results, axis=0)
print('Average results: ', avg_results)