In [1]:
# Scenario Generation with Copulas 
# 
# Hugo S. de Araujo
# Nov. 14th, 2022 | Mays Group | Cornell University
################################################################################

#=======================================================================
PROJECT SETUP
=======================================================================#
# Activate the "copulas" project environment and install any dependencies
# that are recorded for it but not yet available.
using Pkg
Pkg.activate("copulas")
Pkg.instantiate()

# Load the `here` helper: a wrapper that makes path concatenation easy.
include(joinpath(@__DIR__, "functions", "fct_here.jl"))

# Import all required packages. 
begin
    # using AWSS3
    using CSV
    using DataFrames
    using Dates
    using DelimitedFiles
    using Distributions
    using HDF5
    using JuliaFormatter
    using LaTeXStrings
    using LinearAlgebra
    using LinearSolve
    #using Measures
    using Random
    using RCall
    using Revise
    using Statistics
    using StatsBase
    #using StatsPlots
    using OhMyREPL
    using Plots
    #using PrettyTables
    using Tables
    using TSFrames
    using TimeZones
end

# Include functions 
#= functions_dirpath = joinpath(pwd(),"src", "functions");
function_paths = readdir(functions_dirpath, join=true);
function_index = occursin.(".jl", function_paths);
functions_only = function_paths[function_index];

for str in functions_only
    include(str)
end =#

# Load every project helper from src/functions, one file per function, in the
# order listed below.
for fct_file in (
    "fct_bind_historical_forecast.jl",
    "fct_compute_hourly_average_actuals.jl",
    "fct_compute_landing_probability.jl",
    "fct_convert_hours_2018.jl",
    "fct_convert_ISO_standard.jl",
    "fct_convert_land_prob_to_data.jl",
    "fct_generate_probability_scenarios.jl",
    "fct_getplots.jl",
    "fct_plot_correlation_heatmap.jl",
    "fct_plot_historical_landing.jl",
    "fct_plot_historical_synthetic_autocorrelation.jl",
    "fct_plot_correlogram_landing_probability.jl",
    "fct_plot_scenarios_and_actual.jl",
    "fct_read_h5_file.jl",
    "fct_read_input_file.jl",
    "fct_transform_landing_probability.jl",
    "fct_write_percentiles.jl",
)
    include(here("src", "functions", fct_file))
end
#=======================================================================
READ INPUT FILE
=======================================================================#
# Path of the plain-text input file. It is built from separate components, like
# every other `here` call in this script, so that it does not depend on the
# Windows-only "\\" separator.
input_file_path = here("src", "copulas.txt")

# XXX Needs to be updated to use hard-coded values instead of reading a text file.
# NOTE(review): `forecastd_da_wind` looks like a typo for `forecast_da_wind`; the
# name is left unchanged because other cells may refer to it.
data_type,
scenario_length,
number_of_scenarios,
scenario_hour,
scenario_day,
scenario_month,
scenario_year,
read_locally,
historical_load,
forecast_load,
historical_solar,
forecast_da_solar,
forecast_2da_solar,
historical_wind,
forecastd_da_wind,
forecast_2da_wind,
write_percentile = read_input_file(input_file_path);

#=======================================================================
READ INPUT DATA: ARPA-E PERFORM PROJECT H5 FILES
=======================================================================#
# `read_h5_file` reads one .h5 file and binds the time index and the
# actuals/forecast values into a single dataframe. Forecast files are read with
# `false` as the third positional argument; actuals files omit it.

# Load: actuals (file name taken from the input file) and day-ahead forecast
load_actuals_raw = read_h5_file(here("data", historical_load), "load");
load_forecast_raw = read_h5_file(
    here("data", "ercot_BA_load_forecast_day_ahead_2018.h5"),
    "load",
    false,
);

# Solar: actuals, day-ahead forecast and two-day-ahead forecast
solar_actuals_raw = read_h5_file(
    here("data", "ercot_BA_solar_actuals_Existing_2018.h5"),
    "solar",
);
solar_forecast_dayahead_raw = read_h5_file(
    here("data", "ercot_BA_solar_forecast_day_ahead_existing_2018.h5"),
    "solar",
    false,
);
solar_forecast_2dayahead_raw = read_h5_file(
    here("data", "ercot_BA_solar_forecast_2_day_ahead_existing_2018.h5"),
    "solar",
    false,
);

# Wind: actuals, day-ahead forecast and two-day-ahead forecast
wind_actuals_raw = read_h5_file(
    here("data", "ercot_BA_wind_actuals_Existing_2018.h5"),
    "wind",
);
wind_forecast_dayahead_raw = read_h5_file(
    here("data", "ercot_BA_wind_forecast_day_ahead_existing_2018.h5"),
    "wind",
    false,
);
wind_forecast_2dayahead_raw = read_h5_file(
    here("data", "ercot_BA_wind_forecast_2_day_ahead_existing_2018.h5"),
    "wind",
    false,
);

#=======================================================================
Compute the hourly average for the actuals data
=======================================================================#
# For each resource, take the `:Index` and `:values_mean` columns returned by
# `compute_hourly_average_actuals` and store them in a two-column DataFrame as
# `:time_index` and `:avg_actual`. `copycols = false` keeps the extracted
# vectors themselves as the columns, exactly as `df[!, col] = v` does.

# Load
aux = compute_hourly_average_actuals(load_actuals_raw);
time_index, avg_actual = aux[:, :Index], aux[:, :values_mean];
load_actual_avg_raw = DataFrame(
    :time_index => time_index, :avg_actual => avg_actual; copycols = false
);

# Solar
aux = compute_hourly_average_actuals(solar_actuals_raw);
time_index, avg_actual = aux[:, :Index], aux[:, :values_mean];
solar_actual_avg_raw = DataFrame(
    :time_index => time_index, :avg_actual => avg_actual; copycols = false
);

# Wind
aux = compute_hourly_average_actuals(wind_actuals_raw);
time_index, avg_actual = aux[:, :Index], aux[:, :values_mean];
wind_actual_avg_raw = DataFrame(
    :time_index => time_index, :avg_actual => avg_actual; copycols = false
);

[32m[1m  Activating[22m[39m project at `c:\Users\ks885\Documents\aa_research\Modeling\norta_scenarios\copulas\src\copulas`




In [4]:
# Work on a copy of the raw load forecast and move both of its timestamp
# columns (forecast time and issue time) back by six hours, which converts
# them to US Central time.
data = load_forecast_raw;
df = copy(data);
for col in (:forecast_time, :issue_time)
    df[:, col] = df[:, col] .- Dates.Hour(6)
end
return df;

* GitHub Copilot just wrote the time shift/transformation in a different way... Would this fix the issue in `convert_hours`?
    * I could test it by running the original 02_debugging dayling_savings_time.ipynb notebook and doing all of the checks at its end...

In [5]:
"""
    upd_convert_hours_2018(data, is_actual = true; offset = Dates.Hour(6))

Return a copy of `data` whose timestamp columns are shifted back by `offset`.

# Arguments
- `data`: table indexed as `data[:, column]`. Actuals need a `:time_index`
  column; forecasts need `:forecast_time` and `:issue_time` columns. The table
  is duplicated with `copy` before any column is written.
- `is_actual`: `true` shifts `:time_index`; `false` shifts `:forecast_time`
  and `:issue_time`.

# Keywords
- `offset`: period subtracted from every timestamp. The default of six hours
  reproduces the shift that used to be hard-coded here. A single fixed offset
  does not follow daylight-saving changes; pass a different period if another
  shift is required.

# Returns
The shifted copy of `data`.
"""
function upd_convert_hours_2018(data, is_actual = true; offset = Dates.Hour(6))
    shifted = copy(data)
    # Actuals carry one timestamp column, forecasts carry two.
    columns = is_actual ? (:time_index,) : (:forecast_time, :issue_time)
    for col in columns
        shifted[:, col] = shifted[:, col] .- offset
    end
    return shifted
end

upd_convert_hours_2018 (generic function with 2 methods)

In [6]:
# Apply `upd_convert_hours_2018` to every raw table. The second argument states
# explicitly whether the table holds actuals (`true`) or forecasts (`false`).

# Load
load_actuals = upd_convert_hours_2018(load_actuals_raw, true);
load_actual_avg = upd_convert_hours_2018(load_actual_avg_raw, true);
load_forecast = upd_convert_hours_2018(load_forecast_raw, false);

# Solar
solar_actuals = upd_convert_hours_2018(solar_actuals_raw, true);
solar_actual_avg = upd_convert_hours_2018(solar_actual_avg_raw, true);
solar_forecast_dayahead = upd_convert_hours_2018(solar_forecast_dayahead_raw, false);
solar_forecast_2dayahead = upd_convert_hours_2018(solar_forecast_2dayahead_raw, false);

# Wind
wind_actuals = upd_convert_hours_2018(wind_actuals_raw, true);
wind_actual_avg = upd_convert_hours_2018(wind_actual_avg_raw, true);
wind_forecast_dayahead = upd_convert_hours_2018(wind_forecast_dayahead_raw, false);
wind_forecast_2dayahead = upd_convert_hours_2018(wind_forecast_2dayahead_raw, false);


In [7]:
#=======================================================================
BIND HOURLY HISTORICAL DATA WITH FORECAST DATA
========================================================================#
#= Actuals and forecasts are bound on "forecast_time" = "time_index". Each
average actual value therefore appears twice, which is intended: load_forecast
has twice as many rows as load_actual. The "ahead_factor" column is introduced
to tell a one-day-ahead forecast from a two-day-ahead forecast. For wind and
solar, the day-ahead and two-day-ahead forecasts are bound together so that all
forecast data sits in one object, as it already does for the load forecast. =#
load_data = bind_historical_forecast(true, load_actual_avg, load_forecast);

solar_data = bind_historical_forecast(
    false, solar_actual_avg, solar_forecast_dayahead, solar_forecast_2dayahead
);

wind_data = bind_historical_forecast(
    false, wind_actual_avg, wind_forecast_dayahead, wind_forecast_2dayahead
);

In [8]:
#=======================================================================
Landing probability
=======================================================================#
#= This section computes, for a given day, the probability that the actual
value was equal to or greater than the forecast percentiles. An approximate
CDF is estimated from the forecast percentiles and is then used to find the
"landing probability": the probability that the actual value is equal to or
greater than a given percentage of the forecast percentile.
=#
#include(here("src", "functions", "fct_compute_landing_probability.jl"))
landing_probability_load, landing_probability_solar, landing_probability_wind = map(
    compute_landing_probability, (load_data, solar_data, wind_data)
);

In [9]:
#=======================================================================
ADJUST LANDING PROBABILITY DATAFRAME
=======================================================================#
# Pass each landing-probability table through `transform_landing_probability`.
lp_load = landing_probability_load |> transform_landing_probability;
lp_solar = landing_probability_solar |> transform_landing_probability;
lp_wind = landing_probability_wind |> transform_landing_probability;

In [10]:
# Sorted copy of the load landing probabilities, ordered by issue time
x = sort(landing_probability_load, :issue_time);
# Number of rows that each issue time contributes
df = combine(groupby(x, :issue_time), DataFrames.nrow => :count);

# Issue times with exactly 50 rows, and the forecast times that belong to them
dst_filter = df[df.count .== 50, :]
forecast_time_dst = filter(:issue_time => in(dst_filter.issue_time), x).forecast_time


DateTime[]

None of the issue times has 50 rows! Next, check that every issue time has the 48 rows we expect.

In [11]:
# Expected number of hours: 363 days (the row count shown for `lp_load` in this
# notebook) times 24 hours per day.
363*24

8712

Let us make sure that when we go through the process of filtering, we get 8712 hours out of it...

In [12]:
# Display `lp_load` so that its dimensions can be inspected.
lp_load

363×48 transpose(::Matrix{Float64}) with eltype Float64:
 0.888889  0.585859  0.545455  0.505051  …  0.878788  0.939394  0.919192
 0.747475  0.777778  0.606061  0.606061     0.909091  0.878788  0.878788
 1.0       0.939394  0.909091  1.0          0.434343  0.30303   0.323232
 0.959596  0.828283  0.79798   0.888889     0.585859  0.575758  0.535354
 0.353535  0.474747  0.575758  0.626263     0.333333  0.30303   0.323232
 0.747475  0.707071  0.69697   0.757576  …  0.222222  0.20202   0.282828
 0.525253  0.444444  0.393939  0.333333     0.818182  0.838384  0.838384
 0.626263  0.686869  0.717172  0.656566     0.747475  0.757576  0.767677
 0.818182  0.838384  0.919192  0.888889     0.515152  0.545455  0.515152
 0.717172  0.686869  0.707071  0.737374     0.989899  0.979798  0.989899
 ⋮                                       ⋱  ⋮                   
 0.868687  0.919192  0.949495  0.949495     0.515152  0.616162  0.525253
 0.666667  0.676768  0.737374  0.676768  …  0.585859  0.555556  0.59596
 0.

It is 363 by 48, so it looks correct.