In [54]:
using CSV, DataFrames, Dates, TimeZones

In [55]:
ID = "01"

"01"

In [56]:
# Read the CSV file from the specified path
df = CSV.read("data/charger_battery_data_$ID.csv", DataFrame)

# Filter the data
df = df[:, [:timestamp, :e_consumption, :e_production]]

df.e_consumption = df.e_consumption / 1000
df.e_production = df.e_production / 1000

# Parse the timestamps with timezone offset
timestamps = ZonedDateTime.(df.timestamp, DateFormat("yyyy-mm-dd HH:MM:SSzzzz"))

# Shift the timestamps one hour ahead
timestamps = timestamps .- Hour(1)

# Replace the timestamp column in the DataFrame with the shifted timestamps
df.timestamp = timestamps

35040-element Vector{ZonedDateTime}:
 2020-11-01T00:00:00+01:00
 2020-11-01T00:15:00+01:00
 2020-11-01T00:30:00+01:00
 2020-11-01T00:45:00+01:00
 2020-11-01T01:00:00+01:00
 2020-11-01T01:15:00+01:00
 2020-11-01T01:30:00+01:00
 2020-11-01T01:45:00+01:00
 2020-11-01T02:00:00+01:00
 2020-11-01T02:15:00+01:00
 ⋮
 2021-10-31T21:45:00+01:00
 2021-10-31T22:00:00+01:00
 2021-10-31T22:15:00+01:00
 2021-10-31T22:30:00+01:00
 2021-10-31T22:45:00+01:00
 2021-10-31T23:00:00+01:00
 2021-10-31T23:15:00+01:00
 2021-10-31T23:30:00+01:00
 2021-10-31T23:45:00+01:00

In [57]:
df

Unnamed: 0_level_0,timestamp,e_consumption,e_production
Unnamed: 0_level_1,ZonedDa…,Float64,Float64
1,2020-11-01T00:00:00+01:00,0.023,0.0
2,2020-11-01T00:15:00+01:00,0.02,0.0
3,2020-11-01T00:30:00+01:00,0.028,0.0
4,2020-11-01T00:45:00+01:00,0.035,0.0
5,2020-11-01T01:00:00+01:00,0.033,0.0
6,2020-11-01T01:15:00+01:00,0.028,0.0
7,2020-11-01T01:30:00+01:00,0.03,0.0
8,2020-11-01T01:45:00+01:00,0.034,0.0
9,2020-11-01T02:00:00+01:00,0.033,0.0
10,2020-11-01T02:15:00+01:00,0.029,0.0


In [58]:
for col_name in names(df)
    println("Column: ", col_name, ", Type: ", eltype(df[!, col_name]))
end

for col_name in names(df)
    missing_count = sum(ismissing.(df[!, col_name]))
    println("Column: ", col_name, ", Missing Values: ", missing_count)
end

Column: timestamp, Type: ZonedDateTime
Column: e_consumption, Type: Float64
Column: e_production, Type: Float64
Column: timestamp, Missing Values: 0
Column: e_consumption, Missing Values: 0
Column: e_production, Missing Values: 0


In [59]:
# # Handle missing values
# total_missing = 0
# replaced_with_previous_day = 0
# replaced_with_zero = 0

# for col_name in names(df)
#     missing_indices = findall(ismissing, df[!, col_name])
#     total_missing += length(missing_indices)
    
#     for i in missing_indices
#         if i > 24
#             if ismissing(df[i-24, col_name])
#                 df[i, col_name] = 0
#                 replaced_with_zero += 1
#             else
#                 df[i, col_name] = df[i-24, col_name]
#                 replaced_with_previous_day += 1
#             end
#         else
#             df[i, col_name] = 0
#             replaced_with_zero += 1
#         end
#     end
# end

# println("Total missing values: ", total_missing)
# println("Replaced with previous day: ", replaced_with_previous_day)
# println("Replaced with zero: ", replaced_with_zero)

In [60]:
# Define a custom resampling function
function resample(df, time_column, interval)
    # Round the timestamps to the nearest hour
    df[!, time_column] = Dates.floor.(df[!, time_column], interval)
    
    # Group by the rounded timestamps and sum the other columns
    new_df = combine(groupby(df, time_column), names(df, Not(time_column)) .=> sum .=> names(df, Not(time_column)))
    
    return new_df
end

resample (generic function with 1 method)

In [61]:
# Resample the data to 1-hour intervals
df = resample(df, :timestamp, Dates.Hour(1))

Unnamed: 0_level_0,timestamp,e_consumption,e_production
Unnamed: 0_level_1,ZonedDa…,Float64,Float64
1,2020-11-01T00:00:00+01:00,0.106,0.0
2,2020-11-01T01:00:00+01:00,0.125,0.0
3,2020-11-01T02:00:00+01:00,0.126,0.0
4,2020-11-01T03:00:00+01:00,0.129,0.0
5,2020-11-01T04:00:00+01:00,0.129,0.0
6,2020-11-01T05:00:00+01:00,0.13,0.0
7,2020-11-01T06:00:00+01:00,0.145,0.031
8,2020-11-01T07:00:00+01:00,0.579,0.226
9,2020-11-01T08:00:00+01:00,1.497,0.545
10,2020-11-01T09:00:00+01:00,1.432,0.685


In [62]:
# Create a new DataFrame with the desired structure
new_df = DataFrame(
    electkwh = df.e_consumption,
    heatingkwh = zeros(length(df.e_consumption)),
    hotwaterkwh = zeros(length(df.e_consumption)),
    PV_generation = df.e_production,
    Temperature = zeros(length(df.e_consumption)),
    month = month.(DateTime.(df.timestamp)),
    day = day.(DateTime.(df.timestamp)),
    hour = hour.(DateTime.(df.timestamp))
)

Unnamed: 0_level_0,electkwh,heatingkwh,hotwaterkwh,PV_generation,Temperature,month,day,hour
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Int64,Int64,Int64
1,0.106,0.0,0.0,0.0,0.0,11,1,0
2,0.125,0.0,0.0,0.0,0.0,11,1,1
3,0.126,0.0,0.0,0.0,0.0,11,1,2
4,0.129,0.0,0.0,0.0,0.0,11,1,3
5,0.129,0.0,0.0,0.0,0.0,11,1,4
6,0.13,0.0,0.0,0.0,0.0,11,1,5
7,0.145,0.0,0.0,0.031,0.0,11,1,6
8,0.579,0.0,0.0,0.226,0.0,11,1,7
9,1.497,0.0,0.0,0.545,0.0,11,1,8
10,1.432,0.0,0.0,0.685,0.0,11,1,9


In [63]:
# Write the new DataFrame to a CSV file in the specified path
CSV.write("data/sonnen$(ID)_datafile_all.csv", new_df)

"data/sonnen01_datafile_all.csv"

In [64]:
#Input_df = CSV.read("data/sonnen$(ID)_datafile_all.csv", DataFrame);

Input_df = new_df

Unnamed: 0_level_0,electkwh,heatingkwh,hotwaterkwh,PV_generation,Temperature,month,day,hour
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Int64,Int64,Int64
1,0.106,0.0,0.0,0.0,0.0,11,1,0
2,0.125,0.0,0.0,0.0,0.0,11,1,1
3,0.126,0.0,0.0,0.0,0.0,11,1,2
4,0.129,0.0,0.0,0.0,0.0,11,1,3
5,0.129,0.0,0.0,0.0,0.0,11,1,4
6,0.13,0.0,0.0,0.0,0.0,11,1,5
7,0.145,0.0,0.0,0.031,0.0,11,1,6
8,0.579,0.0,0.0,0.226,0.0,11,1,7
9,1.497,0.0,0.0,0.545,0.0,11,1,8
10,1.432,0.0,0.0,0.685,0.0,11,1,9


In [65]:
describe(Input_df)

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Real,Float64,Real,Int64,DataType
1,electkwh,1.09726,0.001,0.415,10.517,0,Float64
2,heatingkwh,0.0,0.0,0.0,0.0,0,Float64
3,hotwaterkwh,0.0,0.0,0.0,0.0,0,Float64
4,PV_generation,1.11964,0.0,0.007,7.673,0,Float64
5,Temperature,0.0,0.0,0.0,0.0,0,Float64
6,month,6.52683,1.0,7.0,12.0,0,Int64
7,day,15.7209,1.0,16.0,31.0,0,Int64
8,hour,11.5,0.0,11.5,23.0,0,Int64


In [66]:
# add new column with number of days
Input_df[!, :nday] = 1:nrow(Input_df)

1:8760

In [67]:
# calculate residual demand
Input_df[!, :d_res] = Input_df[!,:electkwh] + Input_df[!,:heatingkwh] + Input_df[!,:hotwaterkwh] - Input_df[!,:PV_generation]

8760-element Vector{Float64}:
 0.106
 0.125
 0.126
 0.129
 0.129
 0.13
 0.11400000000000002
 0.3529999999999999
 0.9519999999999998
 0.7469999999999999
 ⋮
 0.09599999999999997
 0.986
 0.46699999999999997
 0.468
 0.326
 0.083
 0.084
 0.08600000000000001
 0.086

## Add periodical time representation using cos/sin

In [68]:
# add columns with cos and sin values for periodical time values day + month
Input_df[!, :hour_cos] = cos.(Input_df[!,:hour] ./ maximum(Input_df[!,:hour]) .* 2*pi);
Input_df[!, :hour_sin] = sin.(Input_df[!,:hour] ./ maximum(Input_df[!,:hour]) .* 2*pi);

Input_df[!, :month_cos] = cos.(Input_df[!,:month] ./ maximum(Input_df[!,:month]) .* 2*pi);
Input_df[!, :month_sin] = sin.(Input_df[!,:month] ./ maximum(Input_df[!,:month]) .* 2*pi);

#Input_df[!, :nday_cos] = cos.(Input_df[!,:nday] ./ maximum(Input_df[!,:nday]) .* 2*pi);
#Input_df[!, :nday_sin] = sin.(Input_df[!,:nday] ./ maximum(Input_df[!,:nday]) .* 2*pi);

## Add seasons

In [69]:
Input_df[!, :spring] = (Input_df[!,:month] .>= 3) .* (Input_df[!,:month] .<= 5);
Input_df[!, :summer] = (Input_df[!,:month] .>= 6) .* (Input_df[!,:month] .<= 8);
Input_df[!, :autumn] = (Input_df[!,:month] .>= 9) .* (Input_df[!,:month] .<= 11);
Input_df[!, :winter] = convert.(Bool, (Input_df[!,:month] .>= 12) .+ (Input_df[!,:month] .<= 2));

Input_df[!, :season] = ifelse.(Input_df[!,:spring] .== true, 1,
                        ifelse.(Input_df[!,:summer] .== true, 2,
                        ifelse.(Input_df[!,:autumn] .== true, 3, 
                        4)));

In [70]:
describe(Input_df)

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Real,Float64,Real,Int64,DataType
1,electkwh,1.09726,0.001,0.415,10.517,0,Float64
2,heatingkwh,0.0,0.0,0.0,0.0,0,Float64
3,hotwaterkwh,0.0,0.0,0.0,0.0,0,Float64
4,PV_generation,1.11964,0.0,0.007,7.673,0,Float64
5,Temperature,0.0,0.0,0.0,0.0,0,Float64
6,month,6.52683,1.0,7.0,12.0,0,Int64
7,day,15.7209,1.0,16.0,31.0,0,Int64
8,hour,11.5,0.0,11.5,23.0,0,Int64
9,nday,4380.5,1.0,4380.5,8760.0,0,Int64
10,d_res,-0.0223821,-7.092,0.143,10.517,0,Float64


## Add dynamic prices based on Ye et al. 2020

In [71]:
#LU
#=
function set_dynamic_prices(Input_df)
    map(eachrow(Input_df)) do r
        if r.month >= 5 && r.month <= 10
            if (r.hour >= 6 && r.hour <= 9) || (r.hour >= 16 && r.hour <= 17)
                return 0.3f0
            elseif (r.hour >= 10 && r.hour <= 15)
                return 0.6f0
            else
                return 0.15f0
            end
        elseif r.month >= 11 || r.month <= 4
            if (r.hour >= 6 && r.hour <= 9) || (r.hour >= 16 && r.hour <= 17)
                return 0.6f0
            elseif r.hour >= 10 && r.hour <= 15
                return 0.3f0
            else
                return 0.15f0
            end
        end
    end
end        

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [72]:
#LU
#=Input_df[!, "p_buy"] = set_dynamic_prices(Input_df);
Input_df[!, "p_sell"] = 0.5 .* Input_df[!, "p_buy"];

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [73]:
#LU describe(Input_df)

## Extract training, testing + evalution data set for summer, winter, both, all

In [74]:
#LU
#=# filter summer
Input_data_summer = filter(:summer => !=(0), Input_df)
describe(Input_data_summer), size(Input_data_summer)

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [75]:
function train_eval_test_split(Input_df)
    train = filter(row -> row.day <= 15, Input_df)
    eval = filter(row -> row.day > 15 && row.day <= 20, Input_df)
    test = filter(row -> row.day > 20, Input_df)
    return train, eval, test
end

train_eval_test_split (generic function with 1 method)

In [76]:
#LU
#=summer_training, summer_evaluation, summer_testing = train_eval_test_split(Input_data_summer)

CSV.write("data/$(ID)_summer_train_TOU.csv", summer_training);
CSV.write("data/$(ID)_summer_eval_TOU.csv", summer_evaluation);
CSV.write("data/$(ID)_summer_test_TOU.csv", summer_testing);

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [77]:
#LU
#=# filter winter
Input_data_winter = filter(:winter => !=(0), Input_df)
describe(Input_data_winter), size(Input_data_winter)

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [78]:
#LU
#=winter_training, winter_evaluation, winter_testing = train_eval_test_split(Input_data_winter)

# write data files
CSV.write("data/$(ID)_winter_train_TOU.csv", winter_training);
CSV.write("data/$(ID)_winter_eval_TOU.csv", winter_evaluation);
CSV.write("data/$(ID)_winter_test_TOU.csv", winter_testing);

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [79]:
all_training, all_evaluation, all_testing = train_eval_test_split(Input_df)

#LU 
#=
# write data files
CSV.write("data/$(ID)_all_train_TOU.csv", all_training);
CSV.write("data/$(ID)_all_eval_TOU.csv", all_evaluation);
CSV.write("data/$(ID)_all_test_TOU.csv", all_testing);

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [80]:
#LU
#=describe(vcat(Input_data_winter, Input_data_summer) ), size(vcat(Input_data_winter, Input_data_summer) )

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [81]:
#LU
#=# both seasons split
both_training, both_evaluation, both_testing = train_eval_test_split(vcat(Input_data_winter, Input_data_summer))

# write data files
CSV.write("data/$(ID)_both_train_TOU.csv", both_training);
CSV.write("data/$(ID)_both_test_TOU.csv", both_testing);
CSV.write("data/$(ID)_both_eval_TOU.csv", both_evaluation);

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

## Dataset for fixed prices

In [82]:
Input_df[!, "p_buy"] .= 0.3;
Input_df[!, "p_sell"] .= 0.1;

In [83]:
#LU
#=Input_data_summer = filter(:summer => !=(0), Input_df);
Input_data_winter = filter(:winter => !=(0), Input_df);

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#

In [84]:
#LU summer_training, summer_evaluation, summer_testing = train_eval_test_split(Input_data_summer);
#LU winter_training, winter_evaluation, winter_testing = train_eval_test_split(Input_data_winter);
all_training, all_evaluation, all_testing = train_eval_test_split(Input_df);
#LU both_training, both_evaluation, both_testing = train_eval_test_split(vcat(Input_data_winter, Input_data_summer));

In [85]:
#LU
#=CSV.write("data/$(ID)_both_train_fix.csv", both_training);
CSV.write("data/$(ID)_both_test_fix.csv", both_testing);
CSV.write("data/$(ID)_both_eval_fix.csv", both_evaluation);=#

CSV.write("data/$(ID)_all_train_fix.csv", all_training);
CSV.write("data/$(ID)_all_test_fix.csv", all_testing);
CSV.write("data/$(ID)_all_eval_fix.csv", all_evaluation);

#LU
#=CSV.write("data/$(ID)_summer_train_fix.csv", summer_training);
CSV.write("data/$(ID)_summer_test_fix.csv", summer_testing);
CSV.write("data/$(ID)_summer_eval_fix.csv", summer_evaluation);
CSV.write("data/$(ID)_winter_train_fix.csv", winter_training);
CSV.write("data/$(ID)_winter_test_fix.csv", winter_testing);
CSV.write("data/$(ID)_winter_eval_fix.csv", winter_evaluation);

ErrorException: syntax: incomplete: unterminated multi-line comment #= ... =#