In [400]:
using CSV, DataFrames, Dates, TimeZones

ENV["COLUMNS"] = 1000

1000

In [401]:
ID = "Charger99"

"Charger99"

In [402]:
# Read the CSV file from the specified path
df = CSV.read("data/charger_battery_data_$ID.csv", DataFrame)

# Filter the data
df = df[:, [:timestamp, :e_consumption, :e_production, :e_charger, :h_countdown, :soc_ev]]

df.e_consumption = df.e_consumption / 1000
df.e_production = df.e_production / 1000
df.e_charger = df.e_charger / 1000

# Parse the timestamps with timezone offset
timestamps = ZonedDateTime.(df.timestamp, DateFormat("yyyy-mm-dd HH:MM:SSzzzz"))

# Shift the timestamps one hour ahead
timestamps = timestamps .- Hour(1)

# Replace the timestamp column in the DataFrame with the shifted timestamps
df.timestamp = timestamps

35040-element Vector{ZonedDateTime}:
 2020-11-01T00:00:00+01:00
 2020-11-01T00:15:00+01:00
 2020-11-01T00:30:00+01:00
 2020-11-01T00:45:00+01:00
 2020-11-01T01:00:00+01:00
 2020-11-01T01:15:00+01:00
 2020-11-01T01:30:00+01:00
 2020-11-01T01:45:00+01:00
 2020-11-01T02:00:00+01:00
 2020-11-01T02:15:00+01:00
 ⋮
 2021-10-31T21:45:00+01:00
 2021-10-31T22:00:00+01:00
 2021-10-31T22:15:00+01:00
 2021-10-31T22:30:00+01:00
 2021-10-31T22:45:00+01:00
 2021-10-31T23:00:00+01:00
 2021-10-31T23:15:00+01:00
 2021-10-31T23:30:00+01:00
 2021-10-31T23:45:00+01:00

In [403]:
#df

In [404]:
for col_name in names(df)
    display(("Column: ", col_name, ", Type: ", eltype(df[!, col_name])))
end

for col_name in names(df)
    missing_count = sum(ismissing.(df[!, col_name]))
    display(("Column: ", col_name, ", Missing Values: ", missing_count))
end

("Column: ", "timestamp", ", Type: ", ZonedDateTime)

("Column: ", "e_consumption", ", Type: ", Float64)

("Column: ", "e_production", ", Type: ", Float64)

("Column: ", "e_charger", ", Type: ", Union{Missing, Float64})

("Column: ", "h_countdown", ", Type: ", Float64)

("Column: ", "soc_ev", ", Type: ", Float64)

("Column: ", "timestamp", ", Missing Values: ", 0)

("Column: ", "e_consumption", ", Missing Values: ", 0)

("Column: ", "e_production", ", Missing Values: ", 0)

("Column: ", "e_charger", ", Missing Values: ", 28546)

("Column: ", "h_countdown", ", Missing Values: ", 0)

("Column: ", "soc_ev", ", Missing Values: ", 0)

In [405]:
#describe(df)

Resamplign from 15min intervalls to 1h intervalls. In the future, I'd like to try to work with 15min intervals.

In [406]:
function resample(df, time_column, interval)
    # Round the timestamps to the nearest hour
    df[!, time_column] = Dates.floor.(df[!, time_column], interval)
    
    # Define the columns to be summed
    sum_columns = ["e_consumption", "e_production", "e_charger"]
    
    # Group by the rounded timestamps and sum the other columns
    new_df = combine(groupby(df, time_column), sum_columns .=> (x -> sum(coalesce.(x, 0))) .=> sum_columns)
    
    # Handle "h_countdown" and "soc_ev" separately
    min_values = combine(groupby(df, time_column), "h_countdown" => minimum => "h_countdown")
    new_df = leftjoin(new_df, min_values, on=time_column)
    
    min_values = combine(groupby(df, time_column), "soc_ev" => minimum => "soc_ev")
    new_df = leftjoin(new_df, min_values, on=time_column)

    #Increase every value of h_countdown to the next higher integer
    new_df.h_countdown = ceil.(new_df.h_countdown)

    # Change last countdown value from -1
    for i in 2:(nrow(new_df) )# - 1)
        if new_df[i, :h_countdown] == -1 && new_df[(i-1), :h_countdown] == 1
            new_df[i, :h_countdown] = 0
        end
        if new_df[i, :h_countdown] == -1 && new_df[i, :soc_ev] < 1
            new_df[i, :soc_ev] = 1
        end
    end

    return new_df
end


resample (generic function with 1 method)

In [407]:
# Resample the data to 1-hour intervals
df_resampled = resample(df, :timestamp, Dates.Hour(1))

Unnamed: 0_level_0,timestamp,e_consumption,e_production,e_charger,h_countdown,soc_ev
Unnamed: 0_level_1,ZonedDa…,Float64,Float64,Float64,Float64,Float64?
1,2020-11-01T00:00:00+01:00,2.128,0.0,0.0,-1.0,1.0
2,2020-11-01T01:00:00+01:00,0.24,0.0,0.0,-1.0,1.0
3,2020-11-01T02:00:00+01:00,0.722,0.0,0.0,-1.0,1.0
4,2020-11-01T03:00:00+01:00,2.186,0.0,0.0,-1.0,1.0
5,2020-11-01T04:00:00+01:00,2.162,0.0,0.0,-1.0,1.0
6,2020-11-01T05:00:00+01:00,1.701,0.0,0.0,-1.0,1.0
7,2020-11-01T06:00:00+01:00,0.258,0.441,0.0,-1.0,1.0
8,2020-11-01T07:00:00+01:00,0.764,1.5,0.0,-1.0,1.0
9,2020-11-01T08:00:00+01:00,2.059,3.483,0.0,-1.0,1.0
10,2020-11-01T09:00:00+01:00,0.295,5.637,0.0,-1.0,1.0


In [408]:
# Change the type of the chargekwh column to Union{Missing, Float64}
df_resampled.e_charge = convert(Vector{Union{Missing, Float64}}, df_resampled.e_charger)


8760-element Vector{Union{Missing, Float64}}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [409]:
# Create a new DataFrame with the desired structure
new_df = DataFrame(
    electkwh = df_resampled.e_consumption,
    PV_generation = df_resampled.e_production,
    chargekwh = df_resampled.e_charger,
    h_countdown = df_resampled.h_countdown,
    soc_ev = df_resampled.soc_ev,
    month = month.(DateTime.(df_resampled.timestamp)),
    day = day.(DateTime.(df_resampled.timestamp)),
    hour = hour.(DateTime.(df_resampled.timestamp))
)

Unnamed: 0_level_0,electkwh,PV_generation,chargekwh,h_countdown,soc_ev,month,day,hour
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64?,Int64,Int64,Int64
1,2.128,0.0,0.0,-1.0,1.0,11,1,0
2,0.24,0.0,0.0,-1.0,1.0,11,1,1
3,0.722,0.0,0.0,-1.0,1.0,11,1,2
4,2.186,0.0,0.0,-1.0,1.0,11,1,3
5,2.162,0.0,0.0,-1.0,1.0,11,1,4
6,1.701,0.0,0.0,-1.0,1.0,11,1,5
7,0.258,0.441,0.0,-1.0,1.0,11,1,6
8,0.764,1.5,0.0,-1.0,1.0,11,1,7
9,2.059,3.483,0.0,-1.0,1.0,11,1,8
10,0.295,5.637,0.0,-1.0,1.0,11,1,9


In [410]:

# Change the type of the chargekwh column to Union{Missing, Float64}
new_df.chargekwh = convert(Vector{Union{Missing, Float64}}, new_df.chargekwh)

# In every row where h_countdown is -1, set chargekwh to 'missing'
new_df[new_df.h_countdown .== -1, :chargekwh] .= missing

7138-element view(::Vector{Union{Missing, Float64}}, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  8751, 8752, 8753, 8754, 8755, 8756, 8757, 8758, 8759, 8760]) with eltype Union{Missing, Float64}:
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 ⋮
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing

In [411]:
# Write the new DataFrame to a CSV file in the specified path
CSV.write("data/sonnen$(ID)_datafile_all.csv", new_df)

"data/sonnenCharger99_datafile_all.csv"

In [412]:
#Input_df = CSV.read("data/sonnen$(ID)_datafile_all.csv", DataFrame);

Input_df = new_df

Unnamed: 0_level_0,electkwh,PV_generation,chargekwh,h_countdown,soc_ev,month,day,hour
Unnamed: 0_level_1,Float64,Float64,Float64?,Float64,Float64?,Int64,Int64,Int64
1,2.128,0.0,missing,-1.0,1.0,11,1,0
2,0.24,0.0,missing,-1.0,1.0,11,1,1
3,0.722,0.0,missing,-1.0,1.0,11,1,2
4,2.186,0.0,missing,-1.0,1.0,11,1,3
5,2.162,0.0,missing,-1.0,1.0,11,1,4
6,1.701,0.0,missing,-1.0,1.0,11,1,5
7,0.258,0.441,missing,-1.0,1.0,11,1,6
8,0.764,1.5,missing,-1.0,1.0,11,1,7
9,2.059,3.483,missing,-1.0,1.0,11,1,8
10,0.295,5.637,missing,-1.0,1.0,11,1,9


In [413]:
#describe(Input_df)

In [414]:
# add new column with number of days
Input_df[!, :nday] = 1:nrow(Input_df)

1:8760

In [415]:
# calculate residual demand
Input_df[!, :d_res] = Input_df[!,:electkwh] + coalesce.(Input_df[!,:chargekwh], 0) - Input_df[!,:PV_generation]

8760-element Vector{Float64}:
  2.128
  0.24
  0.722
  2.186
  2.162
  1.701
 -0.18299999999999994
 -0.736
 -1.4240000000000004
 -5.3420000000000005
  ⋮
 -2.497
  0.7210000000000001
  1.69
  0.5630000000000001
  2.612
  2.2880000000000003
  0.9759999999999999
  0.677
  2.005

## Add periodical time representation using cos/sin

In [416]:
# add columns with cos and sin values for periodical time values day + month
Input_df[!, :hour_cos] = cos.(Input_df[!,:hour] ./ maximum(Input_df[!,:hour]) .* 2*pi);
Input_df[!, :hour_sin] = sin.(Input_df[!,:hour] ./ maximum(Input_df[!,:hour]) .* 2*pi);

Input_df[!, :month_cos] = cos.(Input_df[!,:month] ./ maximum(Input_df[!,:month]) .* 2*pi);
Input_df[!, :month_sin] = sin.(Input_df[!,:month] ./ maximum(Input_df[!,:month]) .* 2*pi);

#Input_df[!, :nday_cos] = cos.(Input_df[!,:nday] ./ maximum(Input_df[!,:nday]) .* 2*pi);
#Input_df[!, :nday_sin] = sin.(Input_df[!,:nday] ./ maximum(Input_df[!,:nday]) .* 2*pi);

## Add seasons

In [417]:
Input_df[!, :spring] = (Input_df[!,:month] .>= 3) .* (Input_df[!,:month] .<= 5);
Input_df[!, :summer] = (Input_df[!,:month] .>= 6) .* (Input_df[!,:month] .<= 8);
Input_df[!, :autumn] = (Input_df[!,:month] .>= 9) .* (Input_df[!,:month] .<= 11);
Input_df[!, :winter] = convert.(Bool, (Input_df[!,:month] .>= 12) .+ (Input_df[!,:month] .<= 2));

Input_df[!, :season] = ifelse.(Input_df[!,:spring] .== true, 1,
                        ifelse.(Input_df[!,:summer] .== true, 2,
                        ifelse.(Input_df[!,:autumn] .== true, 3, 
                        4)));

In [418]:
print("L")

L

In [419]:
#describe(Input_df)

## Add dynamic prices based on Ye et al. 2020

In [420]:
#LU
#=
function set_dynamic_prices(Input_df)
    map(eachrow(Input_df)) do r
        if r.month >= 5 && r.month <= 10
            if (r.hour >= 6 && r.hour <= 9) || (r.hour >= 16 && r.hour <= 17)
                return 0.3f0
            elseif (r.hour >= 10 && r.hour <= 15)
                return 0.6f0
            else
                return 0.15f0
            end
        elseif r.month >= 11 || r.month <= 4
            if (r.hour >= 6 && r.hour <= 9) || (r.hour >= 16 && r.hour <= 17)
                return 0.6f0
            elseif r.hour >= 10 && r.hour <= 15
                return 0.3f0
            else
                return 0.15f0
            end
        end
    end
end      =#  

In [421]:
#LU
#=Input_df[!, "p_buy"] = set_dynamic_prices(Input_df);
Input_df[!, "p_sell"] = 0.5 .* Input_df[!, "p_buy"];
=#

In [422]:
#LU describe(Input_df)

## Extract training, testing + evalution data set for summer, winter, both, all

In [423]:
#LU
#=# filter summer
Input_data_summer = filter(:summer => !=(0), Input_df)
describe(Input_data_summer), size(Input_data_summer)
=#

In [424]:
#=function train_eval_test_split(Input_df)
    train = filter(row -> row.day <= 15, Input_df)
    eval = filter(row -> row.day > 15 && row.day <= 20, Input_df)
    test = filter(row -> row.day > 20, Input_df)
    return train, eval, test
end=#

In [425]:
#=function split_all_data_advanced(Input_df)
    # Initialize the split points and the adjustments
    splitpoint_adjustments = Dict("train" => 0, "eval" => 0, "test" => 0)
    splitpoints = Dict("train" => 15, "eval" => 20)#, "test" => lastday(Input_df.date[1]))

    # Initialize the data sets
    train, eval, test = DataFrame(), DataFrame(), DataFrame()

    # Iterate over each month
    for month in unique(Input_df[:,:month])
        # Adjust the split points based on the previous month's adjustments
        #print("\n New month: ", month, "\n Splitpoint adjustment: ", splitpoint_adjustments)
        splitpoints = Dict("train" => 15, "eval" => 20)#, "test" => lastday(Input_df.date[1]))
        splitpoints["train"] -= splitpoint_adjustments["train"]
        splitpoints["eval"] -= splitpoint_adjustments["eval"]
        #splitpoints["test"] -= splitpoint_adjustments["test"]

        #print("\nSplitpoints: ", splitpoints)

        # Reset adjustments for the current month
        splitpoint_adjustments = Dict("train" => 0, "eval" => 0)#, "test" => 0)

        # Filter the data for the current month
        month_data = filter(row -> row.month == month, Input_df)

        # Check and adjust the split points
        for (yset_name, yday) in splitpoints
            #print("\nmonth: ", month, "\nyset_name: ", yset_name, "\n yday: ", yday, "\n")
            while ((month_data[(month_data.day .== yday) .& (month_data.hour .== 23), :h_countdown][1] > -1) && splitpoint_adjustments[yset_name] <= 4)
                print("split not ok at month ", month, ", day ", yday, ". Trying at next day.", "\n")
                yday += 1
                splitpoint_adjustments[yset_name] += 1
                if splitpoint_adjustments[yset_name] == 4
                    print("\n Maximum adjustment of 4 days reached! Not ideal splitpoint chosen at month ", month, ". \n Splitpoint: ", yset_name, " at ", yday)
                end

            end
            
            splitpoints[yset_name] = yday
        end

        # Split the data for the current month
        train = vcat(train, filter(row -> row.day <= splitpoints["train"], month_data))
        eval = vcat(eval, filter(row -> row.day > splitpoints["train"] && row.day <= splitpoints["eval"], month_data))
        test = vcat(test, filter(row -> row.day > splitpoints["eval"], month_data))
    end

    return train, eval, test
end
=#

In [426]:
function split_all_data_advanced_v2(Input_df)
    # Initialize the data sets
    train = DataFrame()
    eval = DataFrame()
    test = DataFrame()

    # Define the pattern of row counts for each data set
    pattern = [("test", 24*10), ("eval", 5*24), ("train", 15*24)]

    splitpoint_adjustments = Dict("train" => 0, "eval" => 0, "test" => 0)

    # Initialize the pattern index
    pattern_index = 1

    i = 0

    # Iterate over the rows of the DataFrame
    while i < size(Input_df, 1)
        print("$i  ")
        # Get the current data set name and row count from the pattern
        set_name, row_count = pattern[pattern_index]

        row_count -= min(splitpoint_adjustments[set_name], 4*24)

        splitpoint_adjustments[set_name] -= min(splitpoint_adjustments[set_name], 4*24)

        maxi = 0
        maxi += 1
        if maxi > 10000
            println("maxi break")
            break
        end

        while (Input_df[min(i+row_count, nrow(Input_df)), :h_countdown] > -1) #&& splitpoint_adjustments[set_name] <= 3*24)
            if i+row_count-1 >  10000
                println("breaking at $(pattern[pattern_index])")
                break
            end



            print("\n i: $i, set_name: $set_name, row_count: $row_count, days: $(row_count/24)")
            print("\n split point not ok at nday ", Input_df[min(i+row_count-1, size(Input_df, 1)), :nday] ,
                    ".\n month $(Input_df[min(i+row_count-1, size(Input_df, 1)), :month]), day $(Input_df[min(i+row_count-1, size(Input_df, 1)), :day]). Trying at next day. \n")
            

            if (i+row_count+24-1) > (nrow(Input_df)-1)
                println("i+rowcount = $i + $row_count +24 - 1 = $(i+row_count+24-1), which is out of bounds. Break!")
                break
            else
                row_count += 24
                splitpoint_adjustments[set_name] += 24
                print("Total new splitpoint adjustment for $set_name: $(splitpoint_adjustments[set_name])")
            end
            
        end

        print("\n Splitpoint found.")        

        # Add the rows to the appropriate data set
        rows = Input_df[i+1:min(i+row_count, size(Input_df, 1)), :]

        if set_name == "train"
            train, row_count = build_sets(train, rows, 4320, row_count)
        elseif set_name == "eval"
            eval, row_count = build_sets(eval, rows, 1440, row_count)
        else # set_name == "test"
            test, row_count = build_sets(test, rows, 3000, row_count)
        end

        # Move to the next rows
        i = i + row_count

        # Move to the next pattern index, or reset to 1 if the end of the pattern is reached
        pattern_index = pattern_index % length(pattern) + 1
    end

    print("\n Length train-data = ", nrow(train), "\n Length eval-data = ", nrow(eval), "\n Length test-data = ", nrow(test))
    
    return train, eval, test

end

function build_sets(set, rows, limit, row_count)
    set = vcat(set, rows)
    if nrow(set) > limit
        println("limit exceeded")
        row_count -= nrow(set) - limit
        set = first(set, limit)
        
    end
    return set, row_count
end
        


build_sets (generic function with 1 method)

In [427]:
#LU
#=summer_training, summer_evaluation, summer_testing = train_eval_test_split(Input_data_summer)

CSV.write("data/$(ID)_summer_train_TOU.csv", summer_training);
CSV.write("data/$(ID)_summer_eval_TOU.csv", summer_evaluation);
CSV.write("data/$(ID)_summer_test_TOU.csv", summer_testing);
=#

In [428]:
#LU
#=# filter winter
Input_data_winter = filter(:winter => !=(0), Input_df)
describe(Input_data_winter), size(Input_data_winter)
=#

In [429]:
#LU
#=winter_training, winter_evaluation, winter_testing = train_eval_test_split(Input_data_winter)

# write data files
CSV.write("data/$(ID)_winter_train_TOU.csv", winter_training);
CSV.write("data/$(ID)_winter_eval_TOU.csv", winter_evaluation);
CSV.write("data/$(ID)_winter_test_TOU.csv", winter_testing);
=#

In [430]:
#all_training, all_evaluation, all_testing = split_all_data_advanced(Input_df)

#LU 
#=
# write data files
CSV.write("data/$(ID)_all_train_TOU.csv", all_training);
CSV.write("data/$(ID)_all_eval_TOU.csv", all_evaluation);
CSV.write("data/$(ID)_all_test_TOU.csv", all_testing);
=#

In [431]:
#LU
#describe(vcat(Input_data_winter, Input_data_summer) ), size(vcat(Input_data_winter, Input_data_summer) )

In [432]:
#LU
#=# both seasons split
both_training, both_evaluation, both_testing = train_eval_test_split(vcat(Input_data_winter, Input_data_summer))

# write data files
CSV.write("data/$(ID)_both_train_TOU.csv", both_training);
CSV.write("data/$(ID)_both_test_TOU.csv", both_testing);
CSV.write("data/$(ID)_both_eval_TOU.csv", both_evaluation);
=#

## Dataset for fixed prices

In [433]:
Input_df[!, "p_buy"] .= 0.3;
Input_df[!, "p_sell"] .= 0.1;

In [434]:
all_training, all_evaluation, all_testing = split_all_data_advanced_v2(Input_df)

0  
 i: 0, set_name: test, row_count: 240, days: 10.0
 split point not ok at nday 239.
 month 11, day 10. Trying at next day. 
Total new splitpoint adjustment for test: 24
 Splitpoint found.264  
 i: 264, set_name: eval, row_count: 120, days: 5.0
 split point not ok at nday 383.
 month 11, day 16. Trying at next day. 
Total new splitpoint adjustment for eval: 24
 Splitpoint found.408  
 i: 408, set_name: train, row_count: 360, days: 15.0
 split point not ok at nday 767.
 month 12, day 2. Trying at next day. 
Total new splitpoint adjustment for train: 24
 Splitpoint found.792  
 Splitpoint found.1008  
 Splitpoint found.1104  
 Splitpoint found.1440  
 Splitpoint found.1680  
 Splitpoint found.1800  
 i: 1800, set_name: train, row_count: 360, days: 15.0
 split point not ok at nday 2159.
 month 1, day 29. Trying at next day. 
Total new splitpoint adjustment for train: 24
 i: 1800, set_name: train, row_count: 384, days: 16.0
 split point not ok at nday 2183.
 month 1, day 30. Trying at ne

([1m4320×21 DataFrame[0m
[1m  Row [0m│[1m electkwh [0m[1m PV_generation [0m[1m chargekwh [0m[1m h_countdown [0m[1m soc_ev   [0m[1m month [0m[1m day   [0m[1m hour  [0m[1m nday  [0m[1m d_res   [0m[1m hour_cos   [0m[1m hour_sin     [0m[1m month_cos [0m[1m month_sin [0m[1m spring [0m[1m summer [0m[1m autumn [0m[1m winter [0m[1m season [0m[1m p_buy   [0m[1m p_sell  [0m
[1m      [0m│[90m Float64  [0m[90m Float64       [0m[90m Float64?  [0m[90m Float64     [0m[90m Float64? [0m[90m Int64 [0m[90m Int64 [0m[90m Int64 [0m[90m Int64 [0m[90m Float64 [0m[90m Float64    [0m[90m Float64      [0m[90m Float64   [0m[90m Float64   [0m[90m Bool   [0m[90m Bool   [0m[90m Bool   [0m[90m Bool   [0m[90m Int64  [0m[90m Float64 [0m[90m Float64 [0m
──────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [435]:
for (set, df) in [("train" , all_training), ("eval", all_evaluation), ("test", all_testing)]
    print(set, "-data length: ", nrow(df), "\n")
end

train-data length: 4320
eval-data length: 1440
test-data length: 3000


In [436]:
#LU
#Input_data_summer = filter(:summer => !=(0), Input_df);
#Input_data_winter = filter(:winter => !=(0), Input_df);

In [437]:
#LU summer_training, summer_evaluation, summer_testing = train_eval_test_split(Input_data_summer);
#LU winter_training, winter_evaluation, winter_testing = train_eval_test_split(Input_data_winter);
#LU all_training, all_evaluation, all_testing = train_eval_test_split(Input_df);
#LU both_training, both_evaluation, both_testing = train_eval_test_split(vcat(Input_data_winter, Input_data_summer));

In [438]:
#LU
#=CSV.write("data/$(ID)_both_train_fix.csv", both_training);
CSV.write("data/$(ID)_both_test_fix.csv", both_testing);
CSV.write("data/$(ID)_both_eval_fix.csv", both_evaluation);=#

CSV.write("data/$(ID)_all_train_fix.csv", all_training);
CSV.write("data/$(ID)_all_test_fix.csv", all_testing);
CSV.write("data/$(ID)_all_eval_fix.csv", all_evaluation);

#LU
#=CSV.write("data/$(ID)_summer_train_fix.csv", summer_training);
CSV.write("data/$(ID)_summer_test_fix.csv", summer_testing);
CSV.write("data/$(ID)_summer_eval_fix.csv", summer_evaluation);
CSV.write("data/$(ID)_winter_train_fix.csv", winter_training);
CSV.write("data/$(ID)_winter_test_fix.csv", winter_testing);
CSV.write("data/$(ID)_winter_eval_fix.csv", winter_evaluation);
=#

"data/Charger99_all_eval_fix.csv"

**Adding my own analysis**

In [439]:


sell_discount = 0.3

p_buy = 0.3

p_sell = p_buy * sell_discount

grid = all_evaluation[!, "electkwh"] + coalesce.(all_evaluation[!, "chargekwh"], 0) - all_evaluation[!, "PV_generation"]

# Multiply positive values with p_buy and negative values with p_sell
grid_cost = [x > 0 ? x * p_buy : x * p_sell for x in grid]

# Calculate the sum of all these multiplications and store it in cost
cost = sum(grid_cost)

print("Grid purchase cost of $ID in the data: $cost. This is the sonnen benchmark.")



Grid purchase cost of Charger99 in the data: -179.92617851282057. This is the sonnen benchmark.

In [440]:
all_evaluation

Unnamed: 0_level_0,electkwh,PV_generation,chargekwh,h_countdown,soc_ev,month,day,hour,nday,d_res,hour_cos,hour_sin,month_cos,month_sin,spring,summer,autumn,winter,season,p_buy,p_sell
Unnamed: 0_level_1,Float64,Float64,Float64?,Float64,Float64?,Int64,Int64,Int64,Int64,Float64,Float64,Float64,Float64,Float64,Bool,Bool,Bool,Bool,Int64,Float64,Float64
1,0.259,0.0,missing,-1.0,1.0,11,12,0,265,0.259,1.0,0.0,0.866025,-0.5,0,0,1,0,3,0.3,0.1
2,1.859,0.0,missing,-1.0,1.0,11,12,1,266,1.859,0.962917,0.269797,0.866025,-0.5,0,0,1,0,3,0.3,0.1
3,1.933,0.0,missing,-1.0,1.0,11,12,2,267,1.933,0.854419,0.519584,0.866025,-0.5,0,0,1,0,3,0.3,0.1
4,0.235,0.0,missing,-1.0,1.0,11,12,3,268,0.235,0.682553,0.730836,0.866025,-0.5,0,0,1,0,3,0.3,0.1
5,0.709,0.0,missing,-1.0,1.0,11,12,4,269,0.709,0.460065,0.887885,0.866025,-0.5,0,0,1,0,3,0.3,0.1
6,2.545,0.0,missing,-1.0,1.0,11,12,5,270,2.545,0.203456,0.979084,0.866025,-0.5,0,0,1,0,3,0.3,0.1
7,2.421,0.024,missing,-1.0,1.0,11,12,6,271,2.397,-0.0682424,0.997669,0.866025,-0.5,0,0,1,0,3,0.3,0.1
8,1.964,1.467,missing,-1.0,1.0,11,12,7,272,0.497,-0.33488,0.942261,0.866025,-0.5,0,0,1,0,3,0.3,0.1
9,0.26,3.756,missing,-1.0,1.0,11,12,8,273,-3.496,-0.57668,0.81697,0.866025,-0.5,0,0,1,0,3,0.3,0.1
10,1.278,6.642,missing,-1.0,1.0,11,12,9,274,-5.364,-0.775711,0.631088,0.866025,-0.5,0,0,1,0,3,0.3,0.1
