In [384]:
using CSV, DataFrames, Dates, TimeZones
ENV["COLUMNS"] = 1000

1000

In [385]:
# Change ID here for each data set

ID = "Charger09"

"Charger09"

In [386]:
# Read the CSV file from the specified path
df = CSV.read("data/charger_battery_data_$ID.csv", DataFrame)

# Filter the data
df = df[:, [:timestamp, :e_consumption, :e_production, :e_charger, :h_countdown, :soc_ev]]

df.e_consumption = df.e_consumption / 1000
df.e_production = df.e_production / 1000
df.e_charger = df.e_charger / 1000

# Parse the timestamps with timezone offset
timestamps = ZonedDateTime.(df.timestamp, DateFormat("yyyy-mm-dd HH:MM:SSzzzz"))

# Shift the timestamps one hour ahead
timestamps = timestamps .- Hour(1)

# Replace the timestamp column in the DataFrame with the shifted timestamps
df.timestamp = timestamps

35040-element Vector{ZonedDateTime}:
 2020-11-01T00:00:00+01:00
 2020-11-01T00:15:00+01:00
 2020-11-01T00:30:00+01:00
 2020-11-01T00:45:00+01:00
 2020-11-01T01:00:00+01:00
 2020-11-01T01:15:00+01:00
 2020-11-01T01:30:00+01:00
 2020-11-01T01:45:00+01:00
 2020-11-01T02:00:00+01:00
 2020-11-01T02:15:00+01:00
 ⋮
 2021-10-31T21:45:00+01:00
 2021-10-31T22:00:00+01:00
 2021-10-31T22:15:00+01:00
 2021-10-31T22:30:00+01:00
 2021-10-31T22:45:00+01:00
 2021-10-31T23:00:00+01:00
 2021-10-31T23:15:00+01:00
 2021-10-31T23:30:00+01:00
 2021-10-31T23:45:00+01:00

In [387]:
for col_name in names(df)
    missing_count = sum(ismissing.(df[!, col_name]))

    display(("Column: ", col_name, ", Type: ", eltype(df[!, col_name]), ", Missing Values: ", missing_count))
end

("Column: ", "timestamp", ", Type: ", ZonedDateTime, ", Missing Values: ", 0)

("Column: ", "e_consumption", ", Type: ", Float64, ", Missing Values: ", 0)

("Column: ", "e_production", ", Type: ", Float64, ", Missing Values: ", 0)

("Column: ", "e_charger", ", Type: ", Union{Missing, Float64}, ", Missing Values: ", 27674)

("Column: ", "h_countdown", ", Type: ", Float64, ", Missing Values: ", 0)

("Column: ", "soc_ev", ", Type: ", Float64, ", Missing Values: ", 0)

In [388]:
describe(df)

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Union…,Any,Union…,Any,Int64,Type
1,timestamp,,2020-11-01T00:00:00+01:00,,2021-10-31T23:45:00+01:00,0,ZonedDateTime
2,e_consumption,0.182519,0.0,0.083,2.188,0,Float64
3,e_production,0.27991,0.0,0.0,2.022,0,Float64
4,e_charger,0.436712,0.0,0.0,1.73902,27674,"Union{Missing, Float64}"
5,h_countdown,0.45772,-1.0,-1.0,27.0,0,Float64
6,soc_ev,0.97766,0.0,1.0,1.0,0,Float64


Resamplign from 15min intervalls to 1h intervalls. In the future, I'd like to try to work with 15min intervals.

In [389]:
function resample(df, time_column, interval)
    # Round the timestamps to the nearest hour
    df[!, time_column] = Dates.floor.(df[!, time_column], interval)
    
    # Define the columns to be summed
    sum_columns = ["e_consumption", "e_production", "e_charger"]
    
    # Group by the rounded timestamps and sum the other columns
    new_df = combine(groupby(df, time_column), sum_columns .=> (x -> sum(coalesce.(x, 0))) .=> sum_columns)
    
    # Handle "h_countdown" and "soc_ev" separately
    min_values = combine(groupby(df, time_column), "h_countdown" => maximum => "h_countdown")
    new_df = leftjoin(new_df, min_values, on=time_column)
    
    min_values = combine(groupby(df, time_column), "soc_ev" => minimum => "soc_ev")
    new_df = leftjoin(new_df, min_values, on=time_column)

    #Increase every value of h_countdown to the next higher integer
    #new_df.h_countdown = ceil.(new_df.h_countdown) .-1

    # Change first countdown value: +1 
    for i in 2:(nrow(new_df))
        if new_df[i, :h_countdown] > -1 
            new_df[i, :h_countdown] = floor.(new_df[i, :h_countdown])
            if new_df[i, :h_countdown] == new_df[(i-1), :h_countdown]
                new_df[(i-1), :h_countdown] += 1
            elseif new_df[i, :h_countdown] == 0 && new_df[i-1, :h_countdown] == -1
                new_df[(i-1), :h_countdown] = 1
                new_df[(i-1), :soc_ev] = new_df[(i), :soc_ev]
            end
        end
        if new_df[i, :h_countdown] == -1 && new_df[i, :soc_ev] < 1
            new_df[i, :soc_ev] = 1
        end
    end

    return new_df
end


resample (generic function with 1 method)

In [390]:
# Resample the data to 1-hour intervals
df_resampled = resample(df, :timestamp, Dates.Hour(1))

Unnamed: 0_level_0,timestamp,e_consumption,e_production,e_charger,h_countdown,soc_ev
Unnamed: 0_level_1,ZonedDa…,Float64,Float64,Float64,Float64?,Float64?
1,2020-11-01T00:00:00+01:00,0.106,0.0,0.0,-1.0,1.0
2,2020-11-01T01:00:00+01:00,0.125,0.0,0.0,-1.0,1.0
3,2020-11-01T02:00:00+01:00,0.126,0.0,0.0,-1.0,1.0
4,2020-11-01T03:00:00+01:00,0.129,0.0,0.0,-1.0,1.0
5,2020-11-01T04:00:00+01:00,0.129,0.0,0.0,-1.0,1.0
6,2020-11-01T05:00:00+01:00,0.13,0.0,0.0,-1.0,1.0
7,2020-11-01T06:00:00+01:00,0.145,0.031,0.0,-1.0,1.0
8,2020-11-01T07:00:00+01:00,0.579,0.226,0.0,-1.0,1.0
9,2020-11-01T08:00:00+01:00,1.497,0.545,0.0,-1.0,1.0
10,2020-11-01T09:00:00+01:00,1.432,0.685,0.0,-1.0,1.0


In [391]:
# Change the type of the chargekwh column to Union{Missing, Float64}
df_resampled.e_charge = convert(Vector{Union{Missing, Float64}}, df_resampled.e_charger)

# Create a new DataFrame with the desired structure
new_df = DataFrame(
    electkwh = df_resampled.e_consumption,
    PV_generation = df_resampled.e_production,
    chargekwh = df_resampled.e_charger,
    h_countdown = df_resampled.h_countdown,
    soc_ev = df_resampled.soc_ev,
    month = month.(DateTime.(df_resampled.timestamp)),
    day = day.(DateTime.(df_resampled.timestamp)),
    hour = hour.(DateTime.(df_resampled.timestamp))
)

# Change the type of the chargekwh column to Union{Missing, Float64}
new_df.chargekwh = convert(Vector{Union{Missing, Float64}}, new_df.chargekwh)

# In every row where h_countdown is -1, set chargekwh to 'missing'
new_df[new_df.h_countdown .== -1, :chargekwh] .= missing

6606-element view(::Vector{Union{Missing, Float64}}, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  8751, 8752, 8753, 8754, 8755, 8756, 8757, 8758, 8759, 8760]) with eltype Union{Missing, Float64}:
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 ⋮
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing

In [392]:
CSV.write("data/sonnen$(ID)_datafile_all.csv", new_df)

"data/sonnenCharger09_datafile_all.csv"

In [393]:
#Input_df = CSV.read("data/sonnen$(ID)_datafile_all.csv", DataFrame); # Data loading in original code started here.
Input_df = new_df

Unnamed: 0_level_0,electkwh,PV_generation,chargekwh,h_countdown,soc_ev,month,day,hour
Unnamed: 0_level_1,Float64,Float64,Float64?,Float64?,Float64?,Int64,Int64,Int64
1,0.106,0.0,missing,-1.0,1.0,11,1,0
2,0.125,0.0,missing,-1.0,1.0,11,1,1
3,0.126,0.0,missing,-1.0,1.0,11,1,2
4,0.129,0.0,missing,-1.0,1.0,11,1,3
5,0.129,0.0,missing,-1.0,1.0,11,1,4
6,0.13,0.0,missing,-1.0,1.0,11,1,5
7,0.145,0.031,missing,-1.0,1.0,11,1,6
8,0.579,0.226,missing,-1.0,1.0,11,1,7
9,1.497,0.545,missing,-1.0,1.0,11,1,8
10,1.432,0.685,missing,-1.0,1.0,11,1,9


In [394]:
#describe(Input_df)

In [395]:
# add new column with number of days
Input_df[!, :nday] = 1:nrow(Input_df)

1:8760

In [396]:
# calculate residual demand
Input_df[!, :d_res] = Input_df[!,:electkwh] + coalesce.(Input_df[!,:chargekwh], 0) - Input_df[!,:PV_generation]

8760-element Vector{Float64}:
 0.106
 0.125
 0.126
 0.129
 0.129
 0.13
 0.11400000000000002
 0.3529999999999999
 0.9519999999999998
 0.7469999999999999
 ⋮
 0.09599999999999997
 0.986
 0.46699999999999997
 0.468
 0.326
 0.083
 0.084
 0.08600000000000001
 0.086

## Add periodical time representation using cos/sin

In [397]:
# add columns with cos and sin values for periodical time values day + month
Input_df[!, :hour_cos] = cos.(Input_df[!,:hour] ./ maximum(Input_df[!,:hour]) .* 2*pi);
Input_df[!, :hour_sin] = sin.(Input_df[!,:hour] ./ maximum(Input_df[!,:hour]) .* 2*pi);

Input_df[!, :month_cos] = cos.(Input_df[!,:month] ./ maximum(Input_df[!,:month]) .* 2*pi);
Input_df[!, :month_sin] = sin.(Input_df[!,:month] ./ maximum(Input_df[!,:month]) .* 2*pi);

#Input_df[!, :nday_cos] = cos.(Input_df[!,:nday] ./ maximum(Input_df[!,:nday]) .* 2*pi);
#Input_df[!, :nday_sin] = sin.(Input_df[!,:nday] ./ maximum(Input_df[!,:nday]) .* 2*pi);

## Add seasons

In [398]:
Input_df[!, :spring] = (Input_df[!,:month] .>= 3) .* (Input_df[!,:month] .<= 5);
Input_df[!, :summer] = (Input_df[!,:month] .>= 6) .* (Input_df[!,:month] .<= 8);
Input_df[!, :autumn] = (Input_df[!,:month] .>= 9) .* (Input_df[!,:month] .<= 11);
Input_df[!, :winter] = convert.(Bool, (Input_df[!,:month] .>= 12) .+ (Input_df[!,:month] .<= 2));

Input_df[!, :season] = ifelse.(Input_df[!,:spring] .== true, 1,
                        ifelse.(Input_df[!,:summer] .== true, 2,
                        ifelse.(Input_df[!,:autumn] .== true, 3, 
                        4)));

In [399]:
describe(Input_df)

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Real,Float64,Real,Int64,Type
1,electkwh,0.730077,0.001,0.385,7.716,0,Float64
2,PV_generation,1.11964,0.0,0.007,7.673,0,Float64
3,chargekwh,1.49342,0.0,0.0,6.92327,6606,"Union{Missing, Float64}"
4,h_countdown,0.583333,-1.0,-1.0,27.0,0,"Union{Missing, Float64}"
5,soc_ev,0.963877,0.0,1.0,1.0,0,"Union{Missing, Float64}"
6,month,6.52683,1.0,7.0,12.0,0,Int64
7,day,15.7209,1.0,16.0,31.0,0,Int64
8,hour,11.5,0.0,11.5,23.0,0,Int64
9,nday,4380.5,1.0,4380.5,8760.0,0,Int64
10,d_res,-0.0223464,-7.092,0.143,10.517,0,Float64


## Add dynamic prices based on Ye et al. 2020

In [400]:
#LU
#=
function set_dynamic_prices(Input_df)
    map(eachrow(Input_df)) do r
        if r.month >= 5 && r.month <= 10
            if (r.hour >= 6 && r.hour <= 9) || (r.hour >= 16 && r.hour <= 17)
                return 0.3f0
            elseif (r.hour >= 10 && r.hour <= 15)
                return 0.6f0
            else
                return 0.15f0
            end
        elseif r.month >= 11 || r.month <= 4
            if (r.hour >= 6 && r.hour <= 9) || (r.hour >= 16 && r.hour <= 17)
                return 0.6f0
            elseif r.hour >= 10 && r.hour <= 15
                return 0.3f0
            else
                return 0.15f0
            end
        end
    end
end      =#  

In [401]:
#LU
#=Input_df[!, "p_buy"] = set_dynamic_prices(Input_df);
Input_df[!, "p_sell"] = 0.5 .* Input_df[!, "p_buy"];
=#

In [402]:
#LU describe(Input_df)

## Extract training, testing + evalution data set for summer, winter, both, all

In [403]:
#LU
#=# filter summer
Input_data_summer = filter(:summer => !=(0), Input_df)
describe(Input_data_summer), size(Input_data_summer)
=#

In [404]:
#=function train_eval_test_split(Input_df)
    train = filter(row -> row.day <= 15, Input_df)
    eval = filter(row -> row.day > 15 && row.day <= 20, Input_df)
    test = filter(row -> row.day > 20, Input_df)
    return train, eval, test
end=#

In [405]:
function split_all_data_advanced_v2(Input_df)
    # Initialize the data sets
    train = DataFrame()
    eval = DataFrame()
    test = DataFrame()

    # Define the pattern of row counts for each data set
    pattern = [("test", 24*10), ("eval", 5*24), ("train", 15*24)]

    splitpoint_adjustments = Dict("train" => 0, "eval" => 0, "test" => 0)

    # Initialize the pattern index
    pattern_index = 1

    i = 0

    # Iterate over the rows of the DataFrame
    while i < size(Input_df, 1)
        print("$i  ")
        # Get the current data set name and row count from the pattern
        set_name, row_count = pattern[pattern_index]

        row_count -= min(splitpoint_adjustments[set_name], 4*24)

        splitpoint_adjustments[set_name] -= min(splitpoint_adjustments[set_name], 4*24)

        maxi = 0
        maxi += 1
        if maxi > 10000
            println("maxi break")
            break
        end

        while (Input_df[min(i+row_count, nrow(Input_df)), :h_countdown] > -1) #&& splitpoint_adjustments[set_name] <= 3*24)
            if i+row_count-1 >  10000
                println("breaking at $(pattern[pattern_index])")
                break
            end



            print("\n i: $i, set_name: $set_name, row_count: $row_count, days: $(row_count/24)")
            print("\n split point not ok at nday ", Input_df[min(i+row_count-1, size(Input_df, 1)), :nday] ,
                    ".\n month $(Input_df[min(i+row_count-1, size(Input_df, 1)), :month]), day $(Input_df[min(i+row_count-1, size(Input_df, 1)), :day]). Trying at next day. \n")
            

            if (i+row_count+24-1) > (nrow(Input_df)-1)
                println("i+rowcount = $i + $row_count +24 - 1 = $(i+row_count+24-1), which is out of bounds. Break!")
                break
            else
                row_count += 24
                splitpoint_adjustments[set_name] += 24
                print("Total new splitpoint adjustment for $set_name: $(splitpoint_adjustments[set_name])")
            end
            
        end

        print("\n Splitpoint found.")        

        # Add the rows to the appropriate data set
        rows = Input_df[i+1:min(i+row_count, size(Input_df, 1)), :]

        if set_name == "train"
            train, row_count = build_sets(train, rows, 4320, row_count)
        elseif set_name == "eval"
            eval, row_count = build_sets(eval, rows, 1440, row_count)
        else # set_name == "test"
            test, row_count = build_sets(test, rows, 3000, row_count)
        end

        # Move to the next rows
        i = i + row_count

        # Move to the next pattern index, or reset to 1 if the end of the pattern is reached
        pattern_index = pattern_index % length(pattern) + 1
    end

    print("\n Length train-data = ", nrow(train), "\n Length eval-data = ", nrow(eval), "\n Length test-data = ", nrow(test))
    
    return train, eval, test

end

function build_sets(set, rows, limit, row_count)
    set = vcat(set, rows)
    if nrow(set) > limit
        println("limit exceeded")
        row_count -= nrow(set) - limit
        set = first(set, limit)
        
    end
    return set, row_count
end
        


build_sets (generic function with 1 method)

In [406]:
#LU
#=summer_training, summer_evaluation, summer_testing = train_eval_test_split(Input_data_summer)

CSV.write("data/$(ID)_summer_train_TOU.csv", summer_training);
CSV.write("data/$(ID)_summer_eval_TOU.csv", summer_evaluation);
CSV.write("data/$(ID)_summer_test_TOU.csv", summer_testing);
=#

In [407]:
#LU
#=# filter winter
Input_data_winter = filter(:winter => !=(0), Input_df)
describe(Input_data_winter), size(Input_data_winter)
=#

In [408]:
#LU
#=winter_training, winter_evaluation, winter_testing = train_eval_test_split(Input_data_winter)

# write data files
CSV.write("data/$(ID)_winter_train_TOU.csv", winter_training);
CSV.write("data/$(ID)_winter_eval_TOU.csv", winter_evaluation);
CSV.write("data/$(ID)_winter_test_TOU.csv", winter_testing);
=#

In [409]:
#all_training, all_evaluation, all_testing = split_all_data_advanced(Input_df)

#LU 
#=
# write data files
CSV.write("data/$(ID)_all_train_TOU.csv", all_training);
CSV.write("data/$(ID)_all_eval_TOU.csv", all_evaluation);
CSV.write("data/$(ID)_all_test_TOU.csv", all_testing);
=#

In [410]:
#LU
#describe(vcat(Input_data_winter, Input_data_summer) ), size(vcat(Input_data_winter, Input_data_summer) )

In [411]:
#LU
#=# both seasons split
both_training, both_evaluation, both_testing = train_eval_test_split(vcat(Input_data_winter, Input_data_summer))

# write data files
CSV.write("data/$(ID)_both_train_TOU.csv", both_training);
CSV.write("data/$(ID)_both_test_TOU.csv", both_testing);
CSV.write("data/$(ID)_both_eval_TOU.csv", both_evaluation);
=#

## Dataset for fixed prices

In [412]:
Input_df[!, "p_buy"] .= 0.4; # 0.3
Input_df[!, "p_sell"] .= 0.08; # 0.1

In [413]:
all_training, all_evaluation, all_testing = split_all_data_advanced_v2(Input_df)

0  
 Splitpoint found.240  
 Splitpoint found.360  
 Splitpoint found.720  
 Splitpoint found.960  
 Splitpoint found.1080  
 Splitpoint found.1440  
 Splitpoint found.1680  
 Splitpoint found.1800  
 Splitpoint found.2160  
 Splitpoint found.2400  
 Splitpoint found.2520  
 Splitpoint found.2880  
 Splitpoint found.3120  
 Splitpoint found.3240  
 Splitpoint found.3600  
 Splitpoint found.3840  
 Splitpoint found.3960  
 i: 3960, set_name: train, row_count: 360, days: 15.0
 split point not ok at nday 4319.
 month 4, day 29. Trying at next day. 
Total new splitpoint adjustment for train: 24
 Splitpoint found.4344  
 i: 4344, set_name: test, row_count: 240, days: 10.0
 split point not ok at nday 4583.
 month 5, day 10. Trying at next day. 
Total new splitpoint adjustment for test: 24
 i: 4344, set_name: test, row_count: 264, days: 11.0
 split point not ok at nday 4607.
 month 5, day 11. Trying at next day. 
Total new splitpoint adjustment for test: 48
 Splitpoint found.4632  
 i: 4632, 

([1m4320×21 DataFrame[0m
[1m  Row [0m│[1m electkwh [0m[1m PV_generation [0m[1m chargekwh [0m[1m h_countdown [0m[1m soc_ev   [0m[1m month [0m[1m day   [0m[1m hour  [0m[1m nday  [0m[1m d_res   [0m[1m hour_cos   [0m[1m hour_sin     [0m[1m month_cos [0m[1m month_sin [0m[1m spring [0m[1m summer [0m[1m autumn [0m[1m winter [0m[1m season [0m[1m p_buy   [0m[1m p_sell  [0m
[1m      [0m│[90m Float64  [0m[90m Float64       [0m[90m Float64?  [0m[90m Float64?    [0m[90m Float64? [0m[90m Int64 [0m[90m Int64 [0m[90m Int64 [0m[90m Int64 [0m[90m Float64 [0m[90m Float64    [0m[90m Float64      [0m[90m Float64   [0m[90m Float64   [0m[90m Bool   [0m[90m Bool   [0m[90m Bool   [0m[90m Bool   [0m[90m Int64  [0m[90m Float64 [0m[90m Float64 [0m
──────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [414]:
for (set, df) in [("train" , all_training), ("eval", all_evaluation), ("test", all_testing)]
    print(set, "-data length: ", nrow(df), "\n")
end

train-data length: 4320
eval-data length: 1440
test-data length: 3000


In [415]:
#LU
#Input_data_summer = filter(:summer => !=(0), Input_df);
#Input_data_winter = filter(:winter => !=(0), Input_df);

In [416]:
#LU summer_training, summer_evaluation, summer_testing = train_eval_test_split(Input_data_summer);
#LU winter_training, winter_evaluation, winter_testing = train_eval_test_split(Input_data_winter);
#LU all_training, all_evaluation, all_testing = train_eval_test_split(Input_df);
#LU both_training, both_evaluation, both_testing = train_eval_test_split(vcat(Input_data_winter, Input_data_summer));

In [417]:
function check_and_update_h_countdown!(df::DataFrame)
    for i in 1:(nrow(df) - 1)
        if df.h_countdown[i] == 0 && df.h_countdown[i + 1] != -1
            df.h_countdown[i + 1] = -1
            df.soc_ev[i + 1] = 1.0
            print("inserte -1 at: ", i+1)
        end
    end
end

check_and_update_h_countdown!(all_training)

check_and_update_h_countdown!(all_testing)

check_and_update_h_countdown!(all_evaluation)

In [418]:
# An algorithm that determines the soc_ev for each charging transaciton by interpolation between start and end of the transaction.

function interpolate_soc_ev!(df::DataFrame)
    start_idx = nothing
    end_idx = nothing

    for i in 1:nrow(df)
        if df.h_countdown[i] > 0 && (i == 1 || df.h_countdown[i-1] == -1)
            start_idx = i
        end

        if df.h_countdown[i] == 0
            end_idx = i
            if start_idx !== nothing
                start_val = df.soc_ev[start_idx]
                end_val = 1.0
                for j in start_idx:end_idx
                    df.soc_ev[j] = start_val + (end_val - start_val) * (j - start_idx) / (end_idx - start_idx)
                end
                start_idx = nothing
            end
        end
    end
    
end

interpolate_soc_ev!(all_training)


In [419]:
any(ismissing, eachcol(all_training))

false

In [420]:
#LU
#=CSV.write("data/$(ID)_both_train_fix.csv", both_training);
CSV.write("data/$(ID)_both_test_fix.csv", both_testing);
CSV.write("data/$(ID)_both_eval_fix.csv", both_evaluation);=#

CSV.write("data/$(ID)_all_train_fix.csv", all_training);
CSV.write("data/$(ID)_all_test_fix.csv", all_testing);
CSV.write("data/$(ID)_all_eval_fix.csv", all_evaluation);

#LU
#=CSV.write("data/$(ID)_summer_train_fix.csv", summer_training);
CSV.write("data/$(ID)_summer_test_fix.csv", summer_testing);
CSV.write("data/$(ID)_summer_eval_fix.csv", summer_evaluation);
CSV.write("data/$(ID)_winter_train_fix.csv", winter_training);
CSV.write("data/$(ID)_winter_test_fix.csv", winter_testing);
CSV.write("data/$(ID)_winter_eval_fix.csv", winter_evaluation);
=#

"data/Charger09_all_eval_fix.csv"

**Adding my own analysis**

In [423]:
# Find out number of transactions per charger and data set:

using CSV, DataFrames

# Create a list of IDs
ids = ["Charger01", "Charger02", "Charger03", "Charger04", "Charger05",
       "Charger06", "Charger07", "Charger08", "Charger09", "Charger98"]

for ID in ids
    # Read data from CSV files
    all_training = CSV.read("data/$(ID)_all_train_fix.csv", DataFrame)
    all_testing = CSV.read("data/$(ID)_all_test_fix.csv", DataFrame)
    all_evaluation = CSV.read("data/$(ID)_all_eval_fix.csv", DataFrame)

    # Count zeros in the "h_coundown" column
    zeros_training = count(x -> x == 0, all_training.h_countdown)
    zeros_testing = count(x -> x == 0, all_testing.h_countdown)
    zeros_evaluation = count(x -> x == 0, all_evaluation.h_countdown)

    println("ID: $ID")
    println("Charging Transactions in training data: $zeros_training")
    println("Charging Transactions in testing data: $zeros_testing")
    println("Charging Transactions in evaluation data: $zeros_evaluation")
    println()
end


ID: Charger01
Charging Transactions in training data: 33
Charging Transactions in testing data: 26
Charging Transactions in evaluation data: 14

ID: Charger02
Charging Transactions in training data: 17
Charging Transactions in testing data: 16
Charging Transactions in evaluation data: 7

ID: Charger03
Charging Transactions in training data: 19
Charging Transactions in testing data: 15
Charging Transactions in evaluation data: 5

ID: Charger04
Charging Transactions in training data: 35
Charging Transactions in testing data: 19
Charging Transactions in evaluation data: 9

ID: Charger05
Charging Transactions in training data: 35
Charging Transactions in testing data: 21
Charging Transactions in evaluation data: 16

ID: Charger06
Charging Transactions in training data: 38
Charging Transactions in testing data: 22
Charging Transactions in evaluation data: 7

ID: Charger07
Charging Transactions in training data: 14
Charging Transactions in testing data: 9
Charging Transactions in evaluation 

In [430]:
# Add interpolation of EV socs and correction of countdown for the synthetic data set

runs = ["train", "eval", "test"]

function check_and_update_h_countdown!(df::DataFrame)
   for i in 1:(nrow(df) - 1)
       if df.h_countdown[i] == 0 && df.h_countdown[i + 1] != -1
           df.h_countdown[i + 1] = -1
           df.soc_ev[i + 1] = 1.0
           print("inserted -1 at: ", i+1)
       end
   end
end

function interpolate_soc_ev!(df::DataFrame)
    start_idx = nothing
    end_idx = nothing

    for i in 1:nrow(df)
        if df.h_countdown[i] > 0 && (i == 1 || df.h_countdown[i-1] == -1)
            start_idx = i
        end

        if df.h_countdown[i] == 0
            end_idx = i
            if start_idx !== nothing
                start_val = df.soc_ev[start_idx]
                end_val = 1.0
                for j in start_idx:end_idx
                    df.soc_ev[j] = start_val + (end_val - start_val) * (j - start_idx) / (end_idx - start_idx)
                end
                start_idx = nothing
            end
        end
    end
    
end



for run in runs
   println("Run: ", run)
   all_data98 = CSV.read("data/Charger98_all_$(run)_fix.csv", DataFrame)
   check_and_update_h_countdown!(all_data98)

   if run == "train"
    interpolate_soc_ev!(all_data98)
   end

   CSV.write("data/Charger98_all_$(run)_fix.csv", all_data98);
   
end


Run: train
Run: eval
Run: test
inserted -1 at: 1741