In [None]:
using CSV, DataFrames, Statistics, Dates, Gadfly, Random;
include("utils/precipitation.jl");

In [None]:
"""
    generate_variation(val)

Return `val` multiplied by a random factor drawn uniformly from `[0.8, 1.2)`,
i.e. `val` perturbed by up to ±20 %. Draws exactly one number from the global RNG.
"""
function generate_variation(val)
    # rand() is in [0, 1), so the factor lies in [0.8, 1.2).
    factor = 0.8 + rand() * 0.4
    return val * factor
end

In [None]:
"""
    generate_new_entry(df, entry)

Append to `df` a synthetic row derived from `entry` and return `df`.

`DATE`, `SURVERSE`, `FS_dist` and `SS_dist` are copied unchanged; the `*_sum`,
`*_max` and `*_max3` values of both the `FS` and `SS` groups are each passed
through `generate_variation`. The row is pushed positionally, so `df` is expected
to have these ten columns in exactly this order.
"""
function generate_new_entry(df, entry)
    jitter = generate_variation

    # Positional row: copied fields first, then the jittered FS and SS measurements.
    new_row = [
        entry.DATE, entry.SURVERSE, entry.FS_dist, entry.SS_dist,
        jitter(entry.FS_sum), jitter(entry.FS_max), jitter(entry.FS_max3),
        jitter(entry.SS_sum), jitter(entry.SS_max), jitter(entry.SS_max3),
    ]

    return push!(df, new_row)
end

## Ouvrage 3260

In [None]:
# Load the parsed records for ouvrage 3260 into a DataFrame and show its dimensions.
# `CSV.read(path)` without a sink argument is deprecated/rejected by later CSV.jl
# releases; `DataFrame(CSV.File(path))` works on both old and new versions.
data_set = DataFrame(CSV.File("data/parsed/ouvrage_3260.csv"));
size(data_set)

In [None]:
# Preview 10 rows of the data after passing it through `shuffleDf`.
# NOTE(review): `shuffleDf` is not defined in this notebook — presumably it comes from
# utils/precipitation.jl and returns a row-shuffled table; confirm.
first(shuffleDf(data_set), 10)

In [None]:
# Per-column summary statistics of the raw data.
describe(data_set)

In [None]:
# Display only the rows of the raw data whose SURVERSE flag equals 1.
filter(row -> row.SURVERSE == 1, data_set)

In [None]:
# Plot FS_sum (x) against SS_sum (y) for the raw data, colored by the SURVERSE flag.
plot(data_set, x=:FS_sum, y=:SS_sum, color=:SURVERSE)

In [None]:
# Oversample the data up to `target_rows` rows: keep every original row, then append
# randomly jittered copies of existing rows (built by `generate_new_entry`).
target_rows = 10000;
n_to_random_sample = target_rows - size(data_set, 1);
new_entries = copy(data_set)

# Split the source rows by class once instead of re-filtering on every iteration.
surverse_pool = filter(row -> row.SURVERSE == 1, data_set);
no_surverse_pool = filter(row -> row.SURVERSE == 0, data_set);

# Sampling from an empty class would otherwise fail with an opaque indexing error.
if n_to_random_sample > 0 && (nrow(surverse_pool) == 0 || nrow(no_surverse_pool) == 0)
    error("cannot oversample: data_set needs rows with SURVERSE == 1 and SURVERSE == 0")
end

for i = 1:n_to_random_sample
    # rand() > 0.45 holds with probability 0.55: ~55 % of synthetic rows are SURVERSE == 1.
    pool = rand() > 0.45 ? surverse_pool : no_surverse_pool;
    entry = pool[rand(1:nrow(pool)), :];
    generate_new_entry(new_entries, entry);
end

In [None]:
# Per-column summary statistics of the oversampled table.
describe(new_entries)

In [None]:
# Display only the rows of the oversampled table whose SURVERSE flag equals 1.
filter(row -> row.SURVERSE == 1, new_entries)

In [None]:
# Plot FS_sum (x) against SS_sum (y) for the oversampled table, colored by SURVERSE.
plot(new_entries, x=:FS_sum, y=:SS_sum, color=:SURVERSE)

In [None]:
# Persist the oversampled table. The output directory is created first so the write
# does not fail when `data/parsed/oversampled/` does not exist yet.
output_path = "data/parsed/oversampled/ouvrage_3260.csv";
mkpath(dirname(output_path));
CSV.write(output_path, new_entries)

## Ouvrage 3350

In [None]:
# Load the parsed records for ouvrage 3350 into a DataFrame and show its dimensions.
# `CSV.read(path)` without a sink argument is deprecated/rejected by later CSV.jl
# releases; `DataFrame(CSV.File(path))` works on both old and new versions.
data_set = DataFrame(CSV.File("data/parsed/ouvrage_3350.csv"));
size(data_set)

In [None]:
# Per-column summary statistics of the raw data.
describe(data_set)

In [None]:
# Oversample the data up to `target_rows` rows: keep every original row, then append
# randomly jittered copies of existing rows (built by `generate_new_entry`).
target_rows = 10000;
n_to_random_sample = target_rows - size(data_set, 1);
new_entries = copy(data_set)

# Split the source rows by class once instead of re-filtering on every iteration.
surverse_pool = filter(row -> row.SURVERSE == 1, data_set);
no_surverse_pool = filter(row -> row.SURVERSE == 0, data_set);

# Sampling from an empty class would otherwise fail with an opaque indexing error.
if n_to_random_sample > 0 && (nrow(surverse_pool) == 0 || nrow(no_surverse_pool) == 0)
    error("cannot oversample: data_set needs rows with SURVERSE == 1 and SURVERSE == 0")
end

for i = 1:n_to_random_sample
    # rand() > 0.47 holds with probability 0.53: ~53 % of synthetic rows are SURVERSE == 1.
    pool = rand() > 0.47 ? surverse_pool : no_surverse_pool;
    entry = pool[rand(1:nrow(pool)), :];
    generate_new_entry(new_entries, entry);
end

In [None]:
# Per-column summary statistics of the oversampled table.
describe(new_entries)

In [None]:
# Persist the oversampled table. The output directory is created first so the write
# does not fail when `data/parsed/oversampled/` does not exist yet.
output_path = "data/parsed/oversampled/ouvrage_3350.csv";
mkpath(dirname(output_path));
CSV.write(output_path, new_entries)

## Ouvrage 4240

In [None]:
# Load the parsed records for ouvrage 4240 into a DataFrame and show its dimensions.
# `CSV.read(path)` without a sink argument is deprecated/rejected by later CSV.jl
# releases; `DataFrame(CSV.File(path))` works on both old and new versions.
data_set = DataFrame(CSV.File("data/parsed/ouvrage_4240.csv"));
size(data_set)

In [None]:
# Per-column summary statistics of the raw data.
describe(data_set)

In [None]:
# Oversample the data up to `target_rows` rows: keep every original row, then append
# randomly jittered copies of existing rows (built by `generate_new_entry`).
target_rows = 10000;
n_to_random_sample = target_rows - size(data_set, 1);
new_entries = copy(data_set)

# Split the source rows by class once instead of re-filtering on every iteration.
surverse_pool = filter(row -> row.SURVERSE == 1, data_set);
no_surverse_pool = filter(row -> row.SURVERSE == 0, data_set);

# Sampling from an empty class would otherwise fail with an opaque indexing error.
if n_to_random_sample > 0 && (nrow(surverse_pool) == 0 || nrow(no_surverse_pool) == 0)
    error("cannot oversample: data_set needs rows with SURVERSE == 1 and SURVERSE == 0")
end

for i = 1:n_to_random_sample
    # rand() > 0.45 holds with probability 0.55: ~55 % of synthetic rows are SURVERSE == 1.
    pool = rand() > 0.45 ? surverse_pool : no_surverse_pool;
    entry = pool[rand(1:nrow(pool)), :];
    generate_new_entry(new_entries, entry);
end

In [None]:
# Per-column summary statistics of the oversampled table.
describe(new_entries)

In [None]:
# Persist the oversampled table. The output directory is created first so the write
# does not fail when `data/parsed/oversampled/` does not exist yet.
output_path = "data/parsed/oversampled/ouvrage_4240.csv";
mkpath(dirname(output_path));
CSV.write(output_path, new_entries)

## Ouvrage 4350

In [None]:
# Load the parsed records for ouvrage 4350 into a DataFrame and show its dimensions.
# `CSV.read(path)` without a sink argument is deprecated/rejected by later CSV.jl
# releases; `DataFrame(CSV.File(path))` works on both old and new versions.
data_set = DataFrame(CSV.File("data/parsed/ouvrage_4350.csv"));
size(data_set)

In [None]:
# Per-column summary statistics of the raw data.
describe(data_set)

In [None]:
# Oversample the data up to `target_rows` rows: keep every original row, then append
# randomly jittered copies of existing rows (built by `generate_new_entry`).
target_rows = 10000;
n_to_random_sample = target_rows - size(data_set, 1);
new_entries = copy(data_set)

# Split the source rows by class once instead of re-filtering on every iteration.
surverse_pool = filter(row -> row.SURVERSE == 1, data_set);
no_surverse_pool = filter(row -> row.SURVERSE == 0, data_set);

# Sampling from an empty class would otherwise fail with an opaque indexing error.
if n_to_random_sample > 0 && (nrow(surverse_pool) == 0 || nrow(no_surverse_pool) == 0)
    error("cannot oversample: data_set needs rows with SURVERSE == 1 and SURVERSE == 0")
end

for i = 1:n_to_random_sample
    # rand() > 0.45 holds with probability 0.55: ~55 % of synthetic rows are SURVERSE == 1.
    pool = rand() > 0.45 ? surverse_pool : no_surverse_pool;
    entry = pool[rand(1:nrow(pool)), :];
    generate_new_entry(new_entries, entry);
end

In [None]:
# Per-column summary statistics of the oversampled table.
describe(new_entries)

In [None]:
# Persist the oversampled table. The output directory is created first so the write
# does not fail when `data/parsed/oversampled/` does not exist yet.
output_path = "data/parsed/oversampled/ouvrage_4350.csv";
mkpath(dirname(output_path));
CSV.write(output_path, new_entries)

## Ouvrage 4380

In [None]:
# Load the parsed records for ouvrage 4380 into a DataFrame and show its dimensions.
# `CSV.read(path)` without a sink argument is deprecated/rejected by later CSV.jl
# releases; `DataFrame(CSV.File(path))` works on both old and new versions.
data_set = DataFrame(CSV.File("data/parsed/ouvrage_4380.csv"));
size(data_set)

In [None]:
# Per-column summary statistics of the raw data.
describe(data_set)

In [None]:
# Oversample the data up to `target_rows` rows: keep every original row, then append
# randomly jittered copies of existing rows (built by `generate_new_entry`).
target_rows = 10000;
n_to_random_sample = target_rows - size(data_set, 1);
new_entries = copy(data_set)

# Split the source rows by class once instead of re-filtering on every iteration.
surverse_pool = filter(row -> row.SURVERSE == 1, data_set);
no_surverse_pool = filter(row -> row.SURVERSE == 0, data_set);

# Sampling from an empty class would otherwise fail with an opaque indexing error.
if n_to_random_sample > 0 && (nrow(surverse_pool) == 0 || nrow(no_surverse_pool) == 0)
    error("cannot oversample: data_set needs rows with SURVERSE == 1 and SURVERSE == 0")
end

for i = 1:n_to_random_sample
    # rand() > 0.45 holds with probability 0.55: ~55 % of synthetic rows are SURVERSE == 1.
    pool = rand() > 0.45 ? surverse_pool : no_surverse_pool;
    entry = pool[rand(1:nrow(pool)), :];
    generate_new_entry(new_entries, entry);
end

In [None]:
# Per-column summary statistics of the oversampled table.
describe(new_entries)

In [None]:
# Persist the oversampled table. The output directory is created first so the write
# does not fail when `data/parsed/oversampled/` does not exist yet.
output_path = "data/parsed/oversampled/ouvrage_4380.csv";
mkpath(dirname(output_path));
CSV.write(output_path, new_entries)