In [1]:
using CSV, DataFrames, Statistics, Dates, Gadfly, Random;
include("utils/precipitation.jl");

┌ Info: Loading DataFrames support into Gadfly.jl
└ @ Gadfly /home/chaime/.julia/packages/Gadfly/09PWZ/src/mapping.jl:228


On garde les ouvrages d'intérêt.

In [3]:
# Load the overflow structures ("ouvrages") and keep only their identifier and the
# TP_LAT / TP_LNG coordinates.
ouvrages = CSV.read("data/ouvrages-surverses.csv");
colnames = ["N_Env", "ID_SOMA", "ID_OUVRAGE", "NOM", "SOMA_SEC", "REGION", "TP_X", "TP_Y", "TP_Z", "TP_LAT", "TP_LNG", "EMI_X", "EMI_Y", "EMI_LNG", "EMI_LAT"];
# `names!` is deprecated in DataFrames; `rename!` with a vector of symbols replaces all
# column names at once without the deprecation warning.
rename!(ouvrages, Symbol.(colnames));
select!(ouvrages, [:ID_OUVRAGE, :TP_LAT, :TP_LNG]);

│   caller = top-level scope at In[3]:3
└ @ Core In[3]:3


In [4]:
# Sanity check: (rows, columns) of `ouvrages` after the column selection.
size(ouvrages)

(167, 3)

In [5]:
# Structures studied in this notebook; every table below is restricted to them.
important_ouvrages = ["3260-01D", "3350-07D", "4240-01D", "4350-01D", "4380-01D"];
# `Ref` keeps the list of identifiers from being broadcast element by element.
ouvrages = ouvrages[in.(ouvrages.ID_OUVRAGE, Ref(important_ouvrages)), :];

In [6]:
# Display the remaining structures (at most 10 rows).
first(ouvrages, 10)

Unnamed: 0_level_0,ID_OUVRAGE,TP_LAT,TP_LNG
Unnamed: 0_level_1,String,Float64,Float64
1,3260-01D,45.6507,-73.5803
2,3350-07D,45.5461,-73.6921
3,4240-01D,45.6497,-73.4877
4,4350-01D,45.4991,-73.555
5,4380-01D,45.4677,-73.5637


In [7]:
# Load the overflow records; the sentinel -99999 is read as `missing`.
surverses = CSV.read("data/surverses.csv", missingstring="-99999");

# Keep the records dated May through October.
surverses = filter(row -> 4 < month(row.DATE) < 11, surverses);

# Records without a reason are labelled "Inconnue"; only the reasons "P", "Inconnue"
# and "TS" are kept.
surverses[!, :RAISON] = coalesce.(surverses[:, :RAISON], "Inconnue");
surverses = filter(row -> row.RAISON in ("P", "Inconnue", "TS"), surverses);

# Keep identifier, date and overflow indicator, and name the identifier like in `ouvrages`.
select!(surverses, [:NO_OUVRAGE, :DATE, :SURVERSE]);
rename!(surverses, :NO_OUVRAGE => :ID_OUVRAGE);

In [8]:
# Restrict the records to the studied structures, then drop rows with a missing value.
surverses = surverses[in.(surverses.ID_OUVRAGE, Ref(important_ouvrages)), :];
dropmissing!(surverses);

In [9]:
# Summary statistics of the SURVERSE column over all studied structures.
describe(surverses[!, :SURVERSE])

Summary Stats:
Length:         5129
Missing Count:  0
Mean:           0.085202
Minimum:        0.000000
1st Quartile:   0.000000
Median:         0.000000
3rd Quartile:   0.000000
Maximum:        1.000000
Type:           Int64


In [10]:
# Same summary, restricted to the third studied structure.
curr = surverses[surverses.ID_OUVRAGE .== important_ouvrages[3], :];
describe(curr.SURVERSE)

Summary Stats:
Length:         1100
Missing Count:  0
Mean:           0.062727
Minimum:        0.000000
1st Quartile:   0.000000
Median:         0.000000
3rd Quartile:   0.000000
Maximum:        1.000000
Type:           Int64


La moyenne correspond ici au taux de surverses : il y a beaucoup plus d'observations sans surverse que d'observations avec surverse.

### Beaucoup plus de 0 que de 1 : problème de déséquilibre des classes
On le traitera plus tard par sur-échantillonnage des 1 et sous-échantillonnage des 0.

## Précipitations

In [11]:
# Load the precipitation readings; the sentinel -99999 is read as `missing`.
precipitations = CSV.read("data/precipitations.csv", missingstring="-99999");
# "St-Hubert" contains a dash, so it is renamed to a plain identifier.
rename!(precipitations, Symbol("St-Hubert") => :StHubert);

# Keep the readings dated May through October.
precipitations = filter(row -> 4 < month(row.date) < 11, precipitations);
names(precipitations)

7-element Array{Symbol,1}:
 :date      
 :heure     
 :McTavish  
 :Bellevue  
 :Assomption
 :Trudeau   
 :StHubert  

In [12]:
# Summary statistics of the StHubert column before any cleaning.
describe(precipitations[!, :StHubert])

Summary Stats:
Length:         30912
Missing Count:  5206
Mean:           1.223683
Minimum:        0.000000
1st Quartile:   0.000000
Median:         0.000000
3rd Quartile:   0.000000
Maximum:        307.000000
Type:           Union{Missing, Int64}


### Traitement des données aberrantes

In [13]:
# Boolean mask of the rows whose McTavish reading exceeds 2000; a missing reading
# counts as "not an outlier".
idx_outliers = Vector{Bool}(coalesce.(precipitations[!, :McTavish] .> 2000, false));

# Dates of the flagged rows, then the flagged rows themselves for inspection.
date_outlier = precipitations[idx_outliers, :date];
precipitations[idx_outliers, :]

Unnamed: 0_level_0,date,heure,McTavish,Bellevue,Assomption,Trudeau,StHubert
Unnamed: 0_level_1,Date,Int64,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰
1,2013-10-10,13,2082,0,0,0,missing


In [14]:
# On the flagged rows, reset both the McTavish and the StHubert readings to 0.
for station in (:McTavish, :StHubert)
    precipitations[idx_outliers, station] .= 0;
end
precipitations[idx_outliers, :]

Unnamed: 0_level_0,date,heure,McTavish,Bellevue,Assomption,Trudeau,StHubert
Unnamed: 0_level_1,Date,Int64,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰
1,2013-10-10,13,0,0,0,0,0


In [16]:
# Readings reset to 0 by hand: every hourly row of the given date, for the given station.
zeroed_readings = [
    (Date(2017, 7, 20), :Bellevue),
    (Date(2017, 7, 20), :Trudeau),      # TODO: decide whether this one should be kept
    (Date(2013, 6, 24), :Assomption),
    (Date(2014, 8, 5), :Assomption),
    (Date(2015, 6, 10), :Assomption),
    (Date(2018, 7, 26), :Trudeau),
];

for (day, station) in zeroed_readings
    precipitations[precipitations.date .== day, station] .= 0;
end

### Traitement des données manquantes

In [17]:
# Replace every missing reading by the value computed for the same station and the same
# date in `precipitation_by_day`.
# `mean_wo_missing` is defined outside this notebook (presumably a mean that skips
# missing values -- confirm in utils/precipitation.jl).
precipitation_by_day = by(precipitations, :date,
                            McTavish = :McTavish=>mean_wo_missing,
                            Bellevue = :Bellevue=>mean_wo_missing,
                            Assomption = :Assomption=>mean_wo_missing,
                            Trudeau = :Trudeau=>mean_wo_missing,
                            StHubert = :StHubert=>mean_wo_missing);

# Row of each date in the daily table (`by` yields one row per date). Built once, so
# each missing reading costs a dictionary lookup instead of a full `filter` pass over
# `precipitation_by_day`.
day_row = Dict(day => r for (r, day) in enumerate(precipitation_by_day.date));

for station in (:McTavish, :Bellevue, :Assomption, :Trudeau, :StHubert)
    for i in 1:size(precipitations, 1)
        if ismissing(precipitations[i, station])
            r = day_row[precipitations[i, :date]]
            precipitations[i, station] = precipitation_by_day[r, station]
        end
    end
end

In [18]:
# Summary statistics of the StHubert column after filling the missing readings.
describe(precipitations[!, :StHubert])

Summary Stats:
Length:         30912
Missing Count:  0
Mean:           1.018957
Minimum:        0.000000
1st Quartile:   0.000000
Median:         0.000000
3rd Quartile:   0.000000
Maximum:        307.000000
Type:           Union{Missing, Int64}


In [20]:
# Display 10 rows of `shuffleDf(precipitations)`; `shuffleDf` is defined outside this
# notebook (presumably returns the rows in random order -- confirm).
first(shuffleDf(precipitations), 10)

Unnamed: 0_level_0,date,heure,McTavish,Bellevue,Assomption,Trudeau,StHubert
Unnamed: 0_level_1,Date,Int64,Int64⍰,Int64⍰,Int64⍰,Int64⍰,Int64⍰
1,2017-08-24,13,0,0,0,0,0
2,2018-10-04,18,0,0,0,0,0
3,2015-05-21,20,0,0,0,0,0
4,2014-10-17,7,0,0,0,0,0
5,2015-06-20,13,0,0,0,0,0
6,2017-05-05,21,2,0,10,0,0
7,2015-08-01,2,0,0,0,0,0
8,2019-10-01,16,102,0,0,19,307
9,2014-07-31,16,0,0,0,0,0
10,2017-07-21,11,0,0,0,0,0


In [21]:
# Per-date sum of the readings of each station.
pcp_sum = by(precipitations, :date,  
            McTavish = :McTavish=>sum, 
            Bellevue = :Bellevue=>sum,
            Assomption = :Assomption=>sum, 
            Trudeau = :Trudeau=>sum, 
            StHubert = :StHubert=>sum);

In [22]:
# Display 5 rows of `shuffleDf(pcp_sum)` as a sanity check.
first(shuffleDf(pcp_sum), 5)

Unnamed: 0_level_0,date,McTavish,Bellevue,Assomption,Trudeau,StHubert
Unnamed: 0_level_1,Date,Int64,Int64,Int64,Int64,Int64
1,2015-10-20,0,0,0,0,0
2,2019-06-21,0,0,0,0,2
3,2014-09-21,20,43,20,51,9
4,2015-09-20,0,0,0,2,0
5,2019-10-18,2,0,0,4,2


In [None]:
# Reshape `pcp_sum` to long format (one row per date and station) and plot the values
# against the date, one colour per station.
df_for_plot = melt(pcp_sum, :date)
set_default_plot_size(25cm, 13cm)

plot(df_for_plot, x=:date, y=:value, Geom.point, color=:variable)

On réduit le gros outlier 

In [24]:
# Per-date maximum of the readings of each station.
pcp_max = by(precipitations, :date,  
            McTavish = :McTavish=>maximum,
            Bellevue = :Bellevue=>maximum, 
            Assomption = :Assomption=>maximum,
            Trudeau = :Trudeau=>maximum,
            StHubert = :StHubert=>maximum);

In [None]:
# Same plot as for the daily sums, for the daily maxima.
df_for_plot = melt(pcp_max, :date)
plot(df_for_plot, x=:date, y=:value, Geom.point, color=:variable)

In [None]:
# Display 5 rows of `shuffleDf(pcp_max)` as a sanity check.
first(shuffleDf(pcp_max), 5)

In [26]:
# Per-date `maximum3` of the readings of each station. `maximum3` is defined outside
# this notebook (presumably a maximum over 3 consecutive readings -- confirm in
# utils/precipitation.jl).
pcp_max3 = by(precipitations, :date,
                McTavish = :McTavish=>maximum3,
                Bellevue = :Bellevue=>maximum3,
                Assomption = :Assomption=>maximum3,
                Trudeau = :Trudeau=>maximum3,
                StHubert = :StHubert=>maximum3);

In [None]:
# Same plot as for the daily sums, for the daily `maximum3` values.
df_for_plot = melt(pcp_max3, :date)
plot(df_for_plot, x=:date, y=:value, Geom.point, color=:variable)

In [None]:
# Rows whose McTavish `maximum3` value exceeds 550 (the result is not kept).
filter(row -> row.McTavish > 550, pcp_max3)
# Cap those values at 550. The column is read with `df[!, col]` and the scalar is
# assigned with `.=`, as in the other cells: `df[:col]` and a plain `=` of a scalar
# onto several rows are deprecated forms.
pcp_max3[pcp_max3[!, :McTavish] .> 550, :McTavish] .= 550;

In [None]:
# Display 5 rows of `shuffleDf(pcp_max3)` as a sanity check.
first(shuffleDf(pcp_max3), 5)

## Standardiser les données de précipitations

In [None]:
"""
    standardize_col(col)

Return `col` centered on its mean and divided by its standard deviation.

When the standard deviation is zero (constant column) or `NaN` (fewer than two
elements), the centered values are returned unscaled instead of dividing by it, so the
result does not become `NaN`/`Inf` just because the column has no spread.
"""
function standardize_col(col)
    mean_col = mean(col)
    std_col = std(col)

    centered = col .- mean_col

    # Dividing by `one(std_col)` keeps the element type identical to the regular branch.
    scale = (iszero(std_col) || isnan(std_col)) ? one(std_col) : std_col

    return centered ./ scale
end

In [None]:
# Standardize every station column of the three daily tables, replacing the columns.
for daily in (pcp_sum, pcp_max, pcp_max3)
    for station in (:McTavish, :Bellevue, :Assomption, :Trudeau, :StHubert)
        daily[!, station] = standardize_col(daily[!, station]);
    end
end

In [27]:
# Display 10 rows of `shuffleDf(pcp_sum)` as a sanity check.
first(shuffleDf(pcp_sum), 10)

Unnamed: 0_level_0,date,McTavish,Bellevue,Assomption,Trudeau,StHubert
Unnamed: 0_level_1,Date,Int64,Int64,Int64,Int64,Int64
1,2019-06-21,0,0,0,0,2
2,2014-10-15,38,31,50,41,19
3,2016-07-12,0,0,0,0,0
4,2019-08-27,0,0,0,0,0
5,2017-05-07,73,93,110,73,86
6,2015-10-15,107,132,60,125,87
7,2013-07-05,0,10,0,15,0
8,2015-06-19,0,0,0,0,0
9,2019-05-10,385,265,286,285,316
10,2018-08-20,2,0,16,0,0


## Stations

In [28]:
# Coordinates of the five weather stations; STATION matches the column names of the
# precipitation tables.
station_df = DataFrame(
    STATION = ["McTavish", "Bellevue", "Assomption", "Trudeau", "StHubert"],
    LAT = [45.504742, 45.427222, 45.809444, 45.467778, 45.5175],
    LNG = [-73.579167, -73.929167, -73.434722, -73.741667, -73.416944],
);

station_df

Unnamed: 0_level_0,STATION,LAT,LNG
Unnamed: 0_level_1,String,Float64,Float64
1,McTavish,45.5047,-73.5792
2,Bellevue,45.4272,-73.9292
3,Assomption,45.8094,-73.4347
4,Trudeau,45.4678,-73.7417
5,StHubert,45.5175,-73.4169


### On ajoute les colonnes de précipitations

In [29]:
# `train_data` is the same DataFrame object as `surverses` (no copy is made), so the
# columns added below also appear on `surverses`.
train_data = surverses;

# Feature columns, initialised to 0.0: FS_* for the closest station, SS_* for the
# second closest one.
for feature in (:FS_dist, :SS_dist, :FS_sum, :FS_max, :FS_max3, :SS_sum, :SS_max, :SS_max3)
    train_data[!, feature] = zeros(size(train_data, 1));
end

In [30]:
# Display 5 rows of `shuffleDf(train_data)`; the feature columns are still all zero here.
first(shuffleDf(train_data), 5)

Unnamed: 0_level_0,ID_OUVRAGE,DATE,SURVERSE,FS_dist,SS_dist,FS_sum,FS_max,FS_max3,SS_sum
Unnamed: 0_level_1,String,Date,Int64,Float64,Float64,Float64,Float64,Float64,Float64
1,4240-01D,2014-07-19,0,0.0,0.0,0.0,0.0,0.0,0.0
2,3350-07D,2017-07-15,0,0.0,0.0,0.0,0.0,0.0,0.0
3,3260-01D,2015-05-07,0,0.0,0.0,0.0,0.0,0.0,0.0
4,4240-01D,2016-05-05,0,0.0,0.0,0.0,0.0,0.0,0.0
5,4240-01D,2018-08-16,0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
# Summary statistics of the SURVERSE column of the training table.
describe(train_data[!, :SURVERSE])

Summary Stats:
Length:         5129
Missing Count:  0
Mean:           0.085202
Minimum:        0.000000
1st Quartile:   0.000000
Median:         0.000000
3rd Quartile:   0.000000
Maximum:        1.000000
Type:           Int64


On remplit les champs de chaque observation.

In [34]:
# Fill the distance and precipitation features of every row of `train_data`.
#
# For each row, the two stations of `station_df` closest to the row's structure are found
# with `findDistance` (defined outside this notebook). The values of `pcp_sum`, `pcp_max`
# and `pcp_max3` for the row's date are then copied from those stations into the FS_*
# (closest station) and SS_* (second closest station) columns.

# Row of each date in the three daily tables (`by` yields one row per date). Built once,
# so a row costs six dictionary lookups instead of six scans of a date column.
sum_row = Dict(day => r for (r, day) in enumerate(pcp_sum.date));
max_row = Dict(day => r for (r, day) in enumerate(pcp_max.date));
max3_row = Dict(day => r for (r, day) in enumerate(pcp_max3.date));

# Nearest stations per structure, cached because they do not depend on the date.
nearest_stations = Dict();

for i in 1:size(train_data, 1)
    # Look the structure up by column name rather than by column position.
    curr_ouvrage = train_data[i, :ID_OUVRAGE];

    closest_station, closest_distance, second_closest_station, second_closest_distance =
        get!(nearest_stations, curr_ouvrage) do
            ouvrage_data = filter(row -> row.ID_OUVRAGE == curr_ouvrage, ouvrages);
            dists = [findDistance(ouvrage_data[1, :TP_LAT], ouvrage_data[1, :TP_LNG],
                                  station_df[j, :LAT], station_df[j, :LNG])
                     for j in 1:size(station_df, 1)];
            # `sortperm` is stable: on equal distances the station listed first wins.
            order = sortperm(dists);
            (Symbol(station_df[order[1], :STATION]), dists[order[1]],
             Symbol(station_df[order[2], :STATION]), dists[order[2]])
        end;

    train_data[i, :FS_dist] = closest_distance;
    train_data[i, :SS_dist] = second_closest_distance;

    day = train_data[i, :DATE];

    # Values of the closest station.
    train_data[i, :FS_sum] = pcp_sum[sum_row[day], closest_station];
    train_data[i, :FS_max] = pcp_max[max_row[day], closest_station];
    train_data[i, :FS_max3] = pcp_max3[max3_row[day], closest_station];

    # Weight applied to the second station. A distance-based weight,
    # 1 - log(sqrt(second_closest_distance / closest_distance)), was tried and disabled.
    multiplier = 1;

    # Values of the second closest station.
    train_data[i, :SS_sum] = pcp_sum[sum_row[day], second_closest_station] * multiplier;
    train_data[i, :SS_max] = pcp_max[max_row[day], second_closest_station] * multiplier;
    train_data[i, :SS_max3] = pcp_max3[max3_row[day], second_closest_station] * multiplier;
end

In [35]:
# Display 10 rows of the filled training table, restricted to the columns in `cols`.
cols = [:ID_OUVRAGE, :SURVERSE, :FS_dist, :SS_dist, :FS_sum, :SS_sum, :FS_max, :SS_max];
first(shuffleDf(train_data[!, cols]), 10)

Unnamed: 0_level_0,ID_OUVRAGE,SURVERSE,FS_dist,SS_dist,FS_sum,SS_sum,FS_max,SS_max
Unnamed: 0_level_1,String,Int64,Float64,Float64,Float64,Float64,Float64,Float64
1,4380-01D,0,0.0401006,0.154948,0.0,0.0,0.0,0.0
2,4380-01D,0,0.0401006,0.154948,0.0,0.0,0.0,0.0
3,4350-01D,0,0.0248354,0.139269,59.0,122.0,37.0,102.0
4,4380-01D,0,0.0401006,0.154948,10.0,0.0,10.0,0.0
5,4240-01D,0,0.149987,0.168295,0.0,0.0,0.0,0.0
6,4350-01D,0,0.0248354,0.139269,24.0,0.0,24.0,0.0
7,4380-01D,0,0.0401006,0.154948,0.0,0.0,0.0,0.0
8,4380-01D,0,0.0401006,0.154948,0.0,0.0,0.0,0.0
9,4380-01D,0,0.0401006,0.154948,222.0,0.0,180.0,0.0
10,4380-01D,0,0.0401006,0.154948,0.0,0.0,0.0,0.0


### Save dataframes in files per ouvrage

In [36]:
# Training rows of structure 3260-01D, without the identifier column, written to CSV.
ouvrage_3260 = filter(train_data) do row
    row.ID_OUVRAGE == "3260-01D"
end;
select!(ouvrage_3260, Not(:ID_OUVRAGE));
CSV.write("data/parsed/ouvrage_3260.csv", ouvrage_3260)

"data/parsed/ouvrage_3260.csv"

In [37]:
# Training rows of structure 3350-07D, without the identifier column, written to CSV.
ouvrage_3350 = filter(train_data) do row
    row.ID_OUVRAGE == "3350-07D"
end;
select!(ouvrage_3350, Not(:ID_OUVRAGE));
CSV.write("data/parsed/ouvrage_3350.csv", ouvrage_3350)

"data/parsed/ouvrage_3350.csv"

In [38]:
# Training rows of structure 4240-01D, without the identifier column, written to CSV.
ouvrage_4240 = filter(train_data) do row
    row.ID_OUVRAGE == "4240-01D"
end;
select!(ouvrage_4240, Not(:ID_OUVRAGE));
CSV.write("data/parsed/ouvrage_4240.csv", ouvrage_4240)

"data/parsed/ouvrage_4240.csv"

In [39]:
# Training rows of structure 4350-01D, without the identifier column, written to CSV.
ouvrage_4350 = filter(train_data) do row
    row.ID_OUVRAGE == "4350-01D"
end;
select!(ouvrage_4350, Not(:ID_OUVRAGE));
CSV.write("data/parsed/ouvrage_4350.csv", ouvrage_4350)

"data/parsed/ouvrage_4350.csv"

In [40]:
# Training rows of structure 4380-01D, without the identifier column, written to CSV.
ouvrage_4380 = filter(train_data) do row
    row.ID_OUVRAGE == "4380-01D"
end;
select!(ouvrage_4380, Not(:ID_OUVRAGE));
CSV.write("data/parsed/ouvrage_4380.csv", ouvrage_4380)

"data/parsed/ouvrage_4380.csv"

### Tests

In [41]:
# Load the test records and name the structure identifier like in the other tables.
test_data = rename!(CSV.read("data/test.csv"), :NO_OUVRAGE => :ID_OUVRAGE);

In [42]:
# Distinct structure identifiers present in the test records.
levels(test_data[:,:ID_OUVRAGE])

5-element Array{String,1}:
 "3260-01D"
 "3350-07D"
 "4240-01D"
 "4350-01D"
 "4380-01D"

In [43]:
# Feature columns, initialised to 0.0: FS_* for the closest station, SS_* for the
# second closest one.
for feature in (:FS_dist, :SS_dist, :FS_sum, :FS_max, :FS_max3, :SS_sum, :SS_max, :SS_max3)
    test_data[!, feature] = zeros(size(test_data, 1));
end

In [44]:
# Fill the distance and precipitation features of every row of `test_data`.
#
# For each row, the two stations of `station_df` closest to the row's structure are found
# with `findDistance` (defined outside this notebook). The values of `pcp_sum`, `pcp_max`
# and `pcp_max3` for the row's date are then copied from those stations into the FS_*
# (closest station) and SS_* (second closest station) columns.

# Row of each date in the three daily tables (`by` yields one row per date). Built once,
# so a row costs six dictionary lookups instead of six scans of a date column.
sum_row = Dict(day => r for (r, day) in enumerate(pcp_sum.date));
max_row = Dict(day => r for (r, day) in enumerate(pcp_max.date));
max3_row = Dict(day => r for (r, day) in enumerate(pcp_max3.date));

# Nearest stations per structure, cached because they do not depend on the date.
nearest_stations = Dict();

for i in 1:size(test_data, 1)
    # Look the structure up by column name rather than by column position.
    curr_ouvrage = test_data[i, :ID_OUVRAGE];

    closest_station, closest_distance, second_closest_station, second_closest_distance =
        get!(nearest_stations, curr_ouvrage) do
            ouvrage_data = filter(row -> row.ID_OUVRAGE == curr_ouvrage, ouvrages);
            dists = [findDistance(ouvrage_data[1, :TP_LAT], ouvrage_data[1, :TP_LNG],
                                  station_df[j, :LAT], station_df[j, :LNG])
                     for j in 1:size(station_df, 1)];
            # `sortperm` is stable: on equal distances the station listed first wins.
            order = sortperm(dists);
            (Symbol(station_df[order[1], :STATION]), dists[order[1]],
             Symbol(station_df[order[2], :STATION]), dists[order[2]])
        end;

    test_data[i, :FS_dist] = closest_distance;
    test_data[i, :SS_dist] = second_closest_distance;

    day = test_data[i, :DATE];

    # Values of the closest station.
    test_data[i, :FS_sum] = pcp_sum[sum_row[day], closest_station];
    test_data[i, :FS_max] = pcp_max[max_row[day], closest_station];
    test_data[i, :FS_max3] = pcp_max3[max3_row[day], closest_station];

    # Weight applied to the second station. A distance-based weight,
    # 1 - log(sqrt(second_closest_distance / closest_distance)), was tried and disabled.
    multiplier = 1;

    # Values of the second closest station.
    test_data[i, :SS_sum] = pcp_sum[sum_row[day], second_closest_station] * multiplier;
    test_data[i, :SS_max] = pcp_max[max_row[day], second_closest_station] * multiplier;
    test_data[i, :SS_max3] = pcp_max3[max3_row[day], second_closest_station] * multiplier;
end

In [45]:
# Display 10 rows of the filled test table, restricted to the columns in `cols`.
cols = [:ID_OUVRAGE, :FS_dist, :SS_dist, :FS_sum, :SS_sum, :FS_max, :SS_max];
first(shuffleDf(test_data[!, cols]), 10)

Unnamed: 0_level_0,ID_OUVRAGE,FS_dist,SS_dist,FS_sum,SS_sum,FS_max,SS_max
Unnamed: 0_level_1,String,Float64,Float64,Float64,Float64,Float64,Float64
1,3350-07D,0.0927373,0.12027,27.0,3.0,25.0,3.0
2,4350-01D,0.0248354,0.139269,0.0,0.0,0.0,0.0
3,3350-07D,0.0927373,0.12027,0.0,0.0,0.0,0.0
4,3260-01D,0.145981,0.210769,12.0,0.0,8.0,0.0
5,4240-01D,0.149987,0.168295,391.0,18.0,167.0,6.0
6,4380-01D,0.0401006,0.154948,2.0,0.0,2.0,0.0
7,3350-07D,0.0927373,0.12027,0.0,0.0,0.0,0.0
8,4380-01D,0.0401006,0.154948,89.0,67.0,45.0,32.0
9,4240-01D,0.149987,0.168295,14.0,22.0,12.0,12.0
10,3350-07D,0.0927373,0.12027,0.0,0.0,0.0,0.0


In [46]:
# Test rows of structure 3260-01D, without the identifier column, written to CSV.
test_3260 = filter(test_data) do row
    row.ID_OUVRAGE == "3260-01D"
end;
select!(test_3260, Not(:ID_OUVRAGE));
CSV.write("data/parsed/test_3260.csv", test_3260)

"data/parsed/test_3260.csv"

In [47]:
# Test rows of structure 3350-07D, without the identifier column, written to CSV.
test_3350 = filter(test_data) do row
    row.ID_OUVRAGE == "3350-07D"
end;
select!(test_3350, Not(:ID_OUVRAGE));
CSV.write("data/parsed/test_3350.csv", test_3350)

"data/parsed/test_3350.csv"

In [48]:
# Test rows of structure 4240-01D, without the identifier column, written to CSV.
test_4240 = filter(test_data) do row
    row.ID_OUVRAGE == "4240-01D"
end;
select!(test_4240, Not(:ID_OUVRAGE));
CSV.write("data/parsed/test_4240.csv", test_4240)

"data/parsed/test_4240.csv"

In [49]:
# Test rows of structure 4350-01D, without the identifier column, written to CSV.
test_4350 = filter(test_data) do row
    row.ID_OUVRAGE == "4350-01D"
end;
select!(test_4350, Not(:ID_OUVRAGE));
CSV.write("data/parsed/test_4350.csv", test_4350)

"data/parsed/test_4350.csv"

In [50]:
# Test rows of structure 4380-01D, without the identifier column, written to CSV.
test_4380 = filter(test_data) do row
    row.ID_OUVRAGE == "4380-01D"
end;
select!(test_4380, Not(:ID_OUVRAGE));
CSV.write("data/parsed/test_4380.csv", test_4380)

"data/parsed/test_4380.csv"