In [1]:
using CSV, DataFrames, Distributions, Random, StatsBase, LinearAlgebra, SparseArrays
using Extremes, Dates, Gadfly, GMRF
using Optim

import Plots #pour faire des graphiques

In [2]:
# Provinces considered in the analysis.
PROVINCES = String["NB", "NL", "NS", "ON", "PE", "QC"]

# Duration label used to select the observations of interest.
DURATION = "24 h"

"24 h"

In [3]:
# Read the station metadata table.
station_list = CSV.read("dat/_station_list.csv", DataFrame)

# Keep only the stations whose province is one of `PROVINCES`.
filter!(:Province => in(PROVINCES), station_list)

first(station_list, 10)

Unnamed: 0_level_0,Name,Province,ID,Lat,Lon,Elevation
Unnamed: 0_level_1,String,String,String,Float64,Float64,Int64
1,BEECHWOOD,NB,8100512,46.53,-67.67,91
2,BELLEDUNE,NB,8100514,47.9,-65.83,7
3,BOUCTOUCHE CDA CS,NB,8100593,46.43,-64.77,35
4,CHARLO AUTO,NB,8100885,47.98,-66.33,42
5,MIRAMICHI RCS,NB,8100989,47.02,-65.47,33
6,EDMUNDSTON,NB,8101303,47.42,-68.32,154
7,FREDERICTON A,NB,8101500,45.87,-66.53,20
8,FREDERICTON CDA CS,NB,8101605,45.92,-66.62,35
9,MONCTON INTL A,NB,8103201,46.12,-64.68,70
10,ROYAL ROAD,NB,8104480,46.05,-66.72,115


In [4]:
"""
    load_station(station_id::AbstractString; datadir::AbstractString="dat")

Read `<datadir>/<station_id>.csv` and return its content in tidy (long) form.

The `Année` column is renamed to `Year`. Every other column is stacked: its name
goes to the `Duration` column and its value to the `Pcp` column. Rows whose `Pcp`
is missing are dropped.

# Arguments
- `station_id`: station identifier, used as the CSV file name without extension.

# Keywords
- `datadir`: directory holding the station files (default `"dat"`).

# Returns
A `DataFrame` with the columns `Year`, `Duration` and `Pcp`.
"""
function load_station(station_id::AbstractString; datadir::AbstractString="dat")
    # `joinpath` keeps the path valid whatever the platform separator is.
    path = joinpath(datadir, station_id * ".csv")

    df = CSV.read(path, DataFrame)
    rename!(df, :Année => :Year)

    # Reshape to tidy (long) form: one row per (Year, Duration) pair.
    df_reshape = stack(df, Not(:Year); variable_name=:Duration, value_name=:Pcp)
    dropmissing!(df_reshape, :Pcp)

    return df_reshape
end

load_station (generic function with 1 method)

In [5]:
# Empty, typed table that collects the observations of every station.
dat = DataFrame(StationName = String[],
                StationID = String[],
                Year = Int64[],
                Duration = String[],
                Pcp = Float64[])

# Load each station file, tag its rows with the station name and ID,
# then add them to `dat`.
for station in eachrow(station_list)
    obs = load_station(station.ID)
    obs[!, :StationName] .= station.Name
    obs[!, :StationID] .= station.ID
    append!(dat, obs)
end

# Keep only the rows of the selected duration.
filter!(:Duration => ==(DURATION), dat)
first(dat, 10)

Unnamed: 0_level_0,StationName,StationID,Year,Duration,Pcp
Unnamed: 0_level_1,String,String,Int64,String,Float64
1,BEECHWOOD,8100512,1959,24 h,118.6
2,BEECHWOOD,8100512,1960,24 h,45.2
3,BEECHWOOD,8100512,1961,24 h,69.3
4,BEECHWOOD,8100512,1962,24 h,56.4
5,BEECHWOOD,8100512,1963,24 h,46.0
6,BEECHWOOD,8100512,1964,24 h,43.9
7,BEECHWOOD,8100512,1965,24 h,74.2
8,BEECHWOOD,8100512,1966,24 h,50.3
9,BEECHWOOD,8100512,1967,24 h,54.6
10,BEECHWOOD,8100512,1969,24 h,92.2


In [6]:
# Gridded covariate file; columns 1 to 3 are taken as latitude, longitude and
# mean precipitation respectively.
crcm = CSV.read("dat/CROQmeanPcp.csv", DataFrame)

# Index bounds of the sub-grid that is kept (first and second grid dimension).
latlim = [60, 253];
lonlim = [20, 270];

# Reshape each column to the 300 × 300 grid and crop it to the bounds above.
lat, lon, pcp = map((1, 2, 3)) do j
    field = reshape(crcm[:, j], 300, 300)
    field[latlim[1]:latlim[2], lonlim[1]:lonlim[2]]
end

# Dimensions of the cropped grid and total number of grid points.
m₁, m₂ = size(lat)
m = m₁ * m₂

# One row per grid point of the cropped grid.
griddedCovariate = DataFrame(Lat = vec(lat), Lon = vec(lon), meanPcp = vec(pcp))
first(griddedCovariate, 10)

Unnamed: 0_level_0,Lat,Lon,meanPcp
Unnamed: 0_level_1,Float64,Float64,Float64
1,42.4558,-94.9948,2.41792
2,42.5658,-94.9909,2.42233
3,42.6758,-94.987,2.41666
4,42.7857,-94.9832,2.46305
5,42.8957,-94.9792,2.47048
6,43.0057,-94.9753,2.46087
7,43.1156,-94.9714,2.44305
8,43.2256,-94.9674,2.40051
9,43.3355,-94.9634,2.39468
10,43.4455,-94.9595,2.34912


In [7]:
"""
    nnsearch(X::AbstractMatrix{<:Real}, point::AbstractVector{<:Real})

Return the index of the column of `X` closest to `point` in squared Euclidean
distance. When several columns are equally close, `argmin` decides which index
is returned.

# Arguments
- `X`: candidate points, one per column.
- `point`: query point; its length must equal the number of rows of `X`.

# Throws
- `DimensionMismatch` if `length(point) != size(X, 1)`.
- `ArgumentError` if `X` has no column.
"""
function nnsearch(X::AbstractMatrix{<:Real}, point::AbstractVector{<:Real})

    # Without this check a mismatched `point` may broadcast silently
    # and yield a meaningless index.
    if size(X, 1) != length(point)
        throw(DimensionMismatch(
            "point has length $(length(point)) but X has $(size(X, 1)) rows"))
    end
    size(X, 2) > 0 || throw(ArgumentError("X must have at least one column"))

    # Squared distance between `point` and every column of X.
    d² = vec(sum(abs2, X .- point; dims=1))

    # Find the index of the minimum
    return argmin(d²)

end

"""
    nnsearch(X::Matrix{<:Real}, points::Matrix{<:Real})

Call the single-point `nnsearch(X, point)` method on every column of `points` and
return the resulting indices as a `Vector{Int64}`, one entry per column.
"""
function nnsearch(X::Matrix{<:Real}, points::Matrix{<:Real})

    return Int64[nnsearch(X, points[:, j]) for j in 1:size(points, 2)]

end



nnsearch (generic function with 2 methods)

In [8]:
# 2 × n coordinate matrices (row 1: latitude, row 2: longitude), one column per location.
stationLocation = permutedims(hcat(station_list[:, :Lat], station_list[:, :Lon]))
gridCoords = permutedims(hcat(griddedCovariate[:, :Lat], griddedCovariate[:, :Lon]))

# For each station, index of the `gridCoords` column selected by `nnsearch`.
V = nnsearch(gridCoords, stationLocation)
station_list[!, :GridCell] = V
first(station_list, 10)

Unnamed: 0_level_0,Name,Province,ID,Lat,Lon,Elevation,GridCell
Unnamed: 0_level_1,String,String,String,Float64,Float64,Int64,Int64
1,BEECHWOOD,NB,8100512,46.53,-67.67,91,32276
2,BELLEDUNE,NB,8100514,47.9,-65.83,7,33455
3,BOUCTOUCHE CDA CS,NB,8100593,46.43,-64.77,35,35580
4,CHARLO AUTO,NB,8100885,47.98,-66.33,42,32873
5,MIRAMICHI RCS,NB,8100989,47.02,-65.47,33,34419
6,EDMUNDSTON,NB,8101303,47.42,-68.32,154,30924
7,FREDERICTON A,NB,8101500,45.87,-66.53,20,34019
8,FREDERICTON CDA CS,NB,8101605,45.92,-66.62,35,33825
9,MONCTON INTL A,NB,8103201,46.12,-64.68,70,35965
10,ROYAL ROAD,NB,8104480,46.05,-66.72,115,33632


In [9]:
# Grid cells that appear more than once in `V`, in order of first appearance.
records = Int64[cell for cell in unique(V) if count(==(cell), V) > 1]

records

14-element Vector{Int64}:
 33632
 20197
 18645
 19620
 19619
 19813
 19814
 19621
 28003
 28002
 25464
 28572
 26047
 27434

In [27]:
# Stations that share their grid cell with at least one other station,
# grouped by grid cell in the order of `records`.
station_near = DataFrame(Name = String[],
                        Province = String[],
                        ID = String[],
                        Lat = Float64[],
                        Lon = Float64[],
                        Elevation = Int64[],
                        GridCell = Int64[])

for cell in records
    # Append every station assigned to this cell in a single call.
    append!(station_near, station_list[findall(==(cell), V), :])
end

# Create the output directory if it is missing; `CSV.write` cannot create it.
mkpath("results")
CSV.write("results/same_cell.csv", station_near)
first(station_near, 10)

Unnamed: 0_level_0,Name,Province,ID,Lat,Lon,Elevation,GridCell
Unnamed: 0_level_1,String,String,String,Float64,Float64,Int64,Int64
1,ROYAL ROAD,NB,8104480,46.05,-66.72,115,33632
2,ROYAL ROAD WEST,NB,8104482,46.08,-66.73,160,33632
3,PORT WELLER (AUT),ON,6136699,43.25,-79.22,79,20197
4,ST CATHARINES A,ON,6137287,43.2,-79.17,97,20197
5,CAMBRIDGE GALT MOE,ON,6141095,43.33,-80.32,268,18645
6,PRESTON WPCP,ON,6146714,43.38,-80.35,272,18645
7,WATERLOO WELLINGTON A,ON,6149387,43.45,-80.38,317,18645
8,MAPLE,ON,6154950,43.87,-79.48,244,19620
9,TORONTO YORK MILLS,ON,615HHDF,43.75,-79.38,153,19620
10,TORONTO NORTH YORK,ON,615S001,43.78,-79.47,187,19620


In [23]:
# Show the 10th station as a one-row table.
station_list[10:10, :]

Unnamed: 0_level_0,Name,Province,ID,Lat,Lon,Elevation,GridCell
Unnamed: 0_level_1,String,String,String,Float64,Float64,Int64,Int64
1,ROYAL ROAD,NB,8104480,46.05,-66.72,115,33632


In [28]:
# Number of stations kept.
size(station_list, 1)

336