# Data Exploration and Baseline Calibration

## Import Packages

In [3]:
using Pkg; Pkg.activate("../"); Pkg.update(); Pkg.instantiate()
using MagNav
using Plots
gr()

[32m[1m  Activating[22m[39m project at `c:\Users\taylo\Documents\GitHub\MagNav_v2.jl`
[32m[1m    Updating[22m[39m registry at `C:\Users\taylo\.julia\registries\General`
[32m[1m    Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[32m[1m   Installed[22m[39m StructArrays ─── v0.6.4
[32m[1m   Installed[22m[39m Polynomials ──── v2.0.24
[32m[1m   Installed[22m[39m ChainRulesCore ─ v1.11.5
[32m[1m   Installed[22m[39m ChainRules ───── v1.20.0
[32m[1m  No Changes[22m[39m to `C:\Users\taylo\Documents\GitHub\MagNav_v2.jl\Project.toml`
[32m[1m    Updating[22m[39m `C:\Users\taylo\Documents\GitHub\MagNav_v2.jl\Manifest.toml`
 [90m [082447d4] [39m[93m↑ ChainRules v1.18.1 ⇒ v1.20.0[39m
 [90m [d360d2e6] [39m[93m↑ ChainRulesCore v1.11.4 ⇒ v1.11.5[39m
 [90m [f27b6e38] [39m[93m↑ Polynomials v2.0.22 ⇒ v2.0.24[39m
 [90m [09ab397b] [39m[93m↑ StructArrays v0.6.3 ⇒ v0.6.4[39m
[32m[1mPrecompiling[22m[39m project...
[32m  ✓ [39m

Plots.GRBackend()

## Get flight data

In [4]:
data_dir  = "../data/"
data_file = string(data_dir,"/Flt1002-train.h5")
xyz_data  = get_flight_data(data_file);


> Reading in file: ../data//Flt1002-train.h5


Take a look at the fields of the `XYZ` struct, either in `MagNav.jl` module or right in this notebook. 

In [5]:
fieldnames(MagNav.XYZ)

(:N, :DT, :LINE, :FLT, :TIME, :UTM_X, :UTM_Y, :UTM_Z, :MSL_Z, :LAT, :LONG, :BARO, :RADAR, :TOPO, :DEM, :DRAPE, :PITCH, :ROLL, :AZIMUTH, :DIURNAL, :COMPMAG1, :LAGMAG1, :DCMAG1, :IGRFMAG1, :UNCOMPMAG1, :UNCOMPMAG2, :UNCOMPMAG3, :UNCOMPMAG4, :UNCOMPMAG5, :FLUXB_X, :FLUXB_Y, :FLUXB_Z, :FLUXB_TOT, :FLUXC_X, :FLUXC_Y, :FLUXC_Z, :FLUXC_TOT, :FLUXD_X, :FLUXD_Y, :FLUXD_Z, :FLUXD_TOT, :OGS_MAG, :OGS_HGT, :INS_ACC_X, :INS_ACC_Y, :INS_ACC_Z, :INS_WANDER, :INS_LAT, :INS_LON, :INS_HGT, :INS_VEL_N, :INS_VEL_W, :INS_VEL_V, :PITCHRT, :ROLLRT, :YAWRT, :LONG_ACC, :LAT_ACC, :NORM_ACC, :TRUE_AS, :PITOT_P, :STATIC_P, :TOT_P, :CUR_COM1, :CUR_ACHi, :CUR_ACLo, :CUR_TANK, :CUR_FLAP, :CUR_STRB, :CUR_SRVO_O, :CUR_SRVO_M, :CUR_SRVO_I, :CUR_IHTR, :CUR_ACPWR, :CUR_OUTPWR, :CUR_BAT1, :CUR_BAT2, :V_ACPWR, :V_OUTPWR, :V_BAT1, :V_BAT2, :V_RESp, :V_RESn, :V_BACKp, :V_BACKn, :V_GYRO1, :V_GYRO2, :V_ACCp, :V_ACCn, :V_BLOCK, :V_BACK, :V_SERVO, :V_CABT, :V_FAN)

## Tolles-Lawson Calibration

In [6]:
# define line numbers for Compensation 1 segment, this is the segment that generates the "intial" Tolles Lawson calibration
i1         = findfirst(xyz_data.LINE .== 1002.02)
i2         = findlast( xyz_data.LINE .== 1002.02)
i1, i2

(12702, 25002)

In [7]:
## create Tolles-Lawson coefficients
pass1 = 0.1  # first  passband frequency [Hz]
pass2 = 0.9  # second passband frequency [Hz]
fs    = 10.0 # sampling frequency [Hz]
TL_coef_1  = create_TL_coef(xyz_data.FLUXB_X[i1:i2],
                            xyz_data.FLUXB_Y[i1:i2],
                            xyz_data.FLUXB_Z[i1:i2],
                            xyz_data.UNCOMPMAG1[i1:i2];
                            pass1=pass1,pass2=pass2,fs=fs)
TL_coef_3  = create_TL_coef(xyz_data.FLUXB_X[i1:i2],
                            xyz_data.FLUXB_Y[i1:i2],
                            xyz_data.FLUXB_Z[i1:i2],
                            xyz_data.UNCOMPMAG3[i1:i2];
                            pass1=pass1,pass2=pass2,fs=fs)
TL_coef_5  = create_TL_coef(xyz_data.FLUXB_X[i1:i2],
                            xyz_data.FLUXB_Y[i1:i2],
                            xyz_data.FLUXB_Z[i1:i2],
                            xyz_data.UNCOMPMAG5[i1:i2];
                            pass1=pass1,pass2=pass2,fs=fs)


18-element Vector{Float64}:
  225.5485247927409
  337.1809350176073
  -76.2120948543329
  531.9068878175069
  195.69987382838244
 -277.7918811320834
  539.5741774120652
 -225.89452517872803
  923.2693316470381
 4469.906155680168
 1550.0247557993712
  694.9310104918216
 1541.3023848836708
 4652.449079079208
  329.03857171710956
  174.46696067279544
  188.068023291705
 6513.13686776687

In [8]:
## create Tolles-Lawson A matrix
A = create_TL_A(xyz_data.FLUXB_X,
                xyz_data.FLUXB_Y,
                xyz_data.FLUXB_Z);

In [9]:
## calibrated magnetometer measurements
mag_1_c = xyz_data.UNCOMPMAG1 - A*TL_coef_1 .+ mean(A*TL_coef_1); # Truth Signal
mag_3_c = xyz_data.UNCOMPMAG3 - A*TL_coef_3 .+ mean(A*TL_coef_3); # Medium Difficulty
mag_5_c = xyz_data.UNCOMPMAG5 - A*TL_coef_5 .+ mean(A*TL_coef_5); # Easy Difficulty

In [10]:
import Pkg; Pkg.add("Flux") # unnecessary if your julia environment has flux already
using Flux

[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `C:\Users\taylo\Documents\GitHub\MagNav_v2.jl\Project.toml`
[32m[1m  No Changes[22m[39m to `C:\Users\taylo\Documents\GitHub\MagNav_v2.jl\Manifest.toml`


## Basic Dense Model
This is a basic feed forward neural network. It shouldn't work as it isn't using windowing or anything complicated but there's quite a bit that can be done here if you're willing to involve multiple fields or incorporate time to some extent

an article about a similar network is available at https://fluxml.ai/tutorials/2021/01/26/mlp.html

General Background about the training loop implementation : https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/training.md

In [11]:
model = Chain(Dense(1, 32), Dense(32, 1)) # Very Basic Dense Model

Chain(
  Dense(1, 32),                         [90m# 64 parameters[39m
  Dense(32, 1),                         [90m# 33 parameters[39m
)[90m                   # Total: 4 arrays, [39m97 parameters, 644 bytes.

In [12]:
display(model.layers[1]) # Displaying layer weights
display(model.layers[2])

Dense(1, 32)        [90m# 64 parameters[39m

Dense(32, 1)        [90m# 33 parameters[39m

In [13]:
using Flux.Losses: mse
function seq_loss(x, y) # Seq2Seq Loss for model
    sum(mse(model(xi), yi) for (xi, yi) in zip(x, y))
  end;

In [14]:
a = mag_3_c[1:100]
a = Float32.(a)
b = model([a[1]])
display(b)

1-element Vector{Float32}:
 -1094.3569

In [15]:
# Collect Data
data = a
display(data)
truth = Float32.(mag_1_c[1:100])

100-element Vector{Float32}:
 52784.082
 52788.543
 52790.887
 52788.7
 52782.336
 52775.2
 52772.4
 52776.2
 52784.11
 52792.156
     ⋮
 52924.242
 52933.3
 52938.918
 52938.844
 52932.13
 52921.22
 52911.5
 52907.0
 52907.637

100-element Vector{Float32}:
 53516.758
 53517.75
 53518.77
 53519.83
 53520.926
 53522.047
 53523.168
 53524.266
 53525.344
 53526.395
     ⋮
 53649.95
 53651.594
 53653.254
 53654.938
 53656.652
 53658.387
 53660.13
 53661.875
 53663.633

In [16]:
# Select Optimizer
opt = Flux.Optimise.ADAM()

ADAM(0.001, (0.9, 0.999), IdDict{Any, Any}())

In [17]:
# Grab Parameters
params = Flux.params(model)

Params([Float32[0.26701033; -0.24281141; … ; 0.041550186; -0.10614136;;], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Float32[0.104893655 -0.39319044 … -0.38705203 0.4074169], Float32[0.0]])

In [18]:
Flux.train!(mse, params, zip(data, truth), opt)

In [19]:
display(model([data[1]]))
truth[1]

1-element Vector{Float32}:
 -1094.3569

53516.758f0

## Basic CNN

The data's clearly in the wrong format. For our purposes we'll want vectors and additional features to make each time slice look more like an image. We can go into more depth with windowing if people would like to pursue this.
https://fluxml.ai/tutorials/2021/02/07/convnet.html More Details

## Basic RNN
The data is currently in a format that could work but would only have 1 sample. For this subset I'll slice it up into vectors of length 10 acting as "windows" of length 10. You'll need to rework this code to work for your application.

 a generative RNN example is available at https://fluxml.ai/Flux.jl/v0.2/examples/char-rnn.html

In [20]:
vec_data = [Vector{Float32}([aii]) for aii in data]
display(vec_data)
vec_truth = [Vector{Float32}([aii]) for aii in truth]

100-element Vector{Vector{Float32}}:
 [52784.082]
 [52788.543]
 [52790.887]
 [52788.7]
 [52782.336]
 [52775.2]
 [52772.4]
 [52776.2]
 [52784.11]
 [52792.156]
 ⋮
 [52924.242]
 [52933.3]
 [52938.918]
 [52938.844]
 [52932.13]
 [52921.22]
 [52911.5]
 [52907.0]
 [52907.637]

100-element Vector{Vector{Float32}}:
 [53516.758]
 [53517.75]
 [53518.77]
 [53519.83]
 [53520.926]
 [53522.047]
 [53523.168]
 [53524.266]
 [53525.344]
 [53526.395]
 ⋮
 [53649.95]
 [53651.594]
 [53653.254]
 [53654.938]
 [53656.652]
 [53658.387]
 [53660.13]
 [53661.875]
 [53663.633]

In [21]:
rnn_model = Chain(
  RNN(1, 32),
  Dense(32, 1))
display(rnn_model)
# Sanity check our model real quick
rnn_model(vec_data[1])

Chain(
  Recur(
    RNNCell(1, 32, tanh),               [90m# 1_120 parameters[39m
  ),
  Dense(32, 1),                         [90m# 33 parameters[39m
)[90m         # Total: 6 trainable arrays, [39m1_153 parameters,
[90m          # plus 1 non-trainable, 32 parameters, summarysize [39m4.871 KiB.

1-element Vector{Float32}:
 0.48300937

In [22]:
params = Flux.params(rnn_model)

Params([Float32[0.37760386; -0.36185747; … ; 0.16164973; 0.28453472;;], Float32[0.20654798 -0.24457794 … -0.2517446 0.18415777; 0.22280876 0.20289324 … -0.046928983 -0.1260787; … ; 0.21314496 -0.21629037 … 0.17606473 0.05806192; 0.2755461 -0.092282 … 0.035987414 0.30364904], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Float32[0.0; 0.0; … ; 0.0; 0.0;;], Float32[0.14366944 -0.058754448 … 0.09121899 -0.035052814], Float32[0.0]])

In [23]:
using Flux.Losses: mse

function seq_loss(x, y)
  Flux.reset!(rnn_model)
  sum(mse(rnn_model(xi), yi) for (xi, yi) in zip(x[2:end], y))
  # MSE (Generated Sequence, Truth Sequence)
  # Generated Sequence = All outputs of RNN(X) (X is fed into the RNN 1 at a time hence xi iterator preserving state)
  # Alternatively Generated = All outputs of RNN(X) after X has been fully fed into the network (Encoder Decoder paradigm) where you feed in X as xi and then a "start token" and then feed in your last output until you see an "end token" or get the correct # of outputs
  # Encoder Decoder setup is used when you're uncertain of the length of the input sequence (I am a student) : English to French 4 words to 3 words
  # If you know how long your input, ouput are you don't need to do that
end

seq_loss (generic function with 1 method)

In [24]:
seq_loss(vec_data[1:10], vec_truth[1:10]) # Sanity check sequence loss

2.5779995f10

In [25]:
# Terrible windowing example
# Sequence of 100 timestamps
# Sequence of 10 timestamp length Sequences so that we can train an RNN to compute "clean" magnetic signals in 10 second windows
# T1:T10, T2:T11, T3:T12 ... T91:T100 (because you only have 100) - maximizes sequences, better for training
# below is T1:T10, T11:T20, ... aka the laziest windows you can make - minimizes the number of times you "see" each data point, technically can bias your data

# For this data there's 2 major pitfalls
# 1 - different tie lines are not necessarily contiguous (gaps in the data in each flight don't mix tielines that have gaps), check for gaps with the dt value
# 2 - different flights (please don't take 5 seconds from flight 1002 and mix it with 5 seconds from flight 1003), don't cross the flight streams please.
sequence_example = Vector([Vector(Ai) for Ai in eachcol(reshape(vec_data, 10, 10))])
sequence_truth = Vector([Vector(Ai) for Ai in eachcol(reshape(vec_truth, 10, 10))])
pairs = zip(sequence_example, sequence_truth);
display([rnn_model(Ai) for Ai in sequence_example[1]])
display(sequence_truth[1])

10-element Vector{Vector{Float32}}:
 [0.48300937]
 [0.48300937]
 [0.48300937]
 [0.48300937]
 [0.48300937]
 [0.48300937]
 [0.48300937]
 [0.48300937]
 [0.48300937]
 [0.48300937]

10-element Vector{Vector{Float32}}:
 [53516.758]
 [53517.75]
 [53518.77]
 [53519.83]
 [53520.926]
 [53522.047]
 [53523.168]
 [53524.266]
 [53525.344]
 [53526.395]

In [26]:
Flux.train!(seq_loss, params, pairs, opt)

In [30]:
seq_loss(sequence_example[1], sequence_truth[1])

2.577968f10

In [31]:
display([rnn_model(Ai) for Ai in sequence_example[1]])
display(sequence_truth[1])

10-element Vector{Vector{Float32}}:
 [0.8130375]
 [0.8130375]
 [0.8130375]
 [0.8130375]
 [0.8130375]
 [0.8130375]
 [0.8130375]
 [0.8130375]
 [0.8130375]
 [0.8130375]

10-element Vector{Vector{Float32}}:
 [53516.758]
 [53517.75]
 [53518.77]
 [53519.83]
 [53520.926]
 [53522.047]
 [53523.168]
 [53524.266]
 [53525.344]
 [53526.395]

**Example Data Generator**

In [46]:
# Grab all of the tielines in the datafile
data_dir  = "../data/"
data_file = string(data_dir,"/Flt1002-train.h5")
xyz_data  = get_flight_data(data_file);
a = xyz_data.LINE
tielines = unique(a)


> Reading in file: ../data//Flt1002-train.h5


28-element Vector{Float64}:
 1002.01
 1002.02
 1002.03
  158.0
 1002.04
  160.0
 1002.05
 1367.0
 1002.06
 1368.0
    ⋮
 1002.13
 1002.14
 1002.15
 1002.16
 1002.17
 1002.18
 1002.19
 1002.2
 1002.21

In [62]:
# Model setup
rnn_model = Chain(
  RNN(1, 32),
  Dense(32, 1))
display(rnn_model)

# Grab Parameters
params = Flux.params(rnn_model)

# Build Loss
function seq_loss(x, y)
  Flux.reset!(rnn_model)
  sum(mse(rnn_model(xi), yi) for (xi, yi) in zip(x[2:end], y))
end

# Select Optimizer
opt = Flux.Optimise.ADAM()

Chain(
  Recur(
    RNNCell(1, 32, tanh),               [90m# 1_120 parameters[39m
  ),
  Dense(32, 1),                         [90m# 33 parameters[39m
)[90m         # Total: 6 trainable arrays, [39m1_153 parameters,
[90m          # plus 1 non-trainable, 32 parameters, summarysize [39m4.871 KiB.

ADAM(0.001, (0.9, 0.999), IdDict{Any, Any}())

In [61]:
windowsize = 10 # Set windowsize to 10 seconds
float32_data = Float32.(mag_5_c) # Type conversions
float32_truth = Float32.(mag_1_c) 
for tieline in a
    i1         = findfirst(xyz_data.LINE .== tieline)
    i2         = findlast(xyz_data.LINE .== tieline)
    if i2-i1 < windowsize # Ensure minimum window, could be combined with the iterator if you like
        continue
    end
    window_starts = i1:(i2-windowsize)
    for i in window_starts
        data = Vector([Vector{Float32}([Ai]) for Ai in float32_data[i:i+windowsize-1]]) # Datatype massaging, 1 indexing
        truth = Vector([Vector{Float32}([Ai]) for Ai in float32_truth[i:i+windowsize-1]])
        grads = Flux.gradient(params) do 
            seq_loss([data,truth]...)
        end
        Flux.Optimise.update!(opt, params, grads)
    end
end