# Data Exploration and Baseline Calibration

## Import Packages

In [1]:
using Pkg; Pkg.activate("../"); Pkg.update(); Pkg.instantiate()
using MagNav
using Plots
gr()

[32m[1m  Activating[22m[39m project at `c:\Users\taylo\Documents\GitHub\MagNav_v2.jl`
[32m[1m    Updating[22m[39m registry at `C:\Users\taylo\.julia\registries\General`
[32m[1m    Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[32m[1m   Installed[22m[39m HDF5_jll ─────────── v1.12.1+0
[32m[1m   Installed[22m[39m GR_jll ───────────── v0.63.1+0
[32m[1m   Installed[22m[39m RelocatableFolders ─ v0.1.3
[32m[1m   Installed[22m[39m ExprTools ────────── v0.1.7
[32m[1m   Installed[22m[39m StaticArrays ─────── v1.3.2
[32m[1m   Installed[22m[39m RecipesPipeline ──── v0.5.0
[32m[1m   Installed[22m[39m AbstractFFTs ─────── v1.1.0
[32m[1m   Installed[22m[39m Parsers ──────────── v2.2.0
[32m[1m   Installed[22m[39m OpenSSL_jll ──────── v1.1.13+0
[32m[1m   Installed[22m[39m Requires ─────────── v1.3.0
[32m[1m   Installed[22m[39m Plots ────────────── v1.25.6
[32m[1m   Installed[22m[39m JLLWrappers ──────── v1.4.0
[3

Plots.GRBackend()

## Get flight data

In [2]:
data_dir  = "../data/"
data_file = string(data_dir,"/Flt1002-train.h5")
xyz_data  = get_flight_data(data_file);


> Reading in file: ../data//Flt1002-train.h5


Take a look at the fields of the `XYZ` struct, either in `MagNav.jl` module or right in this notebook. 

In [3]:
fieldnames(MagNav.XYZ)

(:N, :DT, :LINE, :FLT, :TIME, :UTM_X, :UTM_Y, :UTM_Z, :MSL_Z, :LAT, :LONG, :BARO, :RADAR, :TOPO, :DEM, :DRAPE, :PITCH, :ROLL, :AZIMUTH, :DIURNAL, :COMPMAG1, :LAGMAG1, :DCMAG1, :IGRFMAG1, :UNCOMPMAG1, :UNCOMPMAG2, :UNCOMPMAG3, :UNCOMPMAG4, :UNCOMPMAG5, :FLUXB_X, :FLUXB_Y, :FLUXB_Z, :FLUXB_TOT, :FLUXC_X, :FLUXC_Y, :FLUXC_Z, :FLUXC_TOT, :FLUXD_X, :FLUXD_Y, :FLUXD_Z, :FLUXD_TOT, :OGS_MAG, :OGS_HGT, :INS_ACC_X, :INS_ACC_Y, :INS_ACC_Z, :INS_WANDER, :INS_LAT, :INS_LON, :INS_HGT, :INS_VEL_N, :INS_VEL_W, :INS_VEL_V, :PITCHRT, :ROLLRT, :YAWRT, :LONG_ACC, :LAT_ACC, :NORM_ACC, :TRUE_AS, :PITOT_P, :STATIC_P, :TOT_P, :CUR_COM1, :CUR_ACHi, :CUR_ACLo, :CUR_TANK, :CUR_FLAP, :CUR_STRB, :CUR_SRVO_O, :CUR_SRVO_M, :CUR_SRVO_I, :CUR_IHTR, :CUR_ACPWR, :CUR_OUTPWR, :CUR_BAT1, :CUR_BAT2, :V_ACPWR, :V_OUTPWR, :V_BAT1, :V_BAT2, :V_RESp, :V_RESn, :V_BACKp, :V_BACKn, :V_GYRO1, :V_GYRO2, :V_ACCp, :V_ACCn, :V_BLOCK, :V_BACK, :V_SERVO, :V_CABT, :V_FAN)

## Tolles-Lawson Calibration

In [5]:
# define line numbers for Compensation 1 segment, this is the segment that generates the "intial" Tolles Lawson calibration
i1         = findfirst(xyz_data.LINE .== 1002.02)
i2         = findlast( xyz_data.LINE .== 1002.02)
i1, i2

(12702, 25002)

In [6]:
## create Tolles-Lawson coefficients
pass1 = 0.1  # first  passband frequency [Hz]
pass2 = 0.9  # second passband frequency [Hz]
fs    = 10.0 # sampling frequency [Hz]
TL_coef_1  = create_TL_coef(xyz_data.FLUXB_X[i1:i2],
                            xyz_data.FLUXB_Y[i1:i2],
                            xyz_data.FLUXB_Z[i1:i2],
                            xyz_data.UNCOMPMAG1[i1:i2];
                            pass1=pass1,pass2=pass2,fs=fs)
TL_coef_3  = create_TL_coef(xyz_data.FLUXB_X[i1:i2],
                            xyz_data.FLUXB_Y[i1:i2],
                            xyz_data.FLUXB_Z[i1:i2],
                            xyz_data.UNCOMPMAG3[i1:i2];
                            pass1=pass1,pass2=pass2,fs=fs)
TL_coef_5  = create_TL_coef(xyz_data.FLUXB_X[i1:i2],
                            xyz_data.FLUXB_Y[i1:i2],
                            xyz_data.FLUXB_Z[i1:i2],
                            xyz_data.UNCOMPMAG5[i1:i2];
                            pass1=pass1,pass2=pass2,fs=fs)


18-element Vector{Float64}:
  225.5485247927409
  337.1809350176073
  -76.2120948543329
  531.9068878175069
  195.69987382838244
 -277.7918811320834
  539.5741774120652
 -225.89452517872803
  923.2693316470381
 4469.906155680168
 1550.0247557993712
  694.9310104918216
 1541.3023848836708
 4652.449079079208
  329.03857171710956
  174.46696067279544
  188.068023291705
 6513.13686776687

In [7]:
## create Tolles-Lawson A matrix
A = create_TL_A(xyz_data.FLUXB_X,
                xyz_data.FLUXB_Y,
                xyz_data.FLUXB_Z);

In [8]:
## calibrated magnetometer measurements
mag_1_c = xyz_data.UNCOMPMAG1 - A*TL_coef_1 .+ mean(A*TL_coef_1); # Truth Signal
mag_3_c = xyz_data.UNCOMPMAG3 - A*TL_coef_3 .+ mean(A*TL_coef_3); # Medium Difficulty
mag_5_c = xyz_data.UNCOMPMAG5 - A*TL_coef_5 .+ mean(A*TL_coef_5); # Easy Difficulty

In [11]:
import Pkg; Pkg.add("Flux") # unnecessary if your julia environment has flux already
using Flux

[32m[1m   Resolving[22m[39m package versions...
[32m[1m    Updating[22m[39m `C:\Users\taylo\Documents\GitHub\MagNav_v2.jl\Project.toml`
 [90m [587475ba] [39m[92m+ Flux v0.12.8[39m
[32m[1m    Updating[22m[39m `C:\Users\taylo\Documents\GitHub\MagNav_v2.jl\Manifest.toml`
 [90m [1520ce14] [39m[92m+ AbstractTrees v0.3.4[39m
 [90m [4fba245c] [39m[92m+ ArrayInterface v3.2.2[39m
 [90m [ab4f0b2a] [39m[92m+ BFloat16s v0.2.0[39m
 [90m [fa961155] [39m[92m+ CEnum v0.4.1[39m
 [90m [052768ef] [39m[92m+ CUDA v3.6.4[39m
 [90m [082447d4] [39m[92m+ ChainRules v1.18.1[39m
 [90m [1a297f60] [39m[92m+ FillArrays v0.12.7[39m
 [90m [587475ba] [39m[92m+ Flux v0.12.8[39m
 [90m [d9f16b24] [39m[92m+ Functors v0.2.7[39m
 [90m [0c68f7d7] [39m[92m+ GPUArrays v8.1.3[39m
 [90m [61eb1bfa] [39m[92m+ GPUCompiler v0.13.10[39m
 [90m [7869d1d1] [39m[92m+ IRTools v0.4.4[39m
 [90m [615f187c] [39m[92m+ IfElse v0.1.1[39m
 [90m [e5e0dc1b] [39m[92m+ Juno v0.8.4

## Basic Dense Model
This is a basic feed forward neural network. It shouldn't work as it isn't using windowing or anything complicated but there's quite a bit that can be done here if you're willing to involve multiple fields or incorporate time to some extent

an article about a similar network is available at https://fluxml.ai/tutorials/2021/01/26/mlp.html

General Background about the training loop implementation : https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/training.md

In [96]:
model = Chain(Dense(1, 32), Dense(32, 1)) # Very Basic Dense Model

Chain(
  Dense(1, 32),                         [90m# 64 parameters[39m
  Dense(32, 1),                         [90m# 33 parameters[39m
)[90m                   # Total: 4 arrays, [39m97 parameters, 644 bytes.

In [97]:
display(model.layers[1]) # Displaying layer weights
display(model.layers[2])

Dense(1, 32)        [90m# 64 parameters[39m

Dense(32, 1)        [90m# 33 parameters[39m

In [98]:
using Flux.Losses: mse
function seq_loss(x, y) # Seq2Seq Loss for model
    sum(mse(model(xi), yi) for (xi, yi) in zip(x, y))
  end;

In [108]:
a = mag_3_c[1:100]
a = Float32.(a)
b = model([a[1]])
display(b)

1-element Vector{Float32}:
 12036.405

In [126]:
# Collect Data
data = a
display(data)
truth = Float32.(mag_1_c[1:100])

100-element Vector{Float32}:
 52784.082
 52788.543
 52790.887
 52788.7
 52782.336
 52775.2
 52772.4
 52776.2
 52784.11
 52792.156
     ⋮
 52924.242
 52933.3
 52938.918
 52938.844
 52932.13
 52921.22
 52911.5
 52907.0
 52907.637

100-element Vector{Float32}:
 53516.758
 53517.75
 53518.77
 53519.83
 53520.926
 53522.047
 53523.168
 53524.266
 53525.344
 53526.395
     ⋮
 53649.95
 53651.594
 53653.254
 53654.938
 53656.652
 53658.387
 53660.13
 53661.875
 53663.633

In [127]:
# Select Optimizer
opt = Flux.Optimise.ADAM()

ADAM(0.001, (0.9, 0.999), IdDict{Any, Any}())

In [128]:
# Grab Parameters
params = Flux.params(model)

Params([Float32[-0.034607787; -0.1583108; … ; -0.007617942; -0.18683483;;], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Float32[0.4193311 0.027424144 … 0.041346405 -0.4040289], Float32[0.0]])

In [129]:
Flux.train!(mse, params, zip(data, truth), opt)

In [130]:
display(model([data[1]]))
truth[1]

1-element Vector{Float32}:
 12036.405

53516.758f0

## Basic CNN

The data's clearly in the wrong format. For our purposes we'll want vectors and additional features to make each time slice look more like an image. We can go into more depth with windowing if people would like to pursue this.
https://fluxml.ai/tutorials/2021/02/07/convnet.html More Details

## Basic RNN
The data is currently in a format that could work but would only have 1 sample. For this subset I'll slice it up into vectors of length 10 acting as "windows" of length 10. You'll need to rework this code to work for your application.

 a generative RNN example is available at https://fluxml.ai/Flux.jl/v0.2/examples/char-rnn.html

In [200]:
vec_data = [Vector{Float32}([aii]) for aii in data]
display(vec_data)
vec_truth = [Vector{Float32}([aii]) for aii in truth]

100-element Vector{Vector{Float32}}:
 [52784.082]
 [52788.543]
 [52790.887]
 [52788.7]
 [52782.336]
 [52775.2]
 [52772.4]
 [52776.2]
 [52784.11]
 [52792.156]
 ⋮
 [52924.242]
 [52933.3]
 [52938.918]
 [52938.844]
 [52932.13]
 [52921.22]
 [52911.5]
 [52907.0]
 [52907.637]

100-element Vector{Vector{Float32}}:
 [53516.758]
 [53517.75]
 [53518.77]
 [53519.83]
 [53520.926]
 [53522.047]
 [53523.168]
 [53524.266]
 [53525.344]
 [53526.395]
 ⋮
 [53649.95]
 [53651.594]
 [53653.254]
 [53654.938]
 [53656.652]
 [53658.387]
 [53660.13]
 [53661.875]
 [53663.633]

In [217]:
rnn_model = Chain(
  RNN(1, 32),
  Dense(32, 1))
display(rnn_model)
# Sanity check our model real quick
rnn_model(vec_data[1])

Chain(
  Recur(
    RNNCell(1, 32, tanh),               [90m# 1_120 parameters[39m
  ),
  Dense(32, 1),                         [90m# 33 parameters[39m
)[90m         # Total: 6 trainable arrays, [39m1_153 parameters,
[90m          # plus 1 non-trainable, 32 parameters, summarysize [39m4.871 KiB.

1-element Vector{Float32}:
 0.8471235

In [218]:
params = Flux.params(rnn_model)

Params([Float32[-0.1684712; -0.102477916; … ; 0.20592742; 0.19406015;;], Float32[-0.28689557 -0.015787048 … -0.058870036 0.21435508; -0.07076025 -0.049189042 … 0.2109813 0.003312835; … ; 0.25275856 -0.25520268 … 0.01926096 0.12769939; -0.23041661 -0.1272083 … -0.13329703 0.28333277], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Float32[0.0; 0.0; … ; 0.0; 0.0;;], Float32[-0.20546368 -0.010122894 … -0.36216262 -0.21341798], Float32[0.0]])

In [219]:
using Flux.Losses: mse

function seq_loss(x, y)
  Flux.reset!(rnn_model)
  sum(mse(rnn_model(xi), yi) for (xi, yi) in zip(x[2:end], y))
end

seq_loss (generic function with 1 method)

In [220]:
seq_loss(vec_data[1:10], vec_truth[1:10]) # Sanity check sequence loss

2.5779644f10

In [221]:
sequence_example = Vector([Vector(Ai) for Ai in eachcol(reshape(vec_data, 10, 10))])
sequence_truth = Vector([Vector(Ai) for Ai in eachcol(reshape(vec_truth, 10, 10))])
pairs = zip(sequence_example, sequence_truth);
display([rnn_model(Ai) for Ai in sequence_example[1]])
display(sequence_truth[1])

10-element Vector{Vector{Float32}}:
 [0.8471235]
 [0.8471235]
 [0.8471235]
 [0.8471235]
 [0.8471235]
 [0.8471235]
 [0.8471235]
 [0.8471235]
 [0.8471235]
 [0.8471235]

10-element Vector{Vector{Float32}}:
 [53516.758]
 [53517.75]
 [53518.77]
 [53519.83]
 [53520.926]
 [53522.047]
 [53523.168]
 [53524.266]
 [53525.344]
 [53526.395]

In [222]:
Flux.train!(seq_loss, params, pairs, opt)

In [223]:
loss(sequence_example[1], sequence_truth[1])

2.5779331f10

In [224]:
display([rnn_model(Ai) for Ai in sequence_example[1]])
display(sequence_truth[1])

10-element Vector{Vector{Float32}}:
 [1.1771514]
 [1.1771514]
 [1.1771514]
 [1.1771514]
 [1.1771514]
 [1.1771514]
 [1.1771514]
 [1.1771514]
 [1.1771514]
 [1.1771514]

10-element Vector{Vector{Float32}}:
 [53516.758]
 [53517.75]
 [53518.77]
 [53519.83]
 [53520.926]
 [53522.047]
 [53523.168]
 [53524.266]
 [53525.344]
 [53526.395]