# Evaluation of the DA process for Big Data Assimilation

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [8]:
def addEntryToDF(name, value, df):
    """Store `value` under the key/column `name` of `df`, modifying `df` in place.

    Callers in this notebook pass a pandas DataFrame as `df`; nothing is
    returned, the change is visible through the caller's own reference.
    """
    df[name] = value
    
def getAEres(backgroundType, percent, epoch, modelName, domain):
    """Load the result CSVs of one run and attach two derived timing columns.

    The run directory is
    ``../AEresults/<backgroundType>_<percent>P_<epoch>E_<modelName>/<domain>/``
    and must contain ``results.csv`` and ``resultsAvg.csv``.

    Returns
    -------
    (res, avgRes) : tuple of pandas.DataFrame
        `res` is ``results.csv`` unchanged. `avgRes` is ``resultsAvg.csv`` with
        the extra columns "time_DA_procedure" and "time_DA_minJ&Assimilation".
    """
    runDir = f"../AEresults/{backgroundType}_{percent}P_{epoch}E_{modelName}/{domain}/"
    res = pd.read_csv(f"{runDir}results.csv")
    avgRes = pd.read_csv(f"{runDir}resultsAvg.csv")

    rowCount = res.shape[0]

    # Total time minus the overlap and the three "saving" timings, row by row.
    procedureTime = res["time"]
    for excludedColumn in ("time_overlap", "time_saving_uc", "time_saving_u0", "time_saving_da"):
        procedureTime = procedureTime - res[excludedColumn]

    # NOTE(review): both values below are per-row Series of `res`. Assigning a
    # Series to a DataFrame column aligns on the index, so `avgRes` only keeps
    # the entries whose index labels it shares with `res` (presumably just
    # row 0). If a mean over all rows is intended, this should be
    # `.sum() / rowCount` -- confirm against the CSV layout.
    addEntryToDF("time_DA_procedure", procedureTime / rowCount, avgRes)
    addEntryToDF("time_DA_minJ&Assimilation", res["time_online"] / rowCount, avgRes)

    return res, avgRes

def createTableRes(resAll, percentages):
    """Build a summary table with one row per averaged result frame.

    Parameters
    ----------
    resAll : sequence of pandas.DataFrame
        One averaged-result frame per entry of `percentages`. Each frame must
        provide the columns "mse_DA", "mse_da_overlap", "percent_improvement",
        "time_DA_minJ&Assimilation" and "time_DA_procedure"; only the first
        row of each frame is read.
    percentages : sequence
        Label for each frame, stored as a string in the "Percent" column.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: "Percent", "MSE_DA", "MSE Overlap",
        "% Improvement MSE DA", "J Minimisation & Assimilation (s)",
        "Execution_Time (s)". Empty (headers only) when `resAll` is empty.

    Raises
    ------
    IndexError
        If `percentages` has fewer entries than `resAll`.
    KeyError
        If a frame lacks one of the required columns.
    """
    headers = ["Percent", "MSE_DA", "MSE Overlap", "% Improvement MSE DA", "J Minimisation & Assimilation (s)", "Execution_Time (s)"]
    # Output header -> column of the averaged-result frame it is read from.
    sourceColumns = {
        "MSE_DA": "mse_DA",
        "MSE Overlap": "mse_da_overlap",
        "% Improvement MSE DA": "percent_improvement",
        "J Minimisation & Assimilation (s)": "time_DA_minJ&Assimilation",
        "Execution_Time (s)": "time_DA_procedure",
    }

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for idx, result in enumerate(resAll):
        record = {"Percent": str(percentages[idx])}
        for header, column in sourceColumns.items():
            record[header] = result[column].iloc[0]
        records.append(record)

    return pd.DataFrame(records, columns=headers)

def findBestDA(resAll, percentages):
    """Collect, for every result frame, the row(s) with the smallest "mse_DA".

    Parameters
    ----------
    resAll : sequence of pandas.DataFrame
        Non-empty sequence of result frames, each with an "mse_DA" column.
    percentages : sequence
        Label for each frame, written to the "Percent" column of its row(s).

    Returns
    -------
    pandas.DataFrame
        The selected rows of all frames stacked with a fresh 0..n-1 index.
        Besides the original columns it carries "Percent" and "Test Idx"
        (the row's index label in its source frame). Ties on the minimum
        keep every tied row.

    Raises
    ------
    IndexError
        If `resAll` is empty or `percentages` is shorter than `resAll`.
    """
    # Column layout of the first frame with "Unnamed: 0" shown as "Percent";
    # these columns lead the output, any further columns follow.
    leadingColumns = ["Percent" if col == "Unnamed: 0" else col for col in resAll[0].columns]

    bestRows = []
    for idx, result in enumerate(resAll):
        # .copy() so that the new columns are written to an independent frame
        # and not to a slice of `result` (avoids SettingWithCopyWarning).
        bestTest = result[result.mse_DA == result.mse_DA.min()].copy()
        bestTest["Percent"] = percentages[idx]
        bestTest["Test Idx"] = bestTest.index
        bestRows.append(bestTest)

    # Single concat instead of DataFrame.append, which was removed in pandas 2.0.
    best = pd.concat(bestRows, ignore_index=True, sort=False)
    orderedColumns = list(dict.fromkeys(leadingColumns + list(best.columns)))
    return best.reindex(columns=orderedColumns)

def getMeanFromDF(dfAll):
    """Return the column means of "MSE_DA" and "Execution_Time (s)".

    Only the two needed columns are averaged. Taking the mean of the whole
    frame fails on recent pandas versions when it contains non-numeric
    columns such as the string-valued "Percent".

    Parameters
    ----------
    dfAll : pandas.DataFrame
        Frame with at least the columns "MSE_DA" and "Execution_Time (s)".

    Returns
    -------
    (mse, execTime) : tuple of float
        Mean of "MSE_DA" and mean of "Execution_Time (s)".
    """
    return dfAll["MSE_DA"].mean(), dfAll["Execution_Time (s)"].mean()

def getDAResults(background, percentages, epoch, model, subdomain):
    """Display the summary table of one configuration and print its mean scores.

    For every entry of `percentages` the averaged results are loaded with
    `getAEres`, gathered into one table with `createTableRes`, and shown via
    `display`. A one-line summary with the mean "MSE_DA" and mean
    "Execution_Time (s)" is then printed. Nothing is returned.

    The loss label in the summary is "L1" when `background` contains "L1",
    otherwise "L2".
    """
    # Only the averaged frame (second element of getAEres' result) is needed.
    resAvgAll = [
        getAEres(background, percent, epoch, model, subdomain)[1]
        for percent in percentages
    ]

    dfAll = createTableRes(resAvgAll, percentages)
    display(dfAll)
    # findBestDA can be applied to the per-test frames (first element of
    # getAEres' result) to inspect the best individual test per percentage.

    mse, execTime = getMeanFromDF(dfAll)

    loss = "L2"
    if "L1" in background:
        loss = "L1"

    summary = "{} - u_0 = {} - Subdomain{} - {} - MSE DA mean = {} - Execution Time (s) = {}"
    print(summary.format(model, background, subdomain, loss, mse, execTime))

## 1D - 2 Linear Layers - u_0 = Mean Historical Data

In [9]:
# Each subdomain is evaluated on its own list of percentages.
for subdomain, percentages in [
    (8, [10, 20, 40, 50, 60, 70, 80, 90]),
    (6, [10, 20, 40]),
]:
    getDAResults("MeanHist", percentages, 150, "1D2L", subdomain)

Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.078816,0.075709,28.819628,0.002165,0.010937
1,20,0.08158,0.079492,25.872866,0.002045,0.010204
2,40,0.08264,0.082265,24.38379,0.001773,0.010827
3,50,0.08317,0.081122,23.951239,0.001974,0.011564
4,60,0.083343,0.0817,23.778431,0.002022,0.011548
5,70,0.083359,0.077884,23.727399,0.001897,0.01134
6,80,0.083278,0.074812,23.831302,0.002504,0.012142
7,90,0.083336,0.074764,23.741241,0.001727,0.011346


1D2L - u_0 = MeanHist - Subdomain8 - L2 - MSE DA mean = 0.08244034765666435 - Execution Time (s) = 0.011238515934097441


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.075592,0.075494,77.580446,0.002177,0.01075
1,20,0.074824,1.946295,77.298672,0.001651,0.009471
2,40,0.07999,0.079411,73.286747,0.001719,0.01174


1D2L - u_0 = MeanHist - Subdomain6 - L2 - MSE DA mean = 0.07680215640581949 - Execution Time (s) = 0.010653650277871603


# 1D - 2 Linear Layers - u_0 = t_10 - R = 0.05

In [10]:
# For each subdomain: first the "Idx" background, then "IdxL1", each with its
# own list of percentages.
for subdomain in (8, 6):
    for background, percentages in [
        ("Idx", [10, 20, 40, 50, 60, 70, 80, 90]),
        ("IdxL1", [10, 20, 40]),
    ]:
        getDAResults(background, percentages, 150, "1D2L", subdomain)

Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.231931,0.233833,11.102313,0.004923,0.031678
1,20,0.231666,0.232599,10.364687,0.00267,0.029229
2,40,0.235365,0.232333,8.6707,0.005146,0.033305
3,50,0.234667,0.228309,8.862131,0.00747,0.072134
4,60,0.236082,0.240088,7.834036,0.006429,0.123434
5,70,0.23569,0.240398,8.379723,0.003898,0.066546
6,80,0.235629,0.228172,8.044218,0.00227,0.024586
7,90,0.234077,0.657805,8.918379,0.002896,0.065875


1D2L - u_0 = Idx - Subdomain8 - L2 - MSE DA mean = 0.2343882022716461 - Execution Time (s) = 0.05584836034017188


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.236802,0.776869,7.649742,0.00254,0.012039
1,20,0.23101,0.244204,9.826167,0.003935,0.026653
2,40,0.235301,2.326109,8.489758,0.005028,0.028691


1D2L - u_0 = IdxL1 - Subdomain8 - L1 - MSE DA mean = 0.23437095624121929 - Execution Time (s) = 0.022460933786314963


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.751357,0.75135,23.523592,0.00306,0.0243
1,20,0.766184,0.574345,21.596257,0.001691,0.010424
2,40,0.790763,0.791282,18.263678,0.001759,0.011953
3,50,0.784835,0.785349,18.95937,0.005894,0.076076
4,60,0.784835,0.785349,18.95937,0.001728,0.012343
5,70,0.784835,0.785349,18.95937,0.004994,0.075747
6,80,0.784835,0.785349,18.95937,0.003194,0.023791
7,90,0.784835,0.785349,18.95937,0.001805,0.012128


1D2L - u_0 = Idx - Subdomain6 - L2 - MSE DA mean = 0.7790597909396275 - Execution Time (s) = 0.030845115953516765


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.77533,0.775322,20.526705,0.001763,0.01078
1,20,0.770422,0.585594,20.897464,0.002395,0.02511
2,40,0.784835,0.42288,18.95937,0.001665,0.011631


1D2L - u_0 = IdxL1 - Subdomain6 - L1 - MSE DA mean = 0.7768625721712995 - Execution Time (s) = 0.015840446466226066


In [7]:
# Same single percentage for both subdomains.
percentages = [40]
for subdomain in (8, 6):
    getDAResults("IdxMisfit", percentages, 150, "1D2L", subdomain)

Unnamed: 0,Percent,MSE_DA,% Improvement,J Minimisation & Assimilation (s),Execution_Time (s)
0,40,0.23847,6.196323,0.001802,0.010796


1D2L - u_0 = IdxMisfit - Subdomain8 - L2 - MSE DA mean = 0.2384695437093723 - Execution Time (s) = 0.010795702444058709


Unnamed: 0,Percent,MSE_DA,% Improvement,J Minimisation & Assimilation (s),Execution_Time (s)
0,40,0.778003,19.792709,0.001696,0.011858


1D2L - u_0 = IdxMisfit - Subdomain6 - L2 - MSE DA mean = 0.7780028963729645 - Execution Time (s) = 0.01185794188597492


In [8]:
# Same single percentage for both subdomains.
percentages = [40]
for subdomain in (8, 6):
    getDAResults("IdxR0.05", percentages, 150, "1D2L", subdomain)

Unnamed: 0,Percent,MSE_DA,% Improvement,J Minimisation & Assimilation (s),Execution_Time (s)
0,40,0.235365,8.67046,0.003297,0.026637


1D2L - u_0 = IdxR0.05 - Subdomain8 - L2 - MSE DA mean = 0.2353648541931236 - Execution Time (s) = 0.026636856738652032


Unnamed: 0,Percent,MSE_DA,% Improvement,J Minimisation & Assimilation (s),Execution_Time (s)
0,40,0.790723,18.285977,0.00167,0.012207


1D2L - u_0 = IdxR0.05 - Subdomain6 - L2 - MSE DA mean = 0.7907234003212261 - Execution Time (s) = 0.012206547728208717


# 1D - 2 Linear Layers - u_0 = t_10 - R = 0.00000000005 and R = 5

In [11]:
# Two "IdxR..." backgrounds, each evaluated on subdomain 8 and then 6.
percentages = [40]
for background in ("IdxR0.00000000005", "IdxR5"):
    for subdomain in (8, 6):
        getDAResults(background, percentages, 150, "1D2L", subdomain)

Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,40,0.235364,2.264751,8.670756,0.003773,0.025714


1D2L - u_0 = IdxR0.00000000005 - Subdomain8 - L2 - MSE DA mean = 0.2353642882402057 - Execution Time (s) = 0.02571410330656532


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,40,0.790723,0.444236,18.286315,0.002666,0.023007


1D2L - u_0 = IdxR0.00000000005 - Subdomain6 - L2 - MSE DA mean = 0.7907226936433103 - Execution Time (s) = 0.023006604096599846


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,40,0.235273,2.268926,8.697485,0.001608,0.010787


1D2L - u_0 = IdxR5 - Subdomain8 - L2 - MSE DA mean = 0.2352727910428909 - Execution Time (s) = 0.010786709384383442


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,40,0.790896,0.447611,18.242075,0.001734,0.012275


1D2L - u_0 = IdxR5 - Subdomain6 - L2 - MSE DA mean = 0.7908959225306144 - Execution Time (s) = 0.01227534374344013


# 1D - 4 Linear Layers - u_0 = Mean Historical Data

In [12]:
# Both subdomains use the same percentages.
# NOTE: a former `percentage = [10, 20, 40, 60]` assignment (misspelled name)
# was never read, so the subdomain 6 call always ran with the five-element
# list below; the code now states that explicitly.
percentages = [10, 20, 40, 60, 80]
for subdomain in (8, 6):
    getDAResults("MeanHist", percentages, 150, "1D4L", subdomain)

Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.081914,0.083336,26.042046,0.001684,0.009421
1,20,0.08158,0.08248,25.872866,0.00176,0.009848
2,40,0.083636,0.089781,22.924157,0.001736,0.009933
3,60,0.085759,0.08356,19.140008,0.001715,0.010808
4,80,0.083258,0.076246,24.059493,0.001492,0.010825


1D4L - u_0 = MeanHist - Subdomain8 - L2 - MSE DA mean = 0.0832296728598868 - Execution Time (s) = 0.01016706841014256


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.074016,0.073919,78.735012,0.001578,0.009397
1,20,0.074824,0.422278,77.298672,0.001545,0.009418
2,40,0.080818,0.080226,72.529895,0.001616,0.011348
3,60,0.080809,0.080217,72.535307,0.002414,0.012893
4,80,0.080809,0.080217,72.535307,0.001787,0.011649


1D4L - u_0 = MeanHist - Subdomain6 - L2 - MSE DA mean = 0.07825513548075891 - Execution Time (s) = 0.010940953281438227


# 1D - 4 Linear Layers - u_0 = t_10

In [13]:
# Both subdomains use the same percentages. A duplicate list that was assigned
# to a misspelled, never-read `percentage` variable has been removed.
percentages = [10, 20, 30, 40, 60, 80]
for subdomain in (8, 6):
    getDAResults("Idx", percentages, 150, "1D4L", subdomain)

Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.237697,0.236639,7.099275,0.00225,0.01046
1,20,0.232723,0.229842,10.038134,0.002084,0.017252
2,30,0.237353,0.244922,7.229716,0.005348,0.067463
3,40,0.234048,0.227086,8.876565,0.008359,0.090259
4,60,0.23243,0.235794,10.16553,0.001628,0.010742
5,80,0.234555,0.225874,8.888562,0.003048,0.022222


1D4L - u_0 = Idx - Subdomain8 - L2 - MSE DA mean = 0.2348008840226491 - Execution Time (s) = 0.03639953270136753


Unnamed: 0,Percent,MSE_DA,MSE Overlap,% Improvement MSE DA,J Minimisation & Assimilation (s),Execution_Time (s)
0,10,0.751845,6.492135,23.658234,0.002619,0.020784
1,20,0.776647,8.701482,20.195283,0.005055,0.058754
2,30,0.804706,0.805236,16.449719,0.005048,0.082462
3,40,0.804706,0.805236,16.449719,0.001704,0.012648
4,60,0.804706,0.805236,16.449719,0.001769,0.011746
5,80,0.804706,0.805236,16.449719,0.001733,0.011721


1D4L - u_0 = Idx - Subdomain6 - L2 - MSE DA mean = 0.7912195198570701 - Execution Time (s) = 0.03301923037318053


# 1D - 0 Linear Layers - u_0 = Mean Historical Data

In [8]:
# Same percentages for both subdomains.
percentages = [20, 40, 60, 80]
for subdomain in (8, 6):
    getDAResults("MeanHist", percentages, 150, "1D0L", subdomain)

Unnamed: 0,Percent,MSE_DA,% Improvement,J Minimisation & Assimilation (s),Execution_Time (s)
0,20,0.069861,35.188344,0.026775,0.048178
1,40,0.068251,34.03535,0.063262,0.090528
2,60,0.073417,30.589112,0.097104,0.271406
3,80,0.069234,32.3225,0.09213,0.162108


1D0L - u_0 = MeanHist - Subdomain8 - L2 - MSE DA mean = 0.07019087532449816 - Execution Time (s) = 0.1430552150601539


Unnamed: 0,Percent,MSE_DA,% Improvement,J Minimisation & Assimilation (s),Execution_Time (s)
0,20,0.117714,59.029493,0.016822,0.036233
1,40,0.1191,59.480462,0.130287,0.229182
2,60,0.119537,59.230792,0.08951,0.184511
3,80,0.119537,59.230792,0.09124,0.187285


1D0L - u_0 = MeanHist - Subdomain6 - L2 - MSE DA mean = 0.11897218289601291 - Execution Time (s) = 0.15930282623968398


# 1D - 0 Linear Layers - u_0 = t_10

In [6]:
# Same percentages for both subdomains.
percentages = [20, 40, 60, 80]
for subdomain in (8, 6):
    getDAResults("Idx", percentages, 150, "1D0L", subdomain)

Unnamed: 0,Percent,MSE_DA,% Improvement,J Minimisation & Assimilation (s),Execution_Time (s)
0,20,0.180817,35.854788,0.04395,0.098876
1,40,0.182899,39.001288,0.050375,0.089407
2,60,0.184738,38.9489,0.124047,0.196201
3,80,0.220468,18.949046,0.06611,0.11922


1D0L - u_0 = Idx - Subdomain8 - L2 - MSE DA mean = 0.19223041569561755 - Execution Time (s) = 0.12592591860584007


Unnamed: 0,Percent,MSE_DA,% Improvement,J Minimisation & Assimilation (s),Execution_Time (s)
0,20,0.685066,32.289561,0.018429,0.038112
1,40,0.82944,17.911655,0.08593,0.192182
2,60,0.82944,17.911655,0.0887,0.274309
3,80,0.82944,17.911655,0.088266,0.184033


1D0L - u_0 = Idx - Subdomain6 - L2 - MSE DA mean = 0.7933468132135035 - Execution Time (s) = 0.17215892199043917
