# Loading Data
**For more examples of what Kosh can do, visit [GitHub Examples](https://github.com/LLNL/kosh/tree/stable/examples).**

In [None]:
from numbers import Number
from collections import defaultdict

import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

import kosh
import math
import statistics
import numpy as np

%matplotlib notebook

# Initialization
# Path of the SQLite file handed to kosh.connect() below.
database = './data/num_res_output.sqlite'
datastore = kosh.connect(database)

# NOTE(review): target_type is not read anywhere in the cells shown here;
# kept because other cells may rely on it.
target_type = "csv_rec"

print("Kosh is ready!")

# Adding Data to Records

In [None]:
def RMSE(x_true, x_pred):
    """Return the root-mean-square error between two equal-length sequences.

    Args:
        x_true: Sized iterable of reference values.
        x_pred: Sized iterable of values compared element-wise with
            ``x_true``.

    Returns:
        The square root of the mean of the squared element-wise differences.

    Raises:
        ValueError: If the inputs differ in length or are empty.
    """
    n_samples = len(x_true)

    # zip() stops at the shorter input, so without this check a partial sum
    # would be divided by the full len(x_true) and yield a wrong mean.
    if len(x_pred) != n_samples:
        raise ValueError(
            f"x_true and x_pred must have the same length "
            f"(got {n_samples} and {len(x_pred)})"
        )
    if n_samples == 0:
        raise ValueError("RMSE is undefined for empty input")

    squared_error = 0
    for xt, xp in zip(x_true, x_pred):
        squared_error += (xt - xp) ** 2

    return math.sqrt(squared_error / n_samples)


def diff(x_true, x_pred):
    """Return the element-wise differences ``x_true - x_pred`` as a list.

    Values are paired with ``zip``, so the result is as long as the shorter
    of the two inputs.
    """
    return [reference - estimate for reference, estimate in zip(x_true, x_pred)]


###################################################
# Finding common timesteps using largest timestep #
###################################################

# Feature holding the time axis of each record.
time_feature = 'physics_cycle_series/time'

# Look up the three reference records by id and take each one's time series.
val_largest_time_step = list(datastore.find(id_pool='47bcda_3_15'))[0]
time_largest_time_step = val_largest_time_step[time_feature]

val_nominal_time_step = list(datastore.find(id_pool='47bcda_3_20'))[0]
time_nominal_time_step = val_nominal_time_step[time_feature]

val_smallest_time_step = list(datastore.find(id_pool='47bcda_3_25'))[0]
time_smallest_time_step = val_smallest_time_step[time_feature]

# Time values present in all three series. The list comes from a set, so its
# order is arbitrary and values are matched by exact equality.
time_sets = (
    set(series)
    for series in (
        time_largest_time_step,
        time_nominal_time_step,
        time_smallest_time_step,
    )
)
time_in_all = list(set.intersection(*time_sets))

# Build the lookup once: testing membership in a set is O(1), whereas scanning
# the time_in_all list for every sample of every record is O(n * m).
# time_in_all holds unique values (it was produced from a set), so each sample
# is kept at most once, exactly as with the nested scan.
common_times = set(time_in_all)

for dataset in datastore.find():  # Each record is now a dataset

    x_pred = dataset['physics_cycle_series/x_pos'][:]
    y_pred = dataset['physics_cycle_series/y_pos'][:]
    z_pred = dataset['physics_cycle_series/z_pos'][:]
    time = dataset['physics_cycle_series/time'][:]

    # Indices of the samples whose time stamp is shared by the reference runs.
    # NOTE(review): matching relies on exact equality of the time values.
    kept = [i for i, t in enumerate(time) if t in common_times]

    x_pred_common = [x_pred[i] for i in kept]
    y_pred_common = [y_pred[i] for i in kept]
    z_pred_common = [z_pred[i] for i in kept]
    # time_common from the last record is reused after this loop by the
    # mean/standard-deviation section, so the name must stay at top level.
    time_common = [time[i] for i in kept]

    # Store the filtered samples on the record as the 'common_data' curve set;
    # np.array(...).tolist() turns any NumPy scalars into plain Python numbers.
    dataset.add_curve(np.array(time_common).tolist(), 'common_data', 'time_common')
    dataset.add_curve(np.array(x_pred_common).tolist(), 'common_data', 'x_common')
    dataset.add_curve(np.array(y_pred_common).tolist(), 'common_data', 'y_common')
    dataset.add_curve(np.array(z_pred_common).tolist(), 'common_data', 'z_common')

###############################################################
# Comparing to TICKS_PER_SECOND = 20 with new common timestep #
###############################################################

# Record '47bcda_3_20' serves as the reference ("true") run.
val = list(datastore.find(id_pool='47bcda_3_20'))[0]

# Printing Attributes and Features
print('Attributes:')
print('\t', val.list_attributes())
print('\n')
print('Features Sets:')
print('\t', val.list_features())

x_true = val['common_data/x_common'][:]
y_true = val['common_data/y_common'][:]
z_true = val['common_data/z_common'][:]
time_true = val['common_data/time_common'][:]

# (axis label, reference curve) pairs, processed in x, y, z order.
reference_curves = (('x', x_true), ('y', y_true), ('z', z_true))

for dataset in datastore.find():  # Each record is now a dataset
    print(f"id: {dataset.id}")

    # Read all three predicted curves before computing anything.
    predicted = [
        dataset[f'common_data/{axis}_common'][:] for axis, _ in reference_curves
    ]

    # Per axis: element-wise difference first, then the RMSE.
    comparisons = []
    for (axis, truth), guess in zip(reference_curves, predicted):
        residuals = diff(truth, guess)
        error = RMSE(truth, guess)
        comparisons.append((axis, residuals, error))

    for axis, _, error in comparisons:
        print(f"\t{axis}_rmse: {error}")

    # Persist each RMSE as a record attribute and each difference as a curve.
    for axis, residuals, error in comparisons:
        setattr(dataset, f'{axis}_rmse', error)
        dataset.add_curve(np.array(residuals).tolist(), 'common_data', f'{axis}_diff')


########################################################
# Mean and Standard Deviation with new common timestep #
########################################################

x_mean = []
y_mean = []
z_mean = []
x_std = []
y_std = []
z_std = []

# Load every record's common curves once. Querying the datastore and
# re-reading the features inside the per-timestep loop repeats the same
# reads for each timestep without changing the result.
x_curves = []
y_curves = []
z_curves = []
for dataset in datastore.find():  # Each record is now a dataset
    x_curves.append(dataset['common_data/x_common'][:])
    y_curves.append(dataset['common_data/y_common'][:])
    z_curves.append(dataset['common_data/z_common'][:])

# time_common is the list left over from the last record processed when the
# common curves were built; only its length is needed here.
for i in range(len(time_common)):

    # Values of every record at timestep i.
    x_temp = [curve[i] for curve in x_curves]
    y_temp = [curve[i] for curve in y_curves]
    z_temp = [curve[i] for curve in z_curves]

    x_mean.append(statistics.mean(x_temp))
    y_mean.append(statistics.mean(y_temp))
    z_mean.append(statistics.mean(z_temp))
    # statistics.stdev is the sample standard deviation and raises
    # StatisticsError when fewer than two records are available.
    x_std.append(statistics.stdev(x_temp))
    y_std.append(statistics.stdev(y_temp))
    z_std.append(statistics.stdev(z_temp))

# Leave the scratch lists empty, as before.
x_temp = []
y_temp = []
z_temp = []

# Create the record that holds the ensemble statistics and fetch it back.
datastore.create(id='mean')
mean = list(datastore.find(id_pool='mean'))[0]

# Per-axis (label, mean series, standard-deviation series), in x, y, z order.
axis_stats = (
    ('x', x_mean, x_std),
    ('y', y_mean, y_std),
    ('z', z_mean, z_std),
)
n_steps = len(time_common)

mean.add_curve(np.array(time_common).tolist(), "mean_data", 'time_common')

for axis, centre, _ in axis_stats:
    mean.add_curve(np.array(centre).tolist(), "mean_data", f'{axis}_pos_mean')

for axis, _, spread in axis_stats:
    mean.add_curve(np.array(spread).tolist(), "mean_data", f'{axis}_pos_std')

# Envelope curves: mean +/- one and two standard deviations. Each suffix is
# written for x, y, z before moving on to the next suffix.
envelopes = (
    ('mean_plus_std', lambda m, s: m + s),
    ('mean_minus_std', lambda m, s: m - s),
    ('mean_plus_2std', lambda m, s: m + 2*s),
    ('mean_minus_2std', lambda m, s: m - 2*s),
)
for suffix, combine in envelopes:
    for axis, centre, spread in axis_stats:
        band = [combine(centre[k], spread[k]) for k in range(n_steps)]
        mean.add_curve(np.array(band).tolist(), "mean_data", f'{axis}_pos_{suffix}')



# Parameter Table

In [None]:
# Flag: 1 until the table header has been printed.
header = 1

# All record ids except the derived 'mean' record, in reversed order.
id_pool = list(datastore.find(ids_only=True))
id_pool.remove('mean')
id_pool.reverse()
print(id_pool)

# Record attributes shown as table columns, after the id column.
initial_condition_names = (
    'x_pos_initial',
    'y_pos_initial',
    'z_pos_initial',
    'x_vel_initial',
    'y_vel_initial',
    'z_vel_initial',
)

for dataset_id in id_pool:

    dataset = list(datastore.find(id_pool=dataset_id))[0]

    # The header is emitted together with the first row only.
    if header == 1:
        print('| dataset.id | x_pos_initial | y_pos_initial | z_pos_initial | x_vel_initial | y_vel_initial | z_vel_initial |')
        print('| --- | --- | --- | --- | --- | --- | --- |')
        header = 0

    cells = [dataset.id]
    cells.extend(getattr(dataset, name) for name in initial_condition_names)
    print('| ' + ' | '.join(str(cell) for cell in cells) + ' |')

# Plotting QoIs and their Diff

In [None]:
# 2x3 grid: top row shows the common-timestep positions, bottom row shows the
# differences stored as '<axis>_diff'; columns are x, y, z.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))

position_axes = ('x', 'y', 'z')

for dataset_id in id_pool:

    dataset = list(datastore.find(id_pool=dataset_id))[0]

    # Every panel shares this record's time axis, so read it once.
    time_values = dataset['common_data/time_common'][:]

    for col, axis_name in enumerate(position_axes):
        ax[0, col].plot(time_values, dataset[f'common_data/{axis_name}_common'][:], label=dataset.id)
        ax[1, col].plot(time_values, dataset[f'common_data/{axis_name}_diff'][:], label=dataset.id)

for col, axis_name in enumerate(position_axes):
    for row, quantity in enumerate((f'{axis_name}_common', f'{axis_name}_diff')):
        panel = ax[row, col]
        # All panels plot against time_common; the y_diff and z_diff panels
        # used to be labelled 'time', unlike the other four.
        panel.set_xlabel('time_common')
        panel.set_ylabel(quantity)
        panel.set_title(quantity)
        panel.legend()

fig.savefig("./images/QoIs_num_res.png")

In [None]:
# Plot the ensemble mean of each position component with a shaded
# mean +/- 2 standard deviation band, one panel per axis.
mean = list(datastore.find(id_pool='mean'))[0]

time = mean['mean_data/time_common'][:]

x_pos_mean = mean['mean_data/x_pos_mean'][:]
y_pos_mean = mean['mean_data/y_pos_mean'][:]
z_pos_mean = mean['mean_data/z_pos_mean'][:]

x_pos_mean_plus_2std = mean['mean_data/x_pos_mean_plus_2std'][:]
y_pos_mean_plus_2std = mean['mean_data/y_pos_mean_plus_2std'][:]
z_pos_mean_plus_2std = mean['mean_data/z_pos_mean_plus_2std'][:]

x_pos_mean_minus_2std = mean['mean_data/x_pos_mean_minus_2std'][:]
y_pos_mean_minus_2std = mean['mean_data/y_pos_mean_minus_2std'][:]
z_pos_mean_minus_2std = mean['mean_data/z_pos_mean_minus_2std'][:]

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

# Raw string: \m, \p and \s are not valid Python escape sequences, so the
# non-raw form triggers an invalid-escape warning on current Python versions.
# The resulting label text is unchanged.
legend_labels = ['Simulation Mean', r'$\mu \pm 2 \sigma$']

# (title, mean curve, upper band, lower band) for each panel, left to right.
panels = (
    ('x_pos_mean', x_pos_mean, x_pos_mean_plus_2std, x_pos_mean_minus_2std),
    ('y_pos_mean', y_pos_mean, y_pos_mean_plus_2std, y_pos_mean_minus_2std),
    ('z_pos_mean', z_pos_mean, z_pos_mean_plus_2std, z_pos_mean_minus_2std),
)

for panel, (name, centre, upper, lower) in zip(ax, panels):
    panel.plot(time, centre)
    panel.fill_between(time, upper, lower, alpha=0.25)
    panel.legend(labels=legend_labels)

    panel.set_xlabel('time_common')
    panel.set_ylabel(name)
    panel.set_title(name)

fig.savefig("./images/QoIs_u_num.png")