In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:76% !important; }</style>"))

In [2]:
import os
import sys
import tensorflow as tf
import keras_tuner as kt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm.notebook import trange, tqdm
from pickle import (
    dump,
    load
)
from tensorflow.keras import (
    layers,
    Model,
    Sequential,
    optimizers
)
from pprint import pprint
from scipy import stats
from sklearn.preprocessing import StandardScaler, MinMaxScaler



In [3]:
BASE_DIR = "./"
DATA_FOLDER_NAME = "data"
DATA_FOLDER_PATH = os.path.join(BASE_DIR, DATA_FOLDER_NAME)
MODELS_FOLDER_NAME = "models"
MODELS_FOLDER_PATH = os.path.join(BASE_DIR, MODELS_FOLDER_NAME)
TUNERS_FOLDER_NAME = "tuners"
TUNERS_FOLDER_PATH = os.path.join(BASE_DIR, TUNERS_FOLDER_NAME)


OUTPUT_DATA_COLUMNS = ["position_x", "position_y", "position_z"]
INPUT_DATA_COLUMNS = ["angular_acceleration_x", "angular_acceleration_y", "angular_acceleration_z",
                      "angular_velocity_x", "angular_velocity_y", "angular_velocity_z",
                      "linear_acceleration_x", "linear_acceleration_y", "linear_acceleration_z",
                      "linear_velocity_x", "linear_velocity_y", "linear_velocity_z",
                      "orientation_x", "orientation_y", "orientation_z", "orientation_w", "motor_state_timestamp",
                      "barometer_altitude", "barometer_pressure", "barometer_qnh", "barometer_timestamp",
                      "magnetometer_magnetic_field_body_x", "magnetometer_magnetic_field_body_y",
                      "magnetometer_magnetic_field_body_x", "magnetometer_timestamp",
                      "rotor_a_speed", "rotor_a_thrust", "rotor_a_torque_scaler",
                      "rotor_b_speed", "rotor_b_thrust", "rotor_b_torque_scaler",
                      "rotor_c_speed", "rotor_c_thrust", "rotor_c_torque_scaler",
                      "rotor_d_speed", "rotor_d_thrust", "rotor_d_torque_scaler",
                      "rotor_timestamp"
                     ]

TIMESTAMP_COLUMNS = [
    "motor_state_timestamp",
    "barometer_timestamp",
    "magnetometer_timestamp",
    "rotor_timestamp"
]


INPUT_SEQUENCE_COLUMNS = ["angular_acceleration_x", "angular_acceleration_y", "angular_acceleration_z",
                          "linear_acceleration_x", "linear_acceleration_y", "linear_acceleration_z",
                          "orientation_x", "orientation_y", "orientation_z", "orientation_w", "motor_state_timestamp",
                          "barometer_altitude", "barometer_pressure", "barometer_qnh", "barometer_timestamp",
                          "magnetometer_magnetic_field_body_x", "magnetometer_magnetic_field_body_y",
                          "magnetometer_magnetic_field_body_x", "magnetometer_timestamp",
                          "rotor_a_speed", "rotor_a_thrust", "rotor_a_torque_scaler",
                          "rotor_b_speed", "rotor_b_thrust", "rotor_b_torque_scaler",
                          "rotor_c_speed", "rotor_c_thrust", "rotor_c_torque_scaler",
                          "rotor_d_speed", "rotor_d_thrust", "rotor_d_torque_scaler",
                          "rotor_timestamp"
                         ]
OUTPUT_SEQUENCE_COLUMNS = ["position_x", "position_y", "position_z"]
MAIN_TIMESTAMP_COLUMN = "motor_state_timestamp"
INPUT_SEQUENCE_LENGTH = 100


In [4]:
def save_model_with_scalers_binary(model, scaler_x, scaler_y, model_name: str):
    """
    Saves models with the x, y scaler objects to a binary library using pickle library
    """
    model_file_name = f"{model_name}_model.pkl"
    model_file_path = os.path.join(MODELS_FOLDER_PATH, model_file_name)
    scaler_x_file_name = f"{model_name}_scaler_x.pkl"
    scaler_x_file_path = os.path.join(MODELS_FOLDER_PATH, scaler_x_file_name)
    scaler_y_file_name = f"{model_name}_scaler_y.pkl"
    scaler_y_file_path = os.path.join(MODELS_FOLDER_PATH, scaler_y_file_name)

    with open(model_file_path, "wb") as file:
        dump(model, file)

    with open(scaler_x_file_path, "wb") as file:
        dump(scaler_x, file)

    with open(scaler_y_file_path, "wb") as file:
        dump(scaler_y, file)
        
def load_model_with_scalers_binary(model_name: str):
    """
    Saves models with the x, y scaler objects to a binary library using pickle library
    """
    model_file_name = f"{model_name}_model.pkl"
    model_file_path = os.path.join(MODELS_FOLDER_PATH, model_file_name)
    scaler_x_file_name = f"{model_name}_scaler_x.pkl"
    scaler_x_file_path = os.path.join(MODELS_FOLDER_PATH, scaler_x_file_name)
    scaler_y_file_name = f"{model_name}_scaler_y.pkl"
    scaler_y_file_path = os.path.join(MODELS_FOLDER_PATH, scaler_y_file_name)

    with open(model_file_path, "rb") as file:
        model = load(file)

    with open(scaler_x_file_path, "rb") as file:
        scaler_x = load(file)

    with open(scaler_y_file_path, "rb") as file:
        scaler_y = load(file)

    return model, scaler_x, scaler_y

In [5]:
def split_data(data: np.array):
    """
    Splits data into train, dev and test
    :return:
    """
    data_len = len(data)

    train, dev, test = np.split(data, [int(.7 * data_len), int(.95 * data_len)])

    return train, dev, test


def _convert_timestamp_to_interval_seconds(flight_input_df: pd.DataFrame, timestamp_columns: list):
    """
    Converts the timestamp fields into the amount of seconds between each two timestamps

    Note: each timestamp represents the amount eof NANO seconds (1,000,000,000 nanoseconds = 1 seconds)
    """
    # Converts the start time to time interval
    next_time_df = flight_input_df[timestamp_columns].shift(-1)
    time_diff_df = (next_time_df - flight_input_df[timestamp_columns]) / 1_000_000_000
    flight_input_df.loc[:, timestamp_columns] = time_diff_df
    return flight_input_df


def _convert_location_to_step(flight_output_df: pd.DataFrame):
    next_coordinates_df = flight_output_df.shift(-1)
    coordinate_diff = flight_output_df - next_coordinates_df

    return coordinate_diff


def load_flight_steps_from_file(csv_name: str, input_columns: list, output_columns: list):
    """

    @param csv_name:
    @param input_columns:
    @param output_columns:
    @return:
    """
    if not csv_name.endswith("csv"):
        raise ValueError(f"File with unsupported extension, expected csv (file: {csv_name})")

    csv_path = os.path.join(DATA_FOLDER_PATH, csv_name)
    flight_df = pd.read_csv(csv_path)

    x_df = flight_df[input_columns].copy()
    timestamp_columns = [column for column in input_columns if column in TIMESTAMP_COLUMNS]
    x_df = _convert_timestamp_to_interval_seconds(x_df, timestamp_columns)

    y_df = flight_df[output_columns].copy()
    y_df = _convert_location_to_step(y_df)

    # Drops the last record because the process is based of difference
    x_df.drop(x_df.tail(1).index, inplace=True)
    y_df.drop(y_df.tail(1).index, inplace=True)

    return x_df, y_df

In [6]:
def load_preprocessed_flight_sequences(input_columns: list, output_columns: list, sequence_length:int):
    """
    Loads flight steps and orders it to sequences of sequence_length length.
    In order to feed it to rnn/lstm s2s model

    @param input_columns: The input columns
    @param output_columns: The outputs columns
    @param sequence_length: Target sequence length
    @return:
    """
    all_csv_files = os.listdir(DATA_FOLDER_PATH)

    # x, y data from all flight sessions
    x_sessions = []
    y_sessions = []

    # The data feed to the rnn model
    sequences_x = []
    sequences_y = []

    for csv_name in all_csv_files:
        try:
            x_df, y_df = load_flight_steps_from_file(csv_name, input_columns, output_columns)

            x_sessions.append(x_df.to_numpy())
            y_sessions.append(y_df.to_numpy())

        except ValueError as error:
            print(str(error))

    all_x_data = np.concatenate(x_sessions)
    all_y_data = np.concatenate(y_sessions)

    # creating normalizers
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    
    scaler_x.fit(all_x_data)
    scaler_y.fit(all_y_data)

    for session_data_x, session_data_y in zip(x_sessions, y_sessions):
        normalized_data_x = scaler_x.transform(session_data_x)
        steps_amount = normalized_data_x.shape[0]
        normalized_data_y = scaler_y.transform(session_data_y)

        # Splits the data into data sequences
        for offset in range(steps_amount - sequence_length):
            sequences_x.append(normalized_data_x[offset: offset + sequence_length, :])
            sequences_y.append(np.add.accumulate(normalized_data_y[offset: offset + sequence_length, :]))
        
    return sequences_x, sequences_y, scaler_x, scaler_y

def load_preprocessed_rnn_dataset(input_columns: list, output_columns: list):
    """
    Loads the whole dataset with preprocessing

    @param input_columns: Input columns names
    @param output_columns: Output columns names
    @param sequence_length: The size of input x sequence
    @return: Loaded, preprocessed, shuffled, splitted data set
    """
    flight_data_x, flight_data_y, scaler_x, scaler_y = load_preprocessed_flight_sequences(input_columns, output_columns,
                                                                                         INPUT_SEQUENCE_LENGTH)

    # flight_data_x, flight_data_y = shuffle_data_set(flight_data_x, flight_data_y)
    
    train_x, dev_x, test_x = split_data(flight_data_x)
    train_y, dev_y, test_y = split_data(flight_data_y)
    
    return train_x, train_y, dev_x, dev_y, scaler_x, scaler_y


In [7]:
train_x, train_y, dev_x, dev_y, scaler_x, scaler_y = \
    load_preprocessed_rnn_dataset(INPUT_SEQUENCE_COLUMNS, OUTPUT_SEQUENCE_COLUMNS)


In [8]:
def create_model(input_columns_amount: int, output_columns_amount: int, sequence_length: int) -> Model:
    """
    Creates LSTM model
    @return:
    """
    input_layer = layers.Input(shape=(None,input_columns_amount))
    layer = layers.LSTM(512, return_sequences=True)(input_layer)
    output_layer = layers.Dense(output_columns_amount)(layer)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizers.Adam(learning_rate=0.01))

    return model



def create_model_dynamic(hp: kt.HyperParameters) -> Model:
    """
    Creates LSTM model using the input hyperparameters chose by keras_tuner
    """
    input_layer = layers.Input(shape=(None, hp.get("input_columns_amount")))
    lstm_layer_1_units = hp.Int("lstm_layer_1_units", min_value=8, max_value=256, step=8)
    layer = layers.LSTM(lstm_layer_1_units, return_sequences=True)(input_layer)
    output_layer = layers.Dense(hp.get("output_columns_amount"))(layer)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizers.Adam(learning_rate=0.001, clipvalue=.5))

    return model

def _create_regulator(hp: kt.HyperParameters, layer_name: str):
    regulator_type = kernels = hp.Choice(f"{layer_name}_regulator_type", values=["l1", "l2"])
    kernels = hp.Choice("{layer_name}_regulator_kernel", values=[0.01,0.001,0.1,0.005,0.05])
    if regulator_type == "l1":
        return tf.keras.regularizers.l1(kernels)
    return tf.keras.regularizers.l2(kernels)
        
 
def create_model_dynamic(hp: kt.HyperParameters) -> Model:
    """
    Creates LSTM model using the input hyperparameters chose by keras_tuner
    """

    input_layer = layers.Input(shape=(None, hp.get("input_columns_amount")))
    
    lstm_layer_1_units = hp.Int("lstm_layer_units", min_value=32, max_value=256, step=1)
    lstm_props = {}
    if hp.Boolean(f"lstm_regulator"):
        lstm_props["kernel_regularizer"] = _create_regulator(hp, "lstm")
        
    layer = layers.LSTM(lstm_layer_1_units, return_sequences=True, **lstm_props)(input_layer)
    if hp.Boolean(f"lstm_dropout"):
        dropout_rate = hp.Float(f"lstm_dropout_rate", min_value=0.0, max_value=0.4, step=0.05)
        layer = layers.Dropout(dropout_rate)(layer)
    
    dense_layers = hp.Int("dense_layers_amount", min_value=0, max_value=2)
    dense_activation = hp.Choice("dense_activation", ["sigmoid", "relu", "sigmoid", "relu", "tanh"])

    for layer_index in range(1, dense_layers + 1):
        layer_name = f"dense_{layer_index}"
        units = hp.Int(f"{layer_name}_units", min_value=16, max_value=128, step=1)
        
        dense_props = {}
        if hp.Boolean(f"{layer_name}_regulator"):
            dense_props["kernel_regularizer"] = _create_regulator(hp, layer_name)
        
        layer = layers.Dense(units, activation=dense_activation, **dense_props)(layer)

        if hp.Boolean(f"dense_{layer_index}_dropout"):
            dropout_rate = hp.Float(f"dense_{layer_index}_dropout_rate", min_value=0, max_value=0.4, step=0.05)
            layer = layers.Dropout(dropout_rate)(layer)

    output_layer = layers.Dense(hp.get("output_columns_amount"))(layer)

    learning_rate = hp.Choice("adam_learning_rate", [0.1, 0.2, 0.05, 1e-2,2e-2, 1e-3,2e-3,3e-3, 5e-3, 5e-4])
    if hp.Boolean(f"learning_rate_decay"):
        learning_rate = optimizers.schedules.ExponentialDecay(
            initial_learning_rate=learning_rate,
            decay_steps=10000,
            decay_rate=0.9)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    
    model.compile(loss='mean_squared_error',
                  optimizer=optimizers.Adam(learning_rate=learning_rate))
    
    return model
    

In [9]:
FORCE_CPU_RUN = False
if FORCE_CPU_RUN:
    print("###########################")
    tf.config.set_visible_devices([], 'GPU')

In [10]:

model = create_model(len(INPUT_SEQUENCE_COLUMNS), len(OUTPUT_SEQUENCE_COLUMNS), INPUT_SEQUENCE_LENGTH)

# history = model.fit(train_x,
#           train_y,
#           epochs=100,
#           batch_size=256,
#           validation_data=(dev_x, dev_y))

2022-01-07 00:57:31.430484: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-07 00:57:31.435341: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-07 00:57:31.435862: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-07 00:57:31.436962: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [11]:
# plt.plot(all_train_loss)
# plt.plot(all_dev_loss) 


In [12]:
# plt.plot(all_train_loss[2:])
# plt.plot(all_dev_loss[2:]) 

In [13]:
# save_model_with_scalers_binary(model, scaler_x, scaler_y, "s2s_model_static")

In [14]:
from settings import TUNERS_FOLDER_PATH
import copy 


hp = kt.HyperParameters()
hp.Fixed("input_columns_amount", len(INPUT_SEQUENCE_COLUMNS))
hp.Fixed("output_columns_amount", len(OUTPUT_SEQUENCE_COLUMNS))

project_name = "s2s_exp1_6Jan22"
tuner = kt.BayesianOptimization(
    create_model_dynamic,
    hyperparameters=hp,
    tune_new_entries=True,
    objective="val_loss",
    max_trials=1000,
    directory=TUNERS_FOLDER_PATH,
    project_name=project_name,
)

INFO:tensorflow:Reloading Oracle from existing project /home/israzex/Desktop/drone_homecoming_rl/artificial_gps/tuners/s2s_exp1_6Jan22/oracle.json
INFO:tensorflow:Reloading Tuner from /home/israzex/Desktop/drone_homecoming_rl/artificial_gps/tuners/s2s_exp1_6Jan22/tuner0.json


In [15]:
model = tuner.get_best_models(num_models=10)[0]
save_model_with_scalers_binary(model, scaler_x, scaler_y, "s2s_100seq_1")



2022-01-07 00:57:34.672443: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: ram://03dc120a-4081-4cab-97bd-3e2af19e796b/assets


INFO:tensorflow:Assets written to: ram://03dc120a-4081-4cab-97bd-3e2af19e796b/assets


In [None]:
tensorboard_dir = os.path.join(os.path.join(TUNERS_FOLDER_PATH,project_name), "tensorboard")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir, 
                                                      histogram_freq=1)
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, baseline=80)

tuner.search(train_x,
            train_y,
             validation_data=(dev_x, dev_y),
             epochs = 1000,
             batch_size=512,
             callbacks=[early_stop, tensorboard_callback])

# tuner.search(train_x,
#             train_y,
#              validation_data=(dev_x, dev_y),
#              epochs = 1000,
#              batch_size=512,
#              callbacks=[early_stop])

Trial 48 Complete [00h 10m 45s]
val_loss: 12.985352516174316

Best val_loss So Far: 8.593206405639648
Total elapsed time: 00h 10m 45s


2022-01-07 01:08:47.528086: I tensorflow/core/util/cuda_solvers.cc:179] Creating GpuSolver handles for stream 0xe6f7310



Search: Running Trial #49

Hyperparameter    |Value             |Best Value So Far 
input_columns_a...|32                |32                
output_columns_...|3                 |3                 
lstm_layer_units  |256               |256               
lstm_regulator    |False             |False             
lstm_dropout      |False             |True              
dense_layers_am...|0                 |0                 
dense_activation  |tanh              |tanh              
adam_learning_rate|0.0005            |0.0005            
learning_rate_d...|False             |True              
lstm_regulator_...|l1                |l2                
{layer_name}_re...|0.05              |0.05              
lstm_dropout_rate |0.4               |0.4               
dense_1_units     |128               |128               
dense_1_regulator |False             |False             
dense_1_dropout   |True              |True              
dense_2_units     |16                |16                
den

2022-01-07 01:08:50.759697: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 4029849600 exceeds 10% of free system memory.


Epoch 1/1000




Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
137/615 [=====>........................] - ETA: 12s - loss: 14.2968

In [None]:
# %load_ext tensorboard
# os.environ['TENSORBOARD_BINARY'] = '/home/israzex/Desktop/drone_homecoming_rl/venv/bin/tensorboard'
# %tensorboard --logdir "/home/israzex/Desktop/drone_homecoming_rl/artificial_gps/tuners/s2s_exp1_3/tensorboard"


In [None]:
def test_lstm_model_predictions(model_name: str,
                                data_csv_name: str,
                                input_columns: list,
                                output_columns: list,
                                sequence_length: int):
    flight_x_df, flight_y_df = load_flight_steps_from_file(data_csv_name, input_columns, output_columns)
    data_x = flight_x_df.to_numpy()
    real_y = flight_y_df.to_numpy()

    try:
        model, scaler_x, scaler_y = load_model_with_scalers_binary(model_name)
    except FileNotFoundError:
        print(f"There is no model in name: {model_name}")
        return

    normalized_real_x = scaler_x.transform(data_x)
    recording_length = data_x.shape[0]
    
    

    # Splits the data into data sequences
    sequences_x = []
    for offset in range(recording_length - sequence_length):
        sequences_x.append(normalized_real_x[offset: offset + sequence_length, :])

    sequences_x = np.stack(sequences_x)
    predicted_sequence = model.predict(sequences_x)
    predicted_sequence_len = predicted_sequence.shape[0]
    
    
    # opposite operation of np.add.accumulate
    for pred_index in range(predicted_sequence_len):
        for seq_index in range(sequence_length - 1, 0 ,-1) :
            predicted_sequence[pred_index][seq_index] -= predicted_sequence[pred_index][seq_index - 1]
    
    predicted_values = np.zeros(real_y.shape)

    # finds best betta
    betta = 0.0
    best_betta = 0.0
    normalized_real_y = scaler_y.transform(real_y)
    lowerst_mse = sys.float_info.max
    while betta < 1:
        predicted_values = np.zeros(real_y.shape)
        values_in_average = np.zeros(real_y.shape)
        for seq_index in range(predicted_sequence_len):
            values_in_average[seq_index:seq_index + sequence_length] += 1
            predicted_values[seq_index:seq_index + sequence_length] = \
                (betta * predicted_values[seq_index:seq_index + sequence_length] + 
                (1 - betta) * predicted_sequence[seq_index, :]) 
#             / \
#             (1 - np.power(betta, values_in_average[seq_index:seq_index + sequence_length]))

        mse = ((predicted_values - normalized_real_y)**2).mean(axis=0).reshape((3,1))
        if not np.isnan(np.sum(mse)) and np.sum(np.sqrt(np.power(mse, 2))) < np.sum(np.sqrt(np.power(lowerst_mse,2))):
#             if not np.isnan(np.sum(mse)) and np.sum(mse) < np.sum(lowerst_mse):
            lowerst_mse = mse
            best_betta = betta
            print(mse)
        print(betta)
        betta += 0.01
        
    # Inserts all the predicted values using Exponentially Weighted Averages with bias correction
    values_in_average = np.zeros(real_y.shape)
#     betta = 0.94
    betta = best_betta
    for seq_index in range(predicted_sequence_len):
        values_in_average[seq_index:seq_index + sequence_length] += 1
        predicted_values[seq_index:seq_index + sequence_length] = \
            (betta * predicted_values[seq_index:seq_index + sequence_length] + 
            (1 - betta) * predicted_sequence[seq_index, :]) 
    
    
#     normalized_real_y = scaler_y.transform(real_y)
#     print(predicted_values[100:110])
#     print(normalized_real_y[100:110])

#     # Inserts all the predicted values using Exponentially Weighted Averages with bias correction
#     values_in_average = np.zeros(real_y.shape)
#     for seq_index in range(predicted_sequence_len):
#         values_in_average[seq_index:seq_index + sequence_length] += 1
#         predicted_values[seq_index:seq_index + sequence_length] += predicted_sequence[seq_index,:]
#     predicted_values = predicted_values /  values_in_average 
    
    predicted_values = scaler_y.inverse_transform(predicted_values)
    
    print(real_y[100:110])
    print(predicted_values[100:110])
    
    predicted_offset = np.add.accumulate(predicted_values)
    real_offset = np.add.accumulate(real_y)
    
    print(predicted_offset[100:120])
    print(real_offset[100:120])
        
#     print(real_offset)
    time_intervals = flight_x_df[MAIN_TIMESTAMP_COLUMN].to_numpy().reshape(-1, 1)
    time_offset = np.add.accumulate(time_intervals)
    
    return predicted_offset, real_offset


In [None]:
# model = tuner.get_best_models(num_models=10)[0]
# save_model_with_scalers_binary(model, scaler_x, scaler_y, "s2s_model_4")
# model.summary()

In [None]:
trained_on = ["flight_2021:12:26_21:05:34_1_record.csv",
             "create_10_seconds_down_only_flight_record_data.csv",
              "_flight_2021:12:31_22:00:06_record.csv"]
new_data = ["flight_2021:12:31_22:03:16_record.csv",
            "flight_2021:12:28_00:25:05_1_record.csv"]

predicted_offset, real_offset = test_lstm_model_predictions("s2s_50seq_1",
                                                                             trained_on[0],
                                                                             INPUT_SEQUENCE_COLUMNS,
                                                                             OUTPUT_SEQUENCE_COLUMNS,
                                                                             INPUT_SEQUENCE_LENGTH)
# predicted_offset, real_offset, time_offset  = test_model_predictions("s2s_model_static",
#                        trained_on[0],
#                        INPUT_SEQUENCE_COLUMNS,
#                        OUTPUT_SEQUENCE_COLUMNS)

In [None]:
normalized_real_y[0:10]


In [None]:
predicted_values[0:10]

In [None]:
plt.plot(predicted_offset[:,0])
plt.plot(real_offset[:, 0])

In [None]:
plt.plot(predicted_offset[:,1])
plt.plot(real_offset[:, 1])

In [None]:
plt.plot(predicted_offset[:,2])
plt.plot(real_offset[:, 2])

In [None]:
model_name = "s2s_50seq_1"
data_csv_name = trained_on[0],
input_columns = INPUT_SEQUENCE_COLUMNS,
output_columns = OUTPUT_SEQUENCE_COLUMNS,
sequence_length = INPUT_SEQUENCE_LENGTH

flight_x_df, flight_y_df = load_flight_steps_from_file(data_csv_name, input_columns, output_columns)
data_x = flight_x_df.to_numpy()
real_y = flight_y_df.to_numpy()

try:
    model, scaler_x, scaler_y = load_model_with_scalers_binary(model_name)
except FileNotFoundError:
    print(f"There is no model in name: {model_name}")
    return

normalized_real_x = scaler_x.transform(data_x)
recording_length = data_x.shape[0]



# Splits the data into data sequences
sequences_x = []
for offset in range(recording_length - sequence_length):
    sequences_x.append(normalized_real_x[offset: offset + sequence_length, :])

sequences_x = np.stack(sequences_x)
predicted_sequence = model.predict(sequences_x)
predicted_sequence_len = predicted_sequence.shape[0]


# opposite operation of np.add.accumulate
for pred_index in range(predicted_sequence_len):
    for seq_index in range(sequence_length - 1, 0 ,-1) :
        predicted_sequence[pred_index][seq_index] -= predicted_sequence[pred_index][seq_index - 1]

predicted_values = np.zeros(real_y.shape)

# finds best betta
betta = 0.0
best_betta = 0.0
normalized_real_y = scaler_y.transform(real_y)
lowerst_mse = sys.float_info.max
while betta < 1:
    predicted_values = np.zeros(real_y.shape)
    values_in_average = np.zeros(real_y.shape)
    for seq_index in range(predicted_sequence_len):
        values_in_average[seq_index:seq_index + sequence_length] += 1
        predicted_values[seq_index:seq_index + sequence_length] = \
            (betta * predicted_values[seq_index:seq_index + sequence_length] + 
            (1 - betta) * predicted_sequence[seq_index, :]) 
#             / \
#             (1 - np.power(betta, values_in_average[seq_index:seq_index + sequence_length]))

    mse = ((predicted_values - normalized_real_y)**2).mean(axis=0).reshape((3,1))
    if not np.isnan(np.sum(mse)) and np.sum(np.sqrt(np.power(mse, 2))) < np.sum(np.sqrt(np.power(lowerst_mse,2))):
#             if not np.isnan(np.sum(mse)) and np.sum(mse) < np.sum(lowerst_mse):
        lowerst_mse = mse
        best_betta = betta
        print(mse)
    print(betta)
    betta += 0.01

# Inserts all the predicted values using Exponentially Weighted Averages with bias correction
values_in_average = np.zeros(real_y.shape)
#     betta = 0.94
betta = best_betta
for seq_index in range(predicted_sequence_len):
    values_in_average[seq_index:seq_index + sequence_length] += 1
    predicted_values[seq_index:seq_index + sequence_length] = \
        (betta * predicted_values[seq_index:seq_index + sequence_length] + 
        (1 - betta) * predicted_sequence[seq_index, :]) 


#     normalized_real_y = scaler_y.transform(real_y)
#     print(predicted_values[100:110])
#     print(normalized_real_y[100:110])

#     # Inserts all the predicted values using Exponentially Weighted Averages with bias correction
#     values_in_average = np.zeros(real_y.shape)
#     for seq_index in range(predicted_sequence_len):
#         values_in_average[seq_index:seq_index + sequence_length] += 1
#         predicted_values[seq_index:seq_index + sequence_length] += predicted_sequence[seq_index,:]
#     predicted_values = predicted_values /  values_in_average 

predicted_values = scaler_y.inverse_transform(predicted_values)

print(real_y[100:110])
print(predicted_values[100:110])

predicted_offset = np.add.accumulate(predicted_values)
real_offset = np.add.accumulate(real_y)

print(predicted_offset[100:120])
print(real_offset[100:120])

#     print(real_offset)
time_intervals = flight_x_df[MAIN_TIMESTAMP_COLUMN].to_numpy().reshape(-1, 1)
time_offset = np.add.accumulate(time_intervals)

return predicted_offset, real_offset
