In [None]:
# Reproducibility and log-noise setup; this cell runs before TensorFlow is imported
import logging
import os
import random
import warnings

import numpy as np

seed = 42

# Environment settings: TensorFlow C++ log level, the hash seed, and a
# matplotlib config directory located under the current working directory
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'PYTHONHASHSEED': str(seed),
    'MPLCONFIGDIR': os.getcwd()+'/configs/',
})

# Ignore FutureWarning and, more broadly, every Warning subclass
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# Seed NumPy's and the standard library's global random generators
np.random.seed(seed)
random.seed(seed)

In [None]:
# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from keras import layers as tfkl
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, RepeatVector, Attention, TimeDistributed, Input, BatchNormalization, multiply, concatenate, Flatten, Activation, Dot, Bidirectional, GRU, Bidirectional, Concatenate
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Layer
from keras import initializers, regularizers, constraints
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
# Quiet TensorFlow: AutoGraph verbosity 0, and ERROR level on both the
# TF logger and the compat.v1 logging API
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow with the notebook-wide seed through both the TF2 and compat.v1 entry points
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
# Record the TensorFlow version in the cell output
print(tf.__version__)

In [None]:
# Connect to the TPU and build the distribution strategy used when the model is created.
# Order matters: resolve the cluster, initialise the TPU system, then create the strategy.
# NOTE(review): the resolver is called with no arguments, so this presumably relies on the
# runtime exposing a TPU — confirm the behaviour on CPU/GPU-only sessions.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Set the default matplotlib font size to 16 for the figures drawn in this notebook
plt.rc('font', size=16)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input, Conv1D, BatchNormalization, Activation, Add, GlobalAveragePooling1D, Dense, MaxPooling1D

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import RobustScaler

In [None]:
# Load the two NumPy arrays that make up the dataset.
# Later cells index training_data as [series, time] and read columns 0 and 1 of
# valid_periods as the start and end of each series' usable span.
# NOTE(review): both paths are absolute Kaggle input locations — adjust when running elsewhere.
training_data = np.load('/kaggle/input/homework2-anndl/training_data.npy')
valid_periods = np.load('/kaggle/input/homework2-anndl/valid_periods.npy')

In [None]:
#training_data = training_data[-10000:]
#valid_periods = valid_periods[-10000:]

In [None]:
# Number of series, then the full array shape, one per line
print(len(training_data), training_data.shape, sep='\n')

In [None]:
# Median of each column of valid_periods (columns 0 and 1)
median_column1, median_column2 = (
    np.median(valid_periods[:, column]) for column in (0, 1)
)

# Median over every entry of the array, all columns pooled together
overall_median = np.median(valid_periods)

print("Mediana per la Colonna 1:", median_column1)
print("Mediana per la Colonna 2:", median_column2)

In [None]:
# Indices of the series whose value in column 0 of valid_periods (used as the start
# index by extract_sequences) is greater than 70% of that column's median.
# NOTE(review): the filtering that would consume these indices is commented out below, so
# the result is unused in the visible cells; the name says "before" while the test is ">" — confirm intent.
start_before_indices = np.where(valid_periods[:, 0] > median_column1 * 0.7)[0]

#training_data = training_data[start_before_indices, :]
#valid_periods = valid_periods[start_before_indices]

In [None]:
#print(len(start_before_indices))

In [None]:
# Span of every series: column 1 (end) minus column 0 (start) of valid_periods
lengths = np.subtract(valid_periods[:, 1], valid_periods[:, 0])
# Mean span across all series, printed for reference
average_length = lengths.mean()
print(average_length)

In [None]:
# Indices of the series whose valid span is below 1.5x the average length
shorter_indices = np.flatnonzero(lengths < average_length * 1.5)

# Apply the same row selection to the data and to its (start, end) pairs so they stay aligned.
# Both names are overwritten, so re-running this cell alone would apply the stale
# indices to the already-filtered arrays.
training_data, valid_periods = training_data[shorter_indices, :], valid_periods[shorter_indices]

In [None]:
# Sizes after filtering: row count then shape, first for the data...
print(len(training_data), training_data.shape, sep='\n')

# ...then for the matching (start, end) pairs
print(len(valid_periods), valid_periods.shape, sep='\n')

In [None]:
def extract_sequences(array, sequence_length, stride, valid_periods, shuffle=True, seed=1, future=18):
    """Cut every series into (input window, forecast target) pairs.

    For each row of ``array`` only the span ``[start, end)`` given by the
    matching row of ``valid_periods`` is used. A window of ``sequence_length``
    values becomes one input sample and the ``future`` values right after it
    become its target.

    Args:
        array: 2-D array indexed as ``[series, time]``.
        sequence_length: Number of time steps in each input window.
        stride: Step between the starts of consecutive windows.
        valid_periods: One ``(start, end)`` index pair per row of ``array``.
        shuffle: If true, shuffle the samples, keeping each input paired with its target.
        seed: Seed of the shuffle permutation.
        future: Number of time steps in each target (defaults to 18).

    Returns:
        ``(X, y)`` as NumPy arrays of shape ``(n_samples, sequence_length)`` and
        ``(n_samples, future)``. When no series is long enough both are empty
        1-D arrays.
    """
    window = sequence_length + future
    X = []
    y = []

    for idx in range(len(array)):
        start_idx, end_idx = valid_periods[idx]
        actual_data = array[idx, start_idx:end_idx]

        # The last admissible start is len - window (inclusive), so the window
        # whose target ends exactly on the final valid value is kept.
        for i in range(0, len(actual_data) - window + 1, stride):
            X.append(actual_data[i:i + sequence_length])
            y.append(actual_data[i + sequence_length:i + window])

    X = np.array(X)
    y = np.array(y)

    if shuffle:
        # One permutation drawn from a private generator and applied to both arrays:
        # pairs stay aligned and NumPy's global random state is left untouched.
        order = np.random.RandomState(seed).permutation(len(X))
        X = X[order]
        y = y[order]

    return X, y

In [None]:
# Windowing hyper-parameters
# Input window length (also the x offset of the target in the sample plot)
sequence_length = 200
# Step between the starts of consecutive windows
stride = 1
# Forecast horizon: number of values predicted at once (multiple-output)
future = 18

In [None]:
# Build the training windows from the configured window length and stride
# rather than repeating the literals 200 and 1, so the settings cell stays authoritative.
X_train, y_train = extract_sequences(training_data, sequence_length, stride, valid_periods)
X_train.shape, y_train.shape

In [None]:
sample_index = 0
feature = 0

# Draw one input window and, right after it on the x axis, its forecast target
fig, ax = plt.subplots(figsize=(20,3))
ax.plot(X_train[sample_index], color='steelblue', label='X train')
ax.plot(
    range(sequence_length, sequence_length+future),
    y_train[sample_index],
    color='orange',
    label='y train',
)
ax.set_title('Time-serie sample')
ax.legend()

In [None]:
# Print the two shapes as separate statements: joining the calls with a comma builds a
# tuple expression, which makes the cell echo (None, None) after the prints.
print(X_train.shape)
print(y_train.shape)

In [None]:
# Shapes handed to the model builder, derived from the extracted windows.
# X_train is (samples, window); each window is a single univariate series, so a
# trailing feature axis of size 1 is appended instead of hard-coding (200, 1).
input_shape = X_train.shape[1:] + (1,)
output_shape = y_train.shape[1:]

In [None]:
def build_lstm_seq2seq_attention(input_shape, n_units, n_outputs=18):
    """Build an LSTM encoder-decoder with dot-product attention and a dense forecasting head.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        n_units: Width of the encoder and decoder LSTMs. Must be even: the
            bidirectional decoder uses ``n_units / 2`` units per direction and its
            concatenated output is dotted with the ``n_units``-wide encoder output.
        n_outputs: Number of values predicted per sample (defaults to 18).

    Returns:
        An uncompiled Keras model named ``'model'`` mapping the input to ``n_outputs`` values.

    Raises:
        ValueError: If ``n_units`` is odd.
    """
    if n_units % 2 != 0:
        raise ValueError(
            f"n_units must be even so the bidirectional decoder matches the encoder width, got {n_units}"
        )

    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Encoder: per-step outputs (used as attention keys/values) plus the final h and c states
    encoder_x, encoder_h, encoder_c = tfkl.LSTM(
        units=n_units, return_sequences=True, return_state=True
    )(input_layer)

    # Decoder runs for a single step: its input is the final hidden state repeated once
    decoder_in = tfkl.RepeatVector(1)(encoder_h)

    x = tfkl.LSTM(units=n_units, return_sequences=True, return_state=False)(
        decoder_in, initial_state=[encoder_h, encoder_c]
    )
    decoder_x = tfkl.Bidirectional(
        tfkl.LSTM(units=int(n_units / 2), return_sequences=True, return_state=False)
    )(x)

    # Dot-product attention: score the decoder step against every encoder step,
    # normalise the scores with softmax, then take the weighted sum of encoder outputs
    attention = tfkl.Dot(axes=[2, 2])([decoder_x, encoder_x])
    attention = tfkl.Activation('softmax')(attention)
    context = tfkl.Dot(axes=[2, 1])([attention, encoder_x])

    # Join context and decoder output, flatten, and project to the forecast values
    concatenated_c = tfkl.Concatenate()([context, decoder_x])
    concatenated_c = tfkl.Flatten()(concatenated_c)
    output_layer = tfkl.Dense(n_outputs)(concatenated_c)

    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    return model

In [None]:
# Build and compile the model inside the TPU strategy scope.
# 512 is the LSTM width passed to the builder; training uses the 'adam' optimizer,
# mean-squared-error loss, and reports mean absolute error as a metric.
with tpu_strategy.scope():
    model = build_lstm_seq2seq_attention(input_shape,512)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [None]:
# Print the layer-by-layer summary, then render the model graph as the cell output
model.summary()
tfk.utils.plot_model(model, expand_nested=True)

In [None]:
def check_nan_values(data, dataset_name):
    """Print a report of the NaN entries found in ``data``.

    Args:
        data: NumPy array of floats, with any number of dimensions.
        dataset_name: Label used in the printed messages.

    Prints one line per NaN with its multi-dimensional index, or a single line
    stating that none were found. Returns ``None``.
    """
    nan_indices = np.isnan(data)
    nan_count = np.sum(nan_indices)

    if nan_count > 0:
        print(f"NaN values found in {dataset_name} set:")
        nan_positions = np.argwhere(nan_indices)
        for position in nan_positions:
            # argwhere yields one coordinate vector per NaN; it has to be converted to a
            # tuple to address that single element. Indexing with the array itself selects
            # whole rows along axis 0 (or fails when a coordinate exceeds axis 0).
            print(f"At index {position}: {data[tuple(position)]}")
    else:
        print(f"No NaN values found in {dataset_name} set.")

# Check NaN values in the training inputs
check_nan_values(X_train, "training")

# Check NaN values in the training targets (y_train is not a test set, so it is
# no longer reported under the label "test")
check_nan_values(y_train, "training target")

In [None]:
from keras.callbacks import ModelCheckpoint

# Checkpoint callback writing to 'model_checkpoint.h5'. It monitors the training
# loss ('loss', mode='min') and, with save_best_only=True, only overwrites the file
# when that value improves; verbose=1 logs each save.
checkpoint = ModelCheckpoint('model_checkpoint.h5',
                             monitor='loss',
                             save_best_only=True,
                             mode='min',
                             verbose=1)

In [None]:
# Training configuration: number of epochs and mini-batch size
epc = 150
batch_size = 256

# Callbacks passed to fit(). Despite the name `es`, early stopping is commented out;
# only the learning-rate schedule and the checkpoint callback are active.
es = [
    #tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=10, restore_best_weights=True),
    # Multiply the learning rate by 0.01 after 5 epochs without improvement of the
    # training loss, never going below 1e-5.
    # NOTE(review): if Adam starts at its default 1e-3, a single reduction already lands on min_lr — confirm this is intended.
    tfk.callbacks.ReduceLROnPlateau(monitor='loss', mode='min', patience=5, factor=0.01, min_lr=1e-5),
    checkpoint
]

# Fit on the full training set. validation_split is commented out, so no validation
# data is used and both callbacks watch the training loss. Only the `.history`
# attribute of the returned object is kept.
history = model.fit(X_train,
                    y_train,
                    #validation_split=.2,
                    epochs=epc,
                    verbose=1,
                    callbacks=es,
                    batch_size=batch_size
).history

In [None]:
# Save the trained model to ./SubmissionModel using SaveOptions with
# experimental_io_device='/job:localhost'.
# NOTE(review): presumably needed so the files are written from the local host when
# training under the TPU strategy — confirm.
save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
model.save('./SubmissionModel', options=save_locally)

#model.save('SubmissionModel')

In [None]:
import shutil
# Zip the contents of the SubmissionModel directory into SubmissionModel.zip
shutil.make_archive('SubmissionModel', 'zip', 'SubmissionModel')

from IPython.display import FileLink
# Last expression of the cell: displays a link to the archive
FileLink(r'SubmissionModel.zip')