# Import libraries

In [46]:
config = {
    "model": "lstm", # "lstm", "random_forest", "gru
    "shuffle": False,
    "augmentation": ["from_utils"], # "shifting", "from_utils"
    "validation": True,
    "save_model": True
}

In [61]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from Utils import *
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import os

In [48]:
# verify if GPU is available
gpu_available = tf.test.is_gpu_available()
is_cuda_gpu_available = tf.test.is_gpu_available(cuda_only=True)
is_cuda_gpu_min_3 = tf.test.is_gpu_available(True, (3,0))
print("gpu_available: ", gpu_available, "\tis_cuda_gpu_available: ", is_cuda_gpu_available)

gpu_available:  True 	is_cuda_gpu_available:  True


2023-12-15 23:57:22.200951: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-15 23:57:22.200967: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-12-15 23:57:22.201514: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-15 23:57:22.201522: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-12-15 23:57:22.201561: I tensor

# Import data

In [49]:
# Load the data
training_data = np.load('training_dataset/training_data.npy')
valid_periods = np.load('training_dataset/valid_periods.npy')
categories = np.load('training_dataset/categories.npy')

# Preprocess data

## Shuffle data

In [50]:
def shuffle_data(data, valid_periods, categories):
    permutations = np.random.permutation(data.shape[0])
    data = data[permutations]
    valid_periods = valid_periods[permutations]
    categories = categories[permutations]
    return data, valid_periods, categories

if config["shuffle"]:
    training_data, valid_periods, categories = shuffle_data(training_data, valid_periods, categories)

## Build sequences

In [51]:
dataset, labels = build_sequences_optimized(training_data, valid_periods)

Dataset shape:  (48000, 200)


## Split dataset

In [52]:
if config["validation"]:
    train_validation_data, train_validation_labels, test_data, test_labels = split_dataset(dataset, labels)
    train_data, train_labels, validation_data, validation_labels = split_dataset(train_validation_data, train_validation_labels)
else:
    train_data, train_labels, test_data, test_labels = split_dataset(dataset, labels)

Train data shape:  (38400, 200)
Test data shape:  (9600, 200)
Train data shape:  (30720, 200)
Test data shape:  (7680, 200)


## Data augmentation

In [53]:
def augment_shifting(data, valid_periods, categories, amount_of_augmentations=8000, max_augmentation=0.3):
    training_data_lasts = data[-amount_of_augmentations:]
    valid_periods_lasts = valid_periods[-amount_of_augmentations:]
    categories_lasts = categories[-amount_of_augmentations:]

    data = np.concatenate((training_data, training_data_lasts))
    valid_periods = np.concatenate((valid_periods, valid_periods_lasts))
    categories = np.concatenate((categories, categories_lasts))

    for sequence in training_data[-amount_of_augmentations:]:
        num = np.random.ranf()
        while num > max_augmentation:
            num = np.random.ranf()
        sequence += num
    
    return data, valid_periods, categories

import numpy as np

def augment_window_slice_and_scale(data, window_size, scale_range):
    augmented_data = []
    for series in data:
        # Randomly select a window
        start_idx = np.random.randint(0, len(series) - window_size)
        end_idx = start_idx + window_size

        # Extract the windowed segment
        window = series[start_idx:end_idx]

        # Randomly scale the window
        scale_factor = np.random.uniform(scale_range[0], scale_range[1])
        scaled_window = window * scale_factor

        augmented_data.append(scaled_window)

    return np.array(augmented_data)

if "shifting" in config["augmentation"]:
    training_data, valid_periods, categories = augment_shifting(training_data, valid_periods, categories)
if "scaling" in config["augmentation"]:
    training_data = augment_window_slice_and_scale(training_data, 200, (0.5, 1.5))
if "from_utils" in config["augmentation"]:
    train_data, train_labels = augment_data(train_data, train_labels)
    

Augmentation round:  0
Augmentation round:  1
Augmentation round:  2


# Models

## LSTM

In [54]:
if config["model"] == "lstm":
    # Build LSTM model
    model = Sequential()
    model.add(LSTM(50, input_shape=(200, 1)))
    model.add(Dense(18))
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    callbacks = []
    
    # Train the model
    if config["validation"]:
        from keras.callbacks import EarlyStopping
        callbacks.append(tf.keras.callbacks.EarlyStopping(patience=10, monitor='val_loss'))
        model.fit(train_data, train_labels, validation_data=(validation_data, validation_labels), epochs=100, batch_size=32, callbacks=callbacks)
    else:
        model.fit(train_data, train_labels, epochs=25, batch_size=32, callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


## Random Forest

In [55]:
if config["model"] == "random_forest": # non funziona
    # Train a forecasting model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(train_data, train_labels)

## GRU

In [56]:
if config["model"] == "gru":
    from tensorflow.keras.layers import GRU, Dense
    # Build GRU model
    model = Sequential()
    model.add(GRU(64, input_shape=(200, 1)))
    model.add(Dense(18))
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train the model
    model.fit(train_data, train_labels, epochs=25, batch_size=32)

# Evaluate model

In [62]:
telescope_musk = 9
# Make predictions on the test data
predictions = model.predict(test_data)
final_predictions = predictions[:, :telescope_musk] if telescope_musk != 18 else predictions
final_test_labels = test_labels[:, :telescope_musk] if telescope_musk != 18 else test_labels

result = evaluate_model(final_test_labels, final_predictions)

MAE: 0.09900879912018257
MSE: 0.02170533407850327
RMSE: 0.14732730255625828


In [63]:
if config["save_model"]:
    file_name = config["model"]
    if config["shuffle"]:
        file_name += "_shuffled"
    if config["augmentation"]:
        file_name += "_augmented"
    final_file_name = file_name
    count = 1
    while os.path.exists("models/" + final_file_name + ".h5"):
        count += 1
        final_file_name = file_name + f"_V{count}"
    
    model.save("models/" + final_file_name + ".h5")
    # save text file with model name
    with open("models/" + final_file_name + ".txt", "w") as f:
        f.write(result)
    print("Model saved as: " + final_file_name + ".h5")

  saving_api.save_model(


TypeError: write() argument must be str, not None