# ITU-ML5G-PS-005-KDDI Training Notebook

In [None]:
# Mount Google Drive inside the Colab runtime so the project folder (code, data,
# saved models) is reachable under /content/drive.
# force_remount=True asks Colab for a fresh mount even if one already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
# please change your path
cd /content/drive/MyDrive/your/path/ITU-ML5G-PS-005-KDDI-UT-NakaoLab-AI

# Data Load and Preprocess

In [None]:
import gc
import numpy as np
import pandas as pd
from tools.preprocessing import Processor
from tools.generator import ReccurentTrainingGenerator

Please set the training mode in the next cell:
- To train the model with all metrics, set `training_mode` to `"all"`.
- To remove the 0 metrics and then train the model, set `training_mode` to `"removed"`.
- To train a model using differences, set `training_mode` to `"diff"`.
- To train a model using feature importance, set `training_mode` to `"RF"` and set `metrics_num` to the number of features you want to use.

In [None]:
# Selects which preprocessing variant is loaded further down
# (the loader cell checks for 'all', 'removed', 'diff' and 'cadvisor').
training_mode = "all"
# Number of features to keep when training in RF (feature-importance) mode.
# NOTE(review): not referenced by any cell visible in this notebook — confirm where it is consumed.
metrics_num = 1500

In [None]:
# The training set is stored as six numbered CSV shards (train_0 .. train_5);
# read each one with its first column as the index and stack them row-wise.
# The per-shard frames are only referenced by the temporary list, so they
# become collectable as soon as the concatenation is done.
train_data = pd.concat([
    pd.read_csv(f'data/ML5G-PS-005_train_{shard}.csv', index_col=0, header=0)
    for shard in range(6)
])

# Same layout for the test set, which has three shards (test_0 .. test_2).
test_data = pd.concat([
    pd.read_csv(f'data/ML5G-PS-005_test_{shard}.csv', index_col=0, header=0)
    for shard in range(3)
])

# Reclaim the memory held by the intermediate shard frames right away.
gc.collect()

In [None]:
# Hand the raw train/test frames to the project's Processor (tools.preprocessing);
# the get_*_data() methods called in the next cell are taken from this object.
preprocessor = Processor(train_data, test_data)

In [None]:
# Load the feature/label arrays for the preprocessing variant named by training_mode.
# Every variant returns the same 4-tuple: (X, Y, test_X, test_Y).
if training_mode == 'all':
  X, Y, test_X, test_Y = preprocessor.get_scaled_data()
elif training_mode == 'removed':
  X, Y, test_X, test_Y = preprocessor.get_removed_data()
elif training_mode == 'diff':
  X, Y, test_X, test_Y = preprocessor.get_diff_data()
elif training_mode == 'cadvisor':
  X, Y, test_X, test_Y = preprocessor.get_cadvisor_data()
else:
  # Fail here with a clear message instead of leaving X/Y undefined and
  # hitting a NameError in a later cell.
  # NOTE(review): the notebook text also mentions an 'RF' mode, but no loader
  # for it is visible in this notebook — add a branch once its source is known.
  raise ValueError(
      f"Unsupported training_mode {training_mode!r}; "
      "expected one of 'all', 'removed', 'diff', 'cadvisor'"
  )

In [None]:
# Hold-out split by position: the first 500*70 rows are used for training and
# everything after them for validation (features and labels are cut at the same row).
# NOTE(review): 500*70 presumably means 500 sequences of 70 steps each — confirm.
train_X, val_X = X[:500*70], X[500*70:]
train_Y, val_Y = Y[:500*70], Y[500*70:]

In [None]:
# The raw frames are no longer referenced by later cells in this notebook;
# drop the names and collect garbage to release their memory before training.
del train_data
del test_data
gc.collect()

# Training

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, LSTM, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

By changing timesteps and delay, you can specify the detection time.

- timesteps is the input sequence size.
- delay represents detection time _t_.

The sum of timesteps and delay should not exceed 60, because that sum is the time of the first output.

Combinations of timesteps and delay:

- timesteps : 15, delay : 35
- timesteps : 15, delay : 40
- timesteps : 15, delay : 45
- timesteps : 14, delay : 46
- timesteps : 13, delay : 47
- timesteps : 12, delay : 48

In [None]:
# Input sequence length (number of time steps per sample fed to the LSTM).
timesteps = 15
# Detection time t; timesteps + delay should stay at or below 60 (see the note above).
delay = 45
# Batch size passed to both data generators.
batch_size = 64
# Size of X's last axis, used as the per-step input width of the model
# (presumably the number of metrics/features — TODO confirm X's layout).
input_dim = X.shape[-1]

In [None]:
# Build one batch generator per split; both use the same batch size,
# sequence length (timesteps) and delay.
# NOTE(review): ReccurentTrainingGenerator is project code (tools.generator);
# how it windows X/Y is not visible from this notebook.
train_generator = ReccurentTrainingGenerator(train_X, train_Y, batch_size, timesteps, delay)
val_generator = ReccurentTrainingGenerator(val_X, val_Y, batch_size, timesteps, delay)

In [None]:
# Two stacked 256-unit LSTM layers followed by dropout and a single linear
# output unit, trained as a regression with mean-squared-error loss.
# The input tensor is named `inputs` (not `input`) so the Python builtin
# input() is not shadowed for the rest of the kernel session.
inputs = Input(shape=(timesteps, input_dim))
# The Input layer already fixes the shape, so no input_shape is passed here.
# return_sequences=True keeps the per-step outputs so the second LSTM
# receives a sequence rather than only the last hidden state.
lstm = LSTM(256, return_sequences=True)(inputs)
lstm = LSTM(256)(lstm)
dropout = Dropout(0.5)(lstm)
dense = Dense(1)(dropout)
model = Model(inputs=inputs, outputs=dense)
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

In [None]:
# Train for 15 epochs on the training generator and evaluate on the validation
# generator; the object returned by fit() is kept in `history`.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    verbose=1,
)

In [None]:
# Save the trained model under models/<training_mode>/, tagged with the delay.
# Please change delay to metrics_num if you train RF model
model.save(f'models/{training_mode}/LSTM_ITU_{delay}')