In [None]:
#---This program is used to forecast 1 week CDRs based on 55 previous days' data

#---Install these libraries so the program works properly
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.losses import MeanAbsoluteError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from numpy import loadtxt
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [None]:
#--Load the Cell_ID vector: Cell_ID[i] is the cell ID whose time-series is row i of the matrix
#--read from matrixTS.csv (one row per cell, no header line).

Cell_ID = np.loadtxt('ID_labels.csv', delimiter=',')
df = pd.read_csv('matrixTS.csv', header=None)
df.head(6)


In [None]:
#--Defines the Cell ID to be forecasted, size of training data

days = 62 #Train and validation sizes need to be changed if this value is tuned
matrix = df.to_numpy()

#-- This part gets the time-series of the cell IDs that surround 1051: [1050, 1052, 1150, 1151, 1152]
# Row of `matrix` used for cell 1051 is hard-coded.
# NOTE(review): presumably Cell_ID[60] == 1051 — confirm against ID_labels.csv.
ID_1051=60
cells = [1050, 1052, 1150, 1151, 1152]

# Look up the first row index of every neighbour. A missing ID now fails here with a clear
# message instead of leaving `pos` short and raising an opaque IndexError further down.
_cell_id_list = Cell_ID.tolist()
pos = []
for item in cells:
    try:
        pos.append(_cell_id_list.index(item))
    except ValueError:
        raise ValueError('Cell ID %d not found in ID_labels.csv' % item) from None
print('1050, 1052, 1150, 1151, 1152:', *[Cell_ID[p] for p in pos])

# The three cells 950/951/952 are read from their own CSV files rather than from `matrix`.
TS_950 = loadtxt('CellID950.csv', delimiter=',')
TS_951 = loadtxt('CellID951.csv', delimiter=',')
TS_952 = loadtxt('CellID952.csv', delimiter=',')

TS_1050 = matrix[pos[0]]
TS_1051 = matrix[ID_1051] #-This is the one to predict
TS_1052 = matrix[pos[1]]
TS_1150 = matrix[pos[2]]
TS_1151 = matrix[pos[3]]
TS_1152 = matrix[pos[4]]

#--Splitting the data set into Training/Validating/Testing
#--Standardizing the time-series
train_size = 24*50
validation_size = 24*5


def _standardize(series, n_train):
    """Return (mean, std, standardized series); mean and std use series[:n_train] only.

    No guard is applied for a zero standard deviation: a constant training segment
    would make the division produce inf/nan.
    """
    training_part = series[:n_train]
    mu = training_part.mean()
    sigma = training_part.std()
    return mu, sigma, (series - mu) / sigma


#--Statistics consider just the training part; the full series is then scaled with them
mean_TS_950, std_TS_950, Norm_TS_950 = _standardize(TS_950, train_size)
mean_TS_951, std_TS_951, Norm_TS_951 = _standardize(TS_951, train_size)
mean_TS_952, std_TS_952, Norm_TS_952 = _standardize(TS_952, train_size)

mean_TS_1050, std_TS_1050, Norm_TS_1050 = _standardize(TS_1050, train_size)
mean_TS_1051, std_TS_1051, Norm_TS_1051 = _standardize(TS_1051, train_size)
mean_TS_1052, std_TS_1052, Norm_TS_1052 = _standardize(TS_1052, train_size)

mean_TS_1150, std_TS_1150, Norm_TS_1150 = _standardize(TS_1150, train_size)
mean_TS_1151, std_TS_1151, Norm_TS_1151 = _standardize(TS_1151, train_size)
mean_TS_1152, std_TS_1152, Norm_TS_1152 = _standardize(TS_1152, train_size)

In [None]:
#---Plot of the Cell ID CDR
# One x value per hourly sample: 0, 1, ..., 24*days - 1.
# (np.linspace(0, N, N) would space the points N/(N-1) apart and label the last sample as hour N.)
hour = np.arange(24*days)

plt.figure(figsize=(15,6))
plt.title('Traffic load of Cell IDs')
plt.plot(hour, TS_1051, 'b', label="Cell ID: %d" % (Cell_ID[ID_1051]))
plt.plot(hour, TS_950, 'r', label="Cell ID: 950")
plt.legend(loc='upper right', fontsize=13)
plt.xlabel("Hour")
plt.ylabel("CDR")

## Constructing the 3D tensor and the respective outputs

In [None]:
# For every hour i, build a 3x3 grid of standardized values arranged as:
#   row 0: 1150 1151 1152
#   row 1: 1050 1051 1052   (centre element [1][1] is the cell to predict)
#   row 2:  950  951  952
_grid_rows = (
    (Norm_TS_1150, Norm_TS_1151, Norm_TS_1152),
    (Norm_TS_1050, Norm_TS_1051, Norm_TS_1052),
    (Norm_TS_950, Norm_TS_951, Norm_TS_952),
)

# `tensor` stays a plain Python list of nested lists, one 3x3 grid per hour of TS_950.
tensor = [
    [[series[i] for series in grid_row] for grid_row in _grid_rows]
    for i in range(len(TS_950))
]


In [None]:
#---This function creates the sliding window to construct the training data set.

def df_to_X_y(df,window_size):
    """Cut a time-indexed sequence into overlapping input windows and next-step targets.

    Parameters
    ----------
    df : sequence (list or array) indexed along time.
    window_size : int
        Number of consecutive time steps in each input window.

    Returns
    -------
    X : np.ndarray
        X[i] holds df[i : i + window_size].
    y : np.ndarray
        y[i] holds df[i + window_size], the step right after window i.
    Both contain len(df) - window_size samples (none if that is not positive).
    """
    # The sample count is derived from the argument itself, not from a notebook global,
    # so the function works on any sequence passed in.
    n_samples = len(df) - window_size
    X = [list(df[i:i+window_size]) for i in range(n_samples)]
    y = [df[i+window_size] for i in range(n_samples)]
    return np.array(X), np.array(y)

#-With the 1488-hour tensor and a 24-hour window:
#-X contains 1464 tensors with 24 matrices (24 hours) of dimension 3x3 (IDs surrounding ID 1051 - see Figure in the Survey Paper)
#-y contains 1464 matrices of dimension 3x3 used as the training targets

In [None]:
# Number of past hours fed to the model for each one-step prediction.
WINDOW_SIZE = 24
X,y = df_to_X_y(tensor,WINDOW_SIZE)
print('Initial Shape', X.shape, y.shape)

# Flatten every 3x3 target matrix into a 9-vector (row-major), matching the 9-unit output layer.
y_flatten = np.array([item.flatten() for item in y])

# Report the shape of the flattened targets (y itself is unchanged by the flattening).
print('Final Shape', X.shape, y_flatten.shape)

In [None]:
#---Reshape tensor X: insert a new axis of size 1 at index 4 (the trailing 1 of the
#---Conv3D input_shape (24, 3, 3, 1) used by the model).
# NOTE: X is rebound in place, so running this cell a second time adds yet another axis.
X = tf.expand_dims(X,axis=4)

## Splitting data into training and validation sets

In [None]:
# The slices below index *windows*, not raw hours: window i spans hours i..i+WINDOW_SIZE-1
# and its target is hour i+WINDOW_SIZE (see df_to_X_y).
val_end = train_size + validation_size

X_train = X[:train_size]
y_train = y_flatten[:train_size]

X_val = X[train_size:val_end]
y_val = y_flatten[train_size:val_end]

X_train.shape, X_val.shape

## Constructing Simple CNN model

In [None]:
# Shape of a single training sample; it should agree with the input_shape given to the first Conv3D layer.
X_train[0].shape

In [None]:
#-Comment or uncomment entries of the list to insert/remove layers of the CNN
#-Change the number of neurons inside each layer

d=1 # Kernel size in time-dimension

model1 = Sequential([
    # Input: 24 hourly 3x3 grids with one channel.
    layers.Conv3D(32, (d, 2, 2), activation='relu', padding='same', input_shape=(24, 3, 3, 1)),
    layers.AveragePooling3D((2, 2, 2), padding='same'),

    layers.Conv3D(32, (d, 2, 2), activation='relu', padding='valid'),
    layers.AveragePooling3D((2, 1, 1), padding='valid'),

    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    # 9 linear outputs: one per element of the flattened 3x3 target grid.
    layers.Dense(9, activation='linear'),
])

model1.summary()


In [None]:
#--Model Training

#-Set the hyperparameters according to your application: learning_rate, epochs, batch size
model1.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss=MeanSquaredError(),
    metrics=[RootMeanSquaredError()],
)
model1.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
)

In [None]:
#----Predictions: the prediction of one step becomes input for the next one

# First hour to forecast: the hour right after the training + validation span. The plots and
# metrics compare the forecast against the series from exactly this index onwards.
forecast_start = train_size + validation_size

# Seed the recursion with the WINDOW_SIZE observed hours immediately before forecast_start.
# (The last validation window is NOT that window: window i targets hour i + WINDOW_SIZE, so
# X_val[-1] targets hour forecast_start + WINDOW_SIZE - 1 and contains test-period observations.)
seed_window = np.array(tensor[forecast_start - WINDOW_SIZE:forecast_start])  # (WINDOW_SIZE, 3, 3)
input_pred_tensor = seed_window[np.newaxis, ..., np.newaxis]                 # (1, WINDOW_SIZE, 3, 3, 1)
prediction_ID_1051 = []

N_day_pred = 7
for _ in range(24*N_day_pred):
    # prediction has shape (1, 9): the flattened (row-major) 3x3 grid for the next hour.
    prediction = model1.predict(input_pred_tensor, verbose=0)
    new_values = prediction[0].reshape(1, 1, 3, 3, 1)

    # Index 4 of the flattened grid is the centre element [1][1], i.e. cell 1051.
    prediction_ID_1051.append(prediction[0][4])

    # Erase first position and add new values to the last one
    input_pred_tensor = np.concatenate([input_pred_tensor[:, 1:], new_values], axis=1)


In [None]:
# Plotting the predictions (standardized scale)
prediction_ID_1051 = np.array(prediction_ID_1051)
test_start = train_size + validation_size  # first hour of the forecast period

plt.figure(figsize=(15,6))
# Use ID_1051 (the row index of the predicted cell) rather than a repeated literal index.
plt.title('Prediction for Cell ID = %d (Standardized)' % (Cell_ID[ID_1051]))
plt.plot(hour[test_start:], Norm_TS_1051[test_start:], 'b', label="Observed")
plt.plot(hour[test_start:], prediction_ID_1051, 'r', label="Predicted")
plt.legend(loc='upper right', fontsize=13)
plt.xlabel("Hour")
plt.ylabel("CDR (standardized)")


In [None]:
#---Non-standardized prediction
# Invert the standardization of cell 1051: multiply by the training-part std and add the training-part mean.
NN_prediction_ID_1051 = prediction_ID_1051*std_TS_1051+mean_TS_1051

In [None]:
#---Plotting the predictions (original CDR scale)
prediction_ID_1051 = np.array(prediction_ID_1051)
test_start = train_size + validation_size  # first hour of the forecast period

plt.figure(figsize=(15,6))
# Use ID_1051 (the row index of the predicted cell) rather than a repeated literal index.
plt.title('Prediction for Cell ID = %d' % (Cell_ID[ID_1051]))
plt.plot(hour[test_start:], TS_1051[test_start:], 'b', label="Observed")
plt.plot(hour[test_start:], NN_prediction_ID_1051, 'r', label="Predicted")
plt.legend(loc='upper right', fontsize=13)
plt.xlabel("Hour")
plt.ylabel("CDR")

In [None]:
#---Accuracy results
# Errors are computed on the original (non-standardized) scale over the forecast period.

observed_test = TS_1051[(train_size+validation_size):]

# sklearn metrics take (y_true, y_pred) in that order. MAE and MSE are symmetric, so the values
# are unaffected, but the conventional order keeps any asymmetric metric added later correct.
MAE = mean_absolute_error(observed_test, NN_prediction_ID_1051)
MSE = mean_squared_error(observed_test, NN_prediction_ID_1051)
print('MAE:', "%.3f" % MAE)
print('MSE:', "%.3f" % MSE)