# DNN approach for EsTA

Author: Goodsol Lee, NETLAB, Seoul National University
        Junseok Kim, NETLAB, Seoul National University


## Get datasets

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import os

#configuration for gpu usage
conf = tf.ConfigProto()
# you can modify below as you want
#conf.gpu_options.per_process_gpu_memory_fraction = 0.4
#conf.gpu_options.allow_growth = True
os.environ['CUDA_VISIBLE_DEVICES']='0'
print(tf.__version__)

1.12.0


<font size="5">Sample training data, validation data, test data from raw data</font>

In [2]:
node_num = 155
num_RSSI_sample = 20
scs = 30

data_set = np.empty((num_RSSI_sample))
channel_label_set = np.empty((1))
TA_label_set = np.empty((1))

for node_index in range(1,node_num+1):
    print('Get sample from node'+str(node_index))
    file_name = 'data-'+str(scs)+'/node-'+str(node_index)+'.txt'
    previous_cellId = -1
    cell_sample = list()
    cell_sample_set = list()
    
    with open(file_name, 'r') as f:
        while 1:
            line = f.readline()
            if not line: break
            parsed_line =line.split(' ')
            
            rssi = parsed_line[9]
            channel_state = parsed_line[8]
            current_cellId = parsed_line[10]
            TA_region = parsed_line[11]
            
            if previous_cellId == current_cellId:
                cell_sample.append([rssi, channel_state, TA_region])
            else:
                previous_cellId = current_cellId
                if cell_sample:
                    cell_sample_set.append(cell_sample)
                    cell_sample = list()
        
        cell_sample_set.append(cell_sample)
        sample_length = 0
        for i in range(len(cell_sample_set)):
            sample_length += len(cell_sample_set[i])-num_RSSI_sample+1
        
        print(sample_length)
        
        temp_data_set = np.zeros((sample_length,num_RSSI_sample))
        temp_channel_label_set = np.zeros((sample_length,1))
        temp_TA_label_set = np.zeros((sample_length,1))
        
        iter_start = 0
        iter_end = 0
        for i in range(len(cell_sample_set)):
            cell_sample = cell_sample_set[i]
            cell_sample = np.array(cell_sample)
            iter_start = iter_end
            iter_end = iter_end + len(cell_sample_set[i])-num_RSSI_sample+1
            for j in range(len(cell_sample_set[i])-num_RSSI_sample+1):
                temp_data_set[iter_start+j] = cell_sample[j:j+num_RSSI_sample,0]
                temp_channel_label_set[iter_start+j] = cell_sample[j+num_RSSI_sample-1,1]
                temp_TA_label_set[iter_start+j] = cell_sample[j+num_RSSI_sample-1,2] 
              
    if node_index == 1:
        data_set = temp_data_set
        channel_label_set = temp_channel_label_set
        TA_label_set = temp_TA_label_set
    else:
        data_set = np.append(data_set,temp_data_set,axis = 0)
        channel_label_set = np.append(channel_label_set,temp_channel_label_set,axis = 0)
        TA_label_set = np.append(TA_label_set,temp_TA_label_set,axis=0)
    
print('Sampling is completed, sample length: ',data_set.shape[0])

Get sample from node1
25703
Get sample from node2
15563
Get sample from node3
25310
Get sample from node4
18562
Get sample from node5
26542
Get sample from node6
21153
Get sample from node7
30533
Get sample from node8
52925
Get sample from node9
26561
Get sample from node10
21729
Get sample from node11
21502
Get sample from node12
16722
Get sample from node13
33341
Get sample from node14
23348
Get sample from node15
52122
Get sample from node16
34743
Get sample from node17
26342
Get sample from node18
29130
Get sample from node19
22342
Get sample from node20
24542
Get sample from node21
41922
Get sample from node22
40882
Get sample from node23
11781
Get sample from node24
23942
Get sample from node25
27941
Get sample from node26
42326
Get sample from node27
29342
Get sample from node28
20762
Get sample from node29
24161
Get sample from node30
25164
Get sample from node31
35146
Get sample from node32
34142
Get sample from node33
25161
Get sample from node34
20541
Get sample from node35


In [3]:
zeroidx = np.where(data_set==0)

print(zeroidx)

print(np.max(TA_label_set))

for i in range(1):
    print('-----------------------')
    

(array([], dtype=int64), array([], dtype=int64))
7.0
-----------------------


In [4]:
import random

data_length = data_set.shape[0]
valid_num = 100000
test_num = 100000

#get test set
test_index = random.sample(range(0,data_length-test_num),test_num)
test_data = data_set[test_index]
test_channel_label = channel_label_set[test_index]
test_TA_label = TA_label_set[test_index]

#get training set/validation set
train_data_set = np.delete(data_set,test_index,axis=0)
train_channel_label_set = np.delete(channel_label_set,test_index,axis=0)
train_TA_label_set = np.delete(TA_label_set,test_index,axis=0)
data_length = train_data_set.shape[0]

valid_index = random.sample(range(0,data_length),valid_num)

training_data = train_data_set
training_channel_label = train_channel_label_set
training_TA_label = train_TA_label_set

valid_data = train_data_set[valid_index]
valid_channel_label = train_channel_label_set[valid_index]
valid_TA_label = train_TA_label_set[valid_index]

save_data ={
    'training_data':training_data,
    'training_channel_label':training_channel_label,
    'training_TA_label':training_TA_label,
    
    'valid_data':valid_data,
    'valid_channel_label':valid_channel_label,
    'valid_TA_label':valid_TA_label,
    
    'test_data':test_data,
    'test_channel_label':test_channel_label,
    'test_TA_label':test_TA_label
}

In [5]:
# get name of the data path
data_path = 'data-'+str(scs)+'/save_data'+str(num_RSSI_sample)+'.pickle'

In [6]:
# save data
with open(data_path,'wb') as f:
    pickle.dump(save_data,f,pickle.HIGHEST_PROTOCOL)

In [7]:
# restore data
with open(data_path,'rb') as f:
    save = pickle.load(f)
    
    train_dataset = save['training_data']
    train_channel_labels = save['training_channel_label']
    train_labels = save['training_TA_label']
    
    valid_dataset = save['valid_data']
    valid_channel_labels = save['valid_channel_label']
    valid_labels = save['valid_TA_label']
    
    test_dataset = save['test_data']
    test_channel_labels = save['test_channel_label']
    test_labels = save['test_TA_label']
    
    del save  # hint to help gc free up memory
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Validation set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)

Training set (3954728, 20) (3954728, 1)
Validation set (100000, 20) (100000, 1)
Test set (100000, 20) (100000, 1)


In [8]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

def reformat(dataset, labels):
    dataset = dataset.reshape((-1, num_RSSI_sample)).astype(np.float32)
    # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
    enc.fit(labels)
    labels = enc.transform(labels).toarray()
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


Training set (3954728, 20) (3954728, 8)
Validation set (100000, 20) (100000, 8)
Test set (100000, 20) (100000, 8)


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [9]:
sample_size = num_RSSI_sample
num_labels = np.max(TA_label_set)+1

layer1_output_num = 200
layer2_output_num = 200
layer3_output_num = 200
layer4_output_num = 200
layer5_output_num = 200
num_steps = 100
batch_size = 1024

graph_gs=tf.Graph()
with graph_gs.as_default():
    tf_dataset_gs=tf.placeholder(tf.float32, shape=(None, sample_size))
    tf_labels_gs=tf.placeholder(tf.float32, shape=(None, num_labels))
    keep_prob = tf.placeholder(tf.float32, shape=(None))
    is_train = tf.placeholder(tf.bool)
    
    lambda_reg = 0.000000000001
        
    #Regularization
    regularizer = tf.contrib.layers.l2_regularizer(scale=lambda_reg)
    initializer = tf.contrib.layers.variance_scaling_initializer(dtype=tf.float32)
    #neural network consists of two lines
    dense1 = tf.layers.dense(tf_dataset_gs, layer1_output_num, kernel_regularizer=regularizer)
    dense1 = tf.nn.dropout(dense1, keep_prob = keep_prob)
    dense1 = tf.layers.batch_normalization(dense1, training=is_train)
    
    dense2 = tf.layers.dense(dense1,layer2_output_num, activation=tf.nn.relu, kernel_regularizer=regularizer)
    dense2 = tf.nn.dropout(dense2, keep_prob = keep_prob)
    dense2 = tf.layers.batch_normalization(dense2, training=is_train)
    
    dense3 = tf.layers.dense(dense2 ,layer3_output_num, activation=tf.nn.relu, kernel_regularizer=regularizer)
    dense3 = tf.nn.dropout(dense3, keep_prob = keep_prob)
    dense3 = tf.layers.batch_normalization(dense3, training=is_train)
    
    dense4 = tf.layers.dense(dense3 ,layer4_output_num, activation=tf.nn.relu, kernel_regularizer=regularizer)
    dense4 = tf.nn.dropout(dense4, keep_prob = keep_prob)
    dense4 = tf.layers.batch_normalization(dense4, training=is_train)
    
    dense5 = tf.layers.dense(dense4 ,layer4_output_num, activation=tf.nn.relu, kernel_regularizer=regularizer)
    dense5 = tf.nn.dropout(dense5, keep_prob = keep_prob)
    dense5 = tf.layers.batch_normalization(dense4, training=is_train)
    logits_gs = tf.layers.dense(dense3,num_labels, activation=None)
    
    #Loss
    loss_gs = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_labels_gs, logits=logits_gs))
    
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.01
    decay_num =  50*(int)(train_labels.shape[0]/batch_size)
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, decay_num, 1, staircase=True)
    # Optimizer
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer_gs = tf.train.AdamOptimizer(learning_rate).minimize(loss_gs, global_step=global_step)
    
    #Predictions for the training
    prediction_gs = tf.nn.softmax(logits_gs)

In [10]:
def accuracy(predictions, labels):
    #original 
    #return (100.0 * np.sum(np.equal(np.argmax(predictions, 1), np.argmax(labels, 1)))
    #      / predictions.shape[0])
    #timing error limit requirement
    return (100.0 * np.sum(np.equal(np.argmax(predictions, 1), np.argmax(labels, 1))+
                           np.equal(np.argmax(predictions, 1), np.argmax(labels, 1)+1)+
                           np.equal(np.argmax(predictions, 1), np.argmax(labels, 1)-1))/predictions.shape[0])   

tf.reset_default_graph()
batch_num = (int)(train_labels.shape[0]/batch_size)

with tf.Session(graph=graph_gs, config=conf) as session_gs:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(num_steps):
        random = np.random.permutation(train_labels.shape[0])
        loss = 0
        for b in range(batch_num):
            batch_data = train_dataset[random[b*batch_size:(b+1)*batch_size]]
            batch_labels = train_labels[random[b*batch_size:(b+1)*batch_size]].astype(float)
            feed_dict_gs = {tf_dataset_gs: batch_data, tf_labels_gs: batch_labels, keep_prob:1, is_train:True}
            _, l_gs, predictions_l = session_gs.run([optimizer_gs, loss_gs, prediction_gs], feed_dict=feed_dict_gs)
            loss += l_gs
        print("Minibatch loss at step %d: %f" % (step, loss))
        
        temp_acc = 0
        valid_batch_num = (int)(valid_labels.shape[0]/1000)
        for i in range(valid_batch_num):
            valid_prediction_gs = session_gs.run(prediction_gs, feed_dict={tf_dataset_gs: valid_dataset[i*1000:(i+1)*1000], tf_labels_gs: valid_labels[i*1000:(i+1)*1000], keep_prob:1, is_train: False})
            temp_acc += accuracy(valid_prediction_gs, valid_labels[i*1000:(i+1)*1000])
        print("Validation accuracy: %.1f%%" %(temp_acc/valid_batch_num))
        
    test_acc = 0
    test_batch_num = (int)(test_labels.shape[0]/1000)
    for i in range(test_batch_num):
        feed_dict_test_gs = {tf_dataset_gs: test_dataset[i*1000:(i+1)*1000], keep_prob:1.0, is_train: False}
        test_prediction_gs = session_gs.run(prediction_gs, feed_dict=feed_dict_test_gs)
        test_acc += accuracy(test_prediction_gs, test_labels[i*1000:(i+1)*1000])
    print("Test accuracy: %.1f%%" % (test_acc/test_batch_num))
    saver = tf.train.Saver()
    saver.save(session_gs, "./model_checkpoints/centralized_sample"+str(sample_size))

Initialized
Minibatch loss at step 0: 3270.693318
Validation accuracy: 95.2%
Minibatch loss at step 1: 3227.908419
Validation accuracy: 96.3%
Minibatch loss at step 2: 3215.467755
Validation accuracy: 98.0%
Minibatch loss at step 3: 3207.573943
Validation accuracy: 98.3%
Minibatch loss at step 4: 3200.917734
Validation accuracy: 98.3%
Minibatch loss at step 5: 3189.303746
Validation accuracy: 98.7%
Minibatch loss at step 6: 3176.897088
Validation accuracy: 98.7%
Minibatch loss at step 7: 3172.828338
Validation accuracy: 98.7%
Minibatch loss at step 8: 3168.722084
Validation accuracy: 98.8%
Minibatch loss at step 9: 3162.638945
Validation accuracy: 98.9%
Minibatch loss at step 10: 3158.684735
Validation accuracy: 98.8%
Minibatch loss at step 11: 3155.552126
Validation accuracy: 98.8%
Minibatch loss at step 12: 3154.032474
Validation accuracy: 98.9%
Minibatch loss at step 13: 3152.083965
Validation accuracy: 98.9%
Minibatch loss at step 14: 3151.096395
Validation accuracy: 98.7%
Minibatc