# Data Loading Notebook
This notebook loads the dataset file into NumPy arrays, derives several feature sets from them, and saves the results as TensorFlow TFRecord dataset files.

In [0]:
#Reset Button. Press when memory goes crazy fat !
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [0]:
# imports cell
import numpy as np
import _pickle as cPickle
import sys 
import matplotlib.pyplot as plt
import scipy as sp

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

import tensorflow as tf
import gc


## Understanding data

We use a DeepSig dataset: RadioML 2016.10B (`RML2016.10b.dat`, the file loaded below).<br>
A synthetic dataset, generated with GNU Radio, consisting of 10 modulations. This is a
variable-SNR dataset with moderate LO drift, light fading, and numerous different
labeled SNR increments for use in measuring performance across different signal and
noise power scenarios.

## Loading data

The file is formatted as a "pickle" file, which can be opened in Python, for example with `cPickle.load(...)`.



In [0]:
# Mount Google Drive and load the pickled dataset dictionary from it.

from google.colab import drive
drive.mount('/content/drive')
filename = "/content/drive/My Drive/RML2016.10b.dat"
# NOTE(security): unpickling can execute arbitrary code; only load this file from a trusted source.
# The context manager closes the file handle as soon as the pickle has been read.
# encoding='latin1' is presumably required because the file was pickled under Python 2 — confirm.
with open(filename,'rb') as open_file:
    data = cPickle.load(open_file, encoding='latin1')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Loading to Numpy arrays

In [0]:
# Stack every per-key block of samples into one array and build a matching label array.
keys_list = list(data.keys())
temp_list = [data[key] for key in keys_list]

# Concatenating along axis 0 takes the total sample count from the data itself
# instead of a hard-coded 1200000, so the cell also works for other dataset sizes.
X = np.concatenate(temp_list, axis=0)
# One label row per sample: each key is repeated once for every sample stored under it.
# (Keys appear to be 2-element pairs, given the later Y[:,0] / Y[:,1] indexing — confirm.)
Y = np.repeat(np.array(keys_list), [block.shape[0] for block in temp_list], axis=0)
assert X.shape[0] == Y.shape[0], "every sample needs exactly one label row"

# Release the intermediates and the source dict; X and Y are independent copies.
temp_list.clear()
keys_list.clear()
data.clear()
del data, temp_list, keys_list
gc.collect()

119

In [0]:
# Extra garbage-collection pass; the displayed value is the number of unreachable objects found.
gc.collect()

0

## Splitting data

Split the data into 50% for training/validation and 50% for testing.

In [0]:
def split_data(X,Y,dataset_name): 
  """Return the test half of a dataset with binarized labels and save its second label column.

  The data is split 50/50 with a fixed seed (random_state=42). A LabelBinarizer
  is fitted on column 0 of the training labels and then used to encode column 0
  of the test labels. Column 1 of the test labels is saved to
  '/content/drive/My Drive/<dataset_name>_SNR.npy' (presumably the SNR of each
  test sample, going by the file name). The training half is deleted before
  saving and is NOT returned.

  Args:
    X: array of samples; the first axis is the sample axis.
    Y: 2-D label array aligned with X; columns 0 and 1 are both read.
    dataset_name: prefix used for the saved '<dataset_name>_SNR.npy' file.

  Returns:
    A 2-tuple (testX, encoded_test_labels).
  """
  (trainX, testX, trainY, testY) = train_test_split(X,Y, test_size=0.50, random_state=42)
  lb=LabelBinarizer()
  # Only the fitted classes are needed here; fit() avoids building (and throwing
  # away) the encoded training matrix that fit_transform() would allocate.
  lb.fit(trainY[:,0])
  # The training half is not used any further, so free it before saving.
  del trainX, trainY
  gc.collect()
  np.save('/content/drive/My Drive/'+dataset_name+'_SNR.npy',testY[:,1])
  return testX,lb.transform(testY[:,0])

## Creating a tensorflow record

In [0]:
def convert_data_2(data,labels,filename):
    """Write 2-channel samples and their labels to a single TFRecord file.

    The file is created at '/content/drive/My Drive/TFDatasets/<filename>.tfrecord'.
    Sample i of `data` is paired with `labels[i]`, encoded with convert_signal_2
    and appended in order.

    Args:
      data: array whose first axis indexes samples (`data.shape[0]` records are written).
      labels: sequence aligned with `data`; `labels[i]` is the label of `data[i]`.
      filename: base name (without extension) of the TFRecord file.

    Note: `tf.python_io` is the TensorFlow 1.x location of TFRecordWriter;
    TensorFlow 2.x exposes the same class as `tf.io.TFRecordWriter`.
    """
    with tf.python_io.TFRecordWriter('/content/drive/My Drive/TFDatasets/'+filename+'.tfrecord') as writer:
        for i in range(data.shape[0]):
            # Use the matching 2-channel encoder.
            example = convert_signal_2(data[i],labels[i])
            writer.write(example.SerializeToString())

def convert_signal_2(signal,label):
  """Pack one 2-channel sample and its label into a tf.train.Example.

  Rows 0 and 1 of `signal` become the float features 'feature1' and 'feature2';
  `label` becomes the int64 feature 'label'. Both inputs must provide `.tolist()`.
  """
  def _float_feature(row):
    # Wrap one channel as a float-list feature.
    return tf.train.Feature(float_list = tf.train.FloatList(value = row.tolist()))

  feature_map = {
      'feature1': _float_feature(signal[0]),
      'feature2': _float_feature(signal[1]),
      'label': tf.train.Feature(int64_list = tf.train.Int64List(value = label.tolist())),
  }
  return tf.train.Example(features = tf.train.Features(feature = feature_map))
  

In [0]:
def convert_data_4(data,labels,filename):
    """Serialize 4-channel samples with their labels into one TFRecord file.

    The output path is '/content/drive/My Drive/TFDatasets/<filename>.tfrecord'.
    Each `data[k]` is paired with `labels[k]`, encoded by convert_signal_4 and
    written in index order; `data.shape[0]` records are produced.
    """
    target_path = '/content/drive/My Drive/TFDatasets/'+filename+'.tfrecord'
    with tf.python_io.TFRecordWriter(target_path) as record_file:
        for row in range(data.shape[0]):
            serialized = convert_signal_4(data[row],labels[row]).SerializeToString()
            record_file.write(serialized)

def convert_signal_4(signal,label):
  """Pack one 4-channel sample and its label into a tf.train.Example.

  Rows 0..3 of `signal` become the float features 'feature1'..'feature4';
  `label` becomes the int64 feature 'label'. Both inputs must provide `.tolist()`.
  """
  channel_names = ('feature1', 'feature2', 'feature3', 'feature4')
  feature_map = {}
  for position, channel_name in enumerate(channel_names):
    channel_values = tf.train.FloatList(value = signal[position].tolist())
    feature_map[channel_name] = tf.train.Feature(float_list = channel_values)
  label_values = tf.train.Int64List(value = label.tolist())
  feature_map['label'] = tf.train.Feature(int64_list = label_values)
  return tf.train.Example(features = tf.train.Features(feature = feature_map))

In [0]:
def convert_data_6(data,labels,filename):
    """Serialize 6-channel samples with their labels into one TFRecord file.

    Records are written to '/content/drive/My Drive/TFDatasets/<filename>.tfrecord'.
    For every index below `data.shape[0]`, the sample and the label at that index
    are encoded by convert_signal_6 and appended to the file.
    """
    destination = '/content/drive/My Drive/TFDatasets/'+filename+'.tfrecord'
    with tf.python_io.TFRecordWriter(destination) as sink:
        n_records = data.shape[0]
        for position in range(n_records):
            record = convert_signal_6(data[position],labels[position])
            sink.write(record.SerializeToString())

def convert_signal_6(signal,label):
  """Pack one 6-channel sample and its label into a tf.train.Example.

  Rows 0..5 of `signal` become the float features 'feature1'..'feature6';
  `label` becomes the int64 feature 'label'. Both inputs must provide `.tolist()`.
  """
  channel_names = ('feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6')
  # Channels first, label last.
  feature_map = {
      channel_name: tf.train.Feature(float_list = tf.train.FloatList(value = signal[position].tolist()))
      for position, channel_name in enumerate(channel_names)
  }
  feature_map['label'] = tf.train.Feature(int64_list = tf.train.Int64List(value = label.tolist()))
  packed_features = tf.train.Features(feature = feature_map)
  return tf.train.Example(features = packed_features)

In [0]:
# Garbage-collection pass before the memory-heavy feature cells; the output is the count of unreachable objects found.
gc.collect()

0

## Create feature Spaces for data

Every sample is represented by two vectors of 128 elements each: the in-phase and quadrature-phase components of the sample.

X array consists of those two raw features.

### Raw Features

In [0]:
# split_data returns exactly two values: the test samples and their binarized labels.
(testX, testY) = split_data(X,Y,'raw')

In [0]:
# Raw samples have two channels, so the 2-feature writer is used. Only the test
# split is written because split_data does not return train/validation data.
convert_data_2(testX,testY,'raw_test_data')

### Integration Features (2 features)
One for the in-phase component and one for quadrature phase component.

In [0]:
# Cumulative trapezoidal integral (unit spacing) of every row along the last axis, with a
# leading 0 so the result keeps X's shape. Vectorised: same values as a per-row
# cumulative-trapezoid + hstack((0, row)), without calling a Python lambda per row.
# Note: the result uses the dtype of the float arithmetic on X; the per-row hstack with an
# integer 0 could promote float32 input to float64.
trapezoid_areas = (X[:, :, 1:] + X[:, :, :-1]) / 2.0
int_X = np.concatenate((np.zeros(X.shape[:2] + (1,), dtype=trapezoid_areas.dtype), np.cumsum(trapezoid_areas, axis=2)), axis=2)
del trapezoid_areas

In [0]:
# Free the raw samples now that int_X has been computed from them.
# NOTE(review): later sections (derivative and combination features) read X again, so the
# loading cells must be re-run before them — confirm the intended execution order.
del X
gc.collect()

In [0]:
# split_data returns exactly two values: the test samples and their binarized labels.
(testX, testY) = split_data(int_X,Y,'int')

In [0]:
# Integration features have two channels, so the 2-feature writer is used. Only the test
# split is written because split_data does not return train/validation data.
convert_data_2(testX,testY,'int_test_data')

In [0]:
# Release the integration features and the test split once the TFRecord is written.
# split_data yields only (testX, testY), so there are no train/validation names to delete.
del int_X
del testX, testY
gc.collect()

453

### Derivative Features (2 features)
One for the in-phase component and one for quadrature phase component.

In [0]:
# Numerical derivative of every row along the last axis. np.gradient works on the whole
# array at once, giving the same values as applying it row by row through a Python lambda.
der_X = np.gradient(X, axis=2)

In [0]:
# Free the raw samples now that der_X has been computed from them.
# NOTE(review): the combination sections below read X again, so the loading cells must be
# re-run before them — confirm the intended execution order.
del X
gc.collect()

0

In [0]:
# split_data returns exactly two values: the test samples and their binarized labels.
(testX, testY) = split_data(der_X,Y,'der')

In [0]:
# Derivative features have two channels, so the 2-feature writer is used. Only the test
# split is written because split_data does not return train/validation data.
convert_data_2(testX,testY,'der_test_data')

In [0]:
# Release the derivative features and the test split once the TFRecord is written.
# split_data yields only (testX, testY), so there are no train/validation names to delete.
del der_X
del testX, testY
gc.collect()

288

### Combination 1: Derivative Features + Raw Features (4 features)

In [0]:
# Derivative channels followed by the raw channels, stacked along axis 1 (4 channels per sample).
# np.gradient is applied to the whole array at once instead of row by row through a lambda.
com_one_X = np.concatenate((np.gradient(X, axis=2), X), axis=1)

In [0]:
# split_data returns exactly two values: the test samples and their binarized labels.
(testX, testY) = split_data(com_one_X,Y,'com_one')

In [0]:
# Only the test split is written because split_data does not return train/validation data.
convert_data_4(testX,testY,'com1_test_data')

In [0]:
# X is not deleted here: the next section (Combination 2) still reads it.
gc.collect()

0

### Combination 2: Integration Features + Raw Features (4 features)

In [0]:
# Integration channels followed by the raw channels, stacked along axis 1 (4 channels per sample).
# The integral is the cumulative trapezoid (unit spacing) along the last axis with a leading 0,
# computed for the whole array at once instead of row by row through Python lambdas.
trapezoid_areas = (X[:, :, 1:] + X[:, :, :-1]) / 2.0
integral = np.concatenate((np.zeros(X.shape[:2] + (1,), dtype=trapezoid_areas.dtype), np.cumsum(trapezoid_areas, axis=2)), axis=2)
del trapezoid_areas
com_two_X = np.concatenate((integral, X), axis=1)
del integral

In [0]:
# Keep the test split returned by split_data: the samples and their binarized labels.
testX, testY = split_data(com_two_X, Y, 'com_two')

In [0]:
# com_two_X was built with np.concatenate, which copies, so X itself can be released.
# NOTE(review): Combination 3 below reads X again, so the loading cells must be re-run
# before it — confirm the intended execution order.
del X
gc.collect()

0

In [0]:
# Additional garbage-collection pass before writing the TFRecord file.
gc.collect()

0

In [0]:
# Only the test split is available from split_data, so only the test TFRecord is written.
convert_data_4(testX,testY,'com2_test_data')

In [0]:
# Nothing is deleted in this cell; it only triggers a garbage-collection pass.
gc.collect()

0

### Combination 3: Integration Features + Derivative Features (4 features)

In [0]:
# Derivative channels followed by integration channels, stacked along axis 1 (4 channels per sample).
# Both transforms run on the whole array at once instead of row by row through Python lambdas;
# the integral is the cumulative trapezoid (unit spacing) along the last axis with a leading 0.
trapezoid_areas = (X[:, :, 1:] + X[:, :, :-1]) / 2.0
integral = np.concatenate((np.zeros(X.shape[:2] + (1,), dtype=trapezoid_areas.dtype), np.cumsum(trapezoid_areas, axis=2)), axis=2)
del trapezoid_areas
com_three_X = np.concatenate((np.gradient(X, axis=2), integral), axis=1)
del integral

In [0]:
# Free the raw samples now that com_three_X has been computed from them.
# NOTE(review): Combination 4 below reads X again, so the loading cells must be re-run
# before it — confirm the intended execution order.
del X
gc.collect()

0

In [0]:
# Keep the test split returned by split_data: the samples and their binarized labels.
testX, testY = split_data(com_three_X, Y, 'com_three')

In [0]:
# Garbage-collection pass after the split, before writing the TFRecord file.
gc.collect()

0

In [0]:
# Only the test split is available from split_data, so only the test TFRecord is written.
convert_data_4(testX,testY,'com3_test_data')

### Combination 4: Integration Features + Derivative Features + Raw Features (6 features)

In [0]:
# Derivative, integration and raw channels stacked along axis 1 (6 channels per sample).
# Both transforms run on the whole array at once instead of row by row through Python lambdas;
# the integral is the cumulative trapezoid (unit spacing) along the last axis with a leading 0.
trapezoid_areas = (X[:, :, 1:] + X[:, :, :-1]) / 2.0
integral = np.concatenate((np.zeros(X.shape[:2] + (1,), dtype=trapezoid_areas.dtype), np.cumsum(trapezoid_areas, axis=2)), axis=2)
del trapezoid_areas
com_four_X = np.concatenate((np.gradient(X, axis=2), integral, X), axis=1)
del integral

In [0]:
# Free the raw samples now that com_four_X has been computed from them.
del X
gc.collect()

0

In [0]:
# Load a pre-computed feature array from Drive (presumably the 6-channel raw/derivative/integral set — confirm).
# NOTE(review): no visible cell writes this file, and com_four_X computed above is not used
# from here on — confirm which array is meant to be split.
dat_int_der_raw=np.load('/content/drive/My Drive/X_rawdiffinteg.npy')

In [0]:
# split_data needs a dataset name (used for the saved '<name>_SNR.npy' file) and returns
# exactly two values: the test samples and their binarized labels.
(testX, testY) = split_data(dat_int_der_raw,Y,'com_four')

In [0]:
# This section uses six channels per sample, so the 6-feature writer is used. Only the test
# split is written because split_data does not return train/validation data.
convert_data_6(testX,testY,'com4_test_data')