In [47]:
import h5py
import numpy as np
import pandas as pd
import csv 
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter 
from sklearn.preprocessing import normalize
from scipy.signal import welch
from detecta import detect_peaks
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import preprocessing

import torch
import torch.nn as nn
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim


# Open the HDF5 container read-only. The handle is intentionally left open:
# a later cell reads f['test'] from it again.
f = h5py.File('data.hdf5', 'r')

# Map the modulation names from the label CSV onto integer class ids.
classes = ["FM","OQPSK", "BPSK", "8PSK", "AM-SSB-SC", "4ASK", "16PSK", "AM-DSB-SC", "QPSK", "OOK"]
labels = np.array(pd.read_csv('train_labels.csv')['Category'])
le = preprocessing.LabelEncoder().fit(classes)
y_transformed = le.transform(labels)

# Materialise both HDF5 datasets as in-memory numpy arrays.
train_data = np.array(f['train'])
test_data = np.array(f['test'])

# Combine index 0 and index 1 of the last axis into the real and imaginary
# parts of one complex value per sample, then keep only its magnitude.
train_data_cmplx = train_data[:, :, 0] + 1j * train_data[:, :, 1]
train_data_abs = np.abs(train_data_cmplx)

# Remove each row's own mean magnitude (broadcast over axis 1).
mean = train_data_abs.mean(axis=1)
train_data_abs = train_data_abs - mean[:, np.newaxis]
    

In [48]:

def obtain_stats(x):
    """Compute per-row amplitude and shape statistics of a 2-D array.

    Parameters
    ----------
    x : array-like of shape (n, d)
        One signal (e.g. an FFT magnitude or PSD) per row.

    Returns
    -------
    amp_stats : ndarray of shape (n, 4)
        Columns are [mean, standard deviation, skewness, excess kurtosis]
        of the values in each row (population statistics, i.e. divided by d).
    shape_stats : ndarray of shape (n, 4)
        Same four moments, but of the index positions 0..d-1 weighted by the
        row's values, normalised by the row sum.

    Notes
    -----
    A row with zero standard deviation or zero sum produces NaN/inf entries
    (with a NumPy RuntimeWarning), because the moments divide by those
    quantities.
    """
    # Work in float64 so the output dtype matches regardless of input dtype.
    x = np.asarray(x, dtype=np.float64)
    d = x.shape[1]

    # --- amplitude statistics: moments of the values themselves ---
    total = x.sum(axis=1)
    mean = total / d
    std = np.sqrt(np.var(x, axis=1))
    z = (x - mean[:, np.newaxis]) / std[:, np.newaxis]
    z_sq = z * z
    gamma = (z_sq * z).sum(axis=1) / d
    beta = (z_sq * z_sq).sum(axis=1) / d - 3

    # --- shape statistics: moments of the bin index, weighted by the values ---
    idx = np.arange(d)
    mean_s = (x * idx).sum(axis=1) / total
    dev = idx[np.newaxis, :] - mean_s[:, np.newaxis]
    std_s = np.sqrt((dev ** 2 * x).sum(axis=1) / total)
    z_s = dev / std_s[:, np.newaxis]
    z_s_sq = z_s * z_s
    gamma_s = (z_s_sq * z_s * x).sum(axis=1) / total
    beta_s = (z_s_sq * z_s_sq * x).sum(axis=1) / total - 3

    amp_stats = np.column_stack((mean, std, gamma, beta))
    shape_stats = np.column_stack((mean_s, std_s, gamma_s, beta_s))
    return amp_stats, shape_stats


def get_psd_values(y_values, f_s):
    """Estimate the power spectral density of each row with Welch's method.

    Parameters
    ----------
    y_values : array of shape (n, d)
        Signals, one per row; the estimate is taken along axis 1.
    f_s : float
        Sampling frequency handed to ``scipy.signal.welch``.

    Returns
    -------
    (f_values, psd_values)
        The frequency bins and the per-row PSD, exactly as returned by
        ``welch`` with its remaining parameters left at their defaults.
    """
    spectrum = welch(y_values, fs=f_s, axis=1)
    return spectrum[0], spectrum[1]




In [49]:

#Obtaining fft values and psd values from train data.
n = train_data.shape[1]

# FFT of every mean-removed magnitude signal, taken along axis 1.
fft_train_data = np.fft.fft(train_data_abs, axis=1)

# Frequency of each FFT bin (default sample spacing of 1). The bin count is
# read from the array's own shape instead of from row 1, so this also works
# when only a single signal is present.
freq = np.fft.fftfreq(fft_train_data.shape[1])

# Welch PSD of the same signals with a sampling frequency of 1.
f_values, psd_values = get_psd_values(train_data_abs, 1)


In [54]:
#calculate amp and shape stats
def obtain_stats(x):
    """Compute per-row amplitude and shape statistics of a 2-D array.

    NOTE: this cell re-defines ``obtain_stats``; a definition with the same
    name already appears in an earlier cell, and whichever cell runs last is
    the one the feature-extraction functions will call.

    Parameters
    ----------
    x : array-like of shape (n, d)
        One signal (e.g. an FFT magnitude or PSD) per row.

    Returns
    -------
    amp_stats : ndarray of shape (n, 4)
        Columns are [mean, standard deviation, skewness, excess kurtosis]
        of the values in each row (population statistics, i.e. divided by d).
    shape_stats : ndarray of shape (n, 4)
        Same four moments, but of the index positions 0..d-1 weighted by the
        row's values, normalised by the row sum.

    Notes
    -----
    A row with zero standard deviation or zero sum produces NaN/inf entries
    (with a NumPy RuntimeWarning), because the moments divide by those
    quantities.
    """
    # Work in float64 so the output dtype matches regardless of input dtype.
    x = np.asarray(x, dtype=np.float64)
    d = x.shape[1]

    # --- amplitude statistics: moments of the values themselves ---
    total = x.sum(axis=1)
    mean = total / d
    std = np.sqrt(np.var(x, axis=1))
    z = (x - mean[:, np.newaxis]) / std[:, np.newaxis]
    z_sq = z * z
    gamma = (z_sq * z).sum(axis=1) / d
    beta = (z_sq * z_sq).sum(axis=1) / d - 3

    # --- shape statistics: moments of the bin index, weighted by the values ---
    idx = np.arange(d)
    mean_s = (x * idx).sum(axis=1) / total
    dev = idx[np.newaxis, :] - mean_s[:, np.newaxis]
    std_s = np.sqrt((dev ** 2 * x).sum(axis=1) / total)
    z_s = dev / std_s[:, np.newaxis]
    z_s_sq = z_s * z_s
    gamma_s = (z_s_sq * z_s * x).sum(axis=1) / total
    beta_s = (z_s_sq * z_s_sq * x).sum(axis=1) / total - 3

    amp_stats = np.column_stack((mean, std, gamma, beta))
    shape_stats = np.column_stack((mean_s, std_s, gamma_s, beta_s))
    return amp_stats, shape_stats


#previous method of processing data
def _strongest_peaks(signal, peak_idx, max_peaks):
    """Limit detected peak indices to at most ``max_peaks`` entries.

    When fewer than ``max_peaks`` indices were detected they are returned
    unchanged, in the order given. Otherwise the ``max_peaks`` indices with
    the largest ``signal`` values are returned, tallest first.
    """
    peak_idx = np.asarray(peak_idx, dtype=int)
    if len(peak_idx) < max_peaks:
        return peak_idx
    order = signal[peak_idx].argsort()[::-1][:max_peaks]
    return peak_idx[order]


def preprocess_data():
    """Build the legacy (statistics + spectral peaks) feature matrix.

    Reads the module-level ``fft_train_data``, ``psd_values``, ``freq`` and
    ``f_values`` rather than taking arguments.

    Returns
    -------
    ndarray of shape (n, 16 + 5 * num_peaks)
        Columns, in order: FFT amplitude stats (4), FFT shape stats (4),
        PSD amplitude stats (4), PSD shape stats (4), then for each of the
        ``num_peaks`` FFT peaks (frequency, magnitude, phase), then for each
        of the ``num_peaks`` PSD peaks (frequency, power). Slots for peaks
        that were not detected stay zero.
    """
    num_peaks = 10
    n = fft_train_data.shape[0]

    abs_fft = abs(fft_train_data)
    fft_amp_stats, fft_shape_stats = obtain_stats(abs_fft)
    psd_amp_stats, psd_shape_stats = obtain_stats(psd_values)

    # Zero-initialised, so missing peaks need no explicit padding.
    peak_data = np.zeros((n, num_peaks*3 + num_peaks*2))
    psd_offset = 3 * num_peaks  # PSD peak columns start after the FFT triples

    for j in range(n):
        # detecta thresholds: mpd = minimum peak distance, mph = minimum peak height.
        fft_peaks = _strongest_peaks(
            abs_fft[j], detect_peaks(abs_fft[j], mpd=7, mph=10), num_peaks)
        for i, k in enumerate(fft_peaks):
            peak_data[j, 3*i] = freq[k]
            peak_data[j, 3*i + 1] = abs_fft[j, k]
            peak_data[j, 3*i + 2] = np.angle(fft_train_data[j, k])

        psd_peaks = _strongest_peaks(
            psd_values[j], detect_peaks(psd_values[j], mph=0.1), num_peaks)
        for i, k in enumerate(psd_peaks):
            peak_data[j, psd_offset + 2*i] = f_values[k]
            peak_data[j, psd_offset + 2*i + 1] = psd_values[j, k]

    # Assemble once, after every row of peak_data has been filled in
    # (previously this concatenation was redone on every loop iteration).
    return np.concatenate(
        (fft_amp_stats, fft_shape_stats, psd_amp_stats, psd_shape_stats, peak_data),
        axis=1)

#main way of processing data for features
def preprocess_data2(fft_train_data, psd_values, pca_fft=None, pca_psd=None, fit=True):
    """Build the main feature matrix: summary statistics plus PCA projections.

    Parameters
    ----------
    fft_train_data : ndarray of shape (n, d)
        Complex FFT of each signal; only its magnitude is used.
    psd_values : ndarray of shape (n, k)
        PSD of each signal.
    pca_fft, pca_psd : sklearn.decomposition.PCA, optional
        Models used for the FFT-magnitude and PSD projections. When omitted,
        new ``PCA(300)`` and ``PCA(50)`` instances are created, which is the
        original behaviour.
    fit : bool, default True
        If True the PCA models are fitted on the data passed in. Pass
        ``fit=False`` together with the models that were fitted on the
        training set to project held-out data onto the *same* components;
        fitting fresh models on a different data set yields components that
        do not line up with the ones a classifier was trained on.

    Returns
    -------
    ndarray of shape (n, 16 + n_fft_components + n_psd_components)
        Columns, in order: FFT amplitude stats (4), FFT shape stats (4),
        PSD amplitude stats (4), PSD shape stats (4), FFT PCA scores,
        PSD PCA scores. With the default models this is 366 columns.

    Raises
    ------
    ValueError
        If ``fit`` is False but a fitted model was not supplied.
    """
    if not fit and (pca_fft is None or pca_psd is None):
        raise ValueError("fit=False requires already-fitted pca_fft and pca_psd")
    if pca_fft is None:
        pca_fft = PCA(300)
    if pca_psd is None:
        pca_psd = PCA(50)

    abs_fft = abs(fft_train_data)
    fft_amp_stats, fft_shape_stats = obtain_stats(abs_fft)
    psd_amp_stats, psd_shape_stats = obtain_stats(psd_values)

    if fit:
        pca_fft.fit(abs_fft)
        pca_psd.fit(psd_values)
    new_fft_train_data = pca_fft.transform(abs_fft)
    psd_data = pca_psd.transform(psd_values)

    return np.concatenate(
        (fft_amp_stats, fft_shape_stats, psd_amp_stats, psd_shape_stats,
         new_fft_train_data, psd_data),
        axis=1)


In [51]:
# Turn the training spectra into the feature matrix, then hold out 10% of
# the rows (fixed seed) to measure accuracy on unseen signals.
data = preprocess_data2(fft_train_data, psd_values)
X_train, X_test, y_train, y_test = train_test_split(
    data,
    y_transformed,
    test_size=0.10,
    random_state=42,
)


In [52]:
# Mini-batch size used by the DataLoaders created below.
batch_size = 32
input_dim = 366 #Width of the feature matrix: 16 summary statistics + 300 FFT PCA scores + 50 PSD PCA scores
output_dim = 10 #There are 10 classes of modulation schemes
# Number of units in the single hidden layer of NeuralNet.
hidden_neurons = 254


class NeuralNet(nn.Module):
    """Fully connected classifier with one ReLU hidden layer.

    Parameters
    ----------
    ip_dim : int
        Number of input features per sample.
    hidden_neuron : int
        Width of the hidden layer.
    output_dim : int
        Number of output scores (one per class).
    """

    def __init__(self, ip_dim, hidden_neuron, output_dim):
        super().__init__()
        self.ip_dim = ip_dim
        # Input -> hidden, followed by hidden -> class scores.
        self.lin1 = nn.Linear(ip_dim, hidden_neuron)
        self.lin2 = nn.Linear(hidden_neuron, output_dim)

    def forward(self, input):
        """Return raw class scores of shape (-1, output_dim).

        The input is first flattened to rows of ``ip_dim`` features; no
        softmax is applied to the result.
        """
        flat = input.view(-1, self.ip_dim)
        hidden = F.relu(self.lin1(flat))
        return self.lin2(hidden)

# Network, loss and optimizer.
model = NeuralNet(input_dim, hidden_neurons, output_dim)

# Cross-entropy over the raw class scores produced by the network.
loss = nn.CrossEntropyLoss()

# Plain SGD over all network parameters.
optimizer = optim.SGD(model.parameters(), lr=1e-5)

# DataLoader accepts any indexable sequence, so each split is stored as a
# list of [feature_row, label] pairs. Note that this rebinds the names
# train_data / test_data, which held the raw arrays in an earlier cell.
train_data = [[features, target] for features, target in zip(X_train, y_train)]
test_data = [[features, target] for features, target in zip(X_test, y_test)]

trainloader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, num_workers=2)

#Training function 
def train(epoch, log_interval=10000):
    """Run one pass over ``trainloader``, updating ``model`` after each batch.

    Uses the module-level ``model``, ``loss``, ``optimizer`` and
    ``trainloader``. ``epoch`` and ``log_interval`` are accepted but not
    currently used (per-batch loss logging is disabled).
    """
    for batch_idx, (inputs, labels) in enumerate(trainloader):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        loss_v = loss(model(inputs.float()), labels.long())
        loss_v.backward()
        optimizer.step()
epochs = 3000
for epoch in range(1, epochs + 1):
    train(epoch)


def _count_correct(loader):
    """Return ``(correct, total)`` prediction counts of ``model`` over ``loader``.

    Runs under ``torch.no_grad()`` because no gradients are needed for
    evaluation, and returns plain Python ints.
    """
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in loader:
            outputs = model(inputs.float())
            # Index of the highest score in each row is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return correct, total


# Accuracy on the held-out split.
correct, total = _count_correct(testloader)
print('Accuracy of the network on the %d signals: %f %%' % (
    total, 100.0 * correct / total))

# Accuracy on the data the network was trained on, for comparison.
correct, total = _count_correct(trainloader)
print('Accuracy of the network on the training %d signals: %f %%' % (
    total, 100.0 * correct / total))


Accuracy of the network on the 3000 signals: 43.033333 %
Accuracy of the network on the training 27000 signals: 57.000000 %


In [55]:
#preprocessing of test data (same steps as applied to the training data)
test_data1 = np.array(f['test'])
# Index 0 / index 1 of the last axis become the real / imaginary parts.
test_data1 = test_data1[:, :, 0] + 1j*test_data1[:, :, 1]
test_data_abs = abs(test_data1)

# Remove each row's own mean magnitude (broadcast over axis 1).
# Note: this rebinds the module-level names mean, n and freq.
mean = np.mean(test_data_abs, axis=1)
test_data_abs = test_data_abs - mean[:, np.newaxis]

n = test_data1.shape[1]
fft_test_data = np.fft.fft(test_data_abs, axis=1)
# Bin count is taken from the array's shape rather than from row 1, so a
# single test signal does not raise IndexError.
freq = np.fft.fftfreq(fft_test_data.shape[1])

test_f_values, test_psd_values = get_psd_values(test_data_abs, 1)
# NOTE(review): preprocess_data2 fits its PCA models on whatever data it is
# given, so the PCA columns produced here come from components fitted on the
# test set, not the ones the network was trained on -- confirm this is
# intended before trusting the predictions.
data_test = preprocess_data2(fft_test_data, test_psd_values)

In [42]:
#using model to predict test data and export to a file name "predictions.csv"
predict = []

# DataLoader expects (input, target) pairs; the row index is used as a
# placeholder target and is ignored below.
test_data = [[data_test[i], i] for i in range(len(data_test))]

testloader = torch.utils.data.DataLoader(test_data, shuffle=False,  num_workers=2)

# Inference only, so gradient tracking is switched off. Predictions are
# stored as plain ints rather than 0-d tensors.
with torch.no_grad():
    for tests, _row_ids in testloader:
        outputs = model(tests.float())
        _, predicted = torch.max(outputs, 1)
        predict.extend(predicted.tolist())

# Map integer class ids back to modulation names.
predict_words = le.inverse_transform(predict)

# Two columns per row: running index, predicted class name.
data1 = np.empty((predict_words.shape[0], 2), dtype='object')
data1[:, 0] = np.arange(predict_words.shape[0])
data1[:, 1] = predict_words

# NOTE(review): mode 'a+' appends, so re-running this cell adds a second copy
# of every row to an existing predictions.csv. Kept as-is in case the file is
# pre-seeded with a header line -- confirm, and switch to 'w' if not.
with open('predictions.csv', 'a+', newline ='') as file:
    write = csv.writer(file)
    write.writerows(data1)

(20000,)
