In [1]:
from sklearn.model_selection import train_test_split
from ast import literal_eval
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import random
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [42]:
df = pd.read_csv('/Users/ajayiidowu/PycharmProjects/pythonProject1/data/logged_data.csv')

In [43]:
df.dropna(subset=['I/Q Data'], inplace=True)

In [44]:
df.columns

Index(['Timestamp', 'Frequency', 'Signal Strength', 'Modulation', 'Bandwidth',
       'Location', 'Device Type', 'Antenna Type', 'Temperature', 'Humidity',
       'Wind Speed', 'Precipitation', 'Weather Condition', 'Interference Type',
       'Battery Level', 'Power Source', 'CPU Usage', 'Memory Usage',
       'WiFi Strength', 'Disk Usage', 'System Load', 'Latitude', 'Longitude',
       'Altitude(m)', 'Air Pressure', 'Device Status', 'I/Q Data'],
      dtype='object')

In [45]:
df.dtypes

Timestamp             object
Frequency              int64
Signal Strength        int64
Modulation            object
Bandwidth              int64
Location              object
Device Type           object
Antenna Type          object
Temperature            int64
Humidity               int64
Wind Speed             int64
Precipitation        float64
Weather Condition     object
Interference Type     object
Battery Level        float64
Power Source            bool
CPU Usage            float64
Memory Usage         float64
WiFi Strength          int64
Disk Usage           float64
System Load          float64
Latitude             float64
Longitude            float64
Altitude(m)          float64
Air Pressure         float64
Device Status         object
I/Q Data              object
dtype: object

# Function to convert complex numbers in string format to a pair of real-valued arrays
def complex_str_to_real_array(complex_str):
    # Convert the string representation to a list of complex numbers
    complex_list = np.array(literal_eval(complex_str), dtype=np.complex64)
    # Separate the real and imaginary parts
    real_parts = np.real(complex_list)
    # Concatenate the real and imaginary parts along the last axis
    real_mean = np.mean(real_parts, axis=0)
    return real_mean

# Function to convert complex numbers in string format to a pair of real-valued arrays
def complex_str_to_imag_array(complex_str):
    # Convert the string representation to a list of complex numbers
    complex_list = np.array(literal_eval(complex_str), dtype=np.complex64)
    # Separate the real and imaginary parts
    imag_parts = np.imag(complex_list)
    # Concatenate the real and imaginary parts along the last axis
    imag_mean = np.mean(imag_parts, axis=0)
    return imag_mean

In [47]:
# Function to convert complex numbers in string format to a pair of real-valued arrays
def complex_str_to_concat_array(complex_str):
    # Convert the string representation to a list of complex numbers
    complex_list = np.array(literal_eval(complex_str), dtype=np.complex64)
    # Separate the real and imaginary parts
    abs_value = np.abs(complex_list)
    phase_value = np.angle(complex_list)
    # Concatenate the real and imaginary parts along the last axis
    concatenate = np.concatenate((abs_value,phase_value)).reshape(-1,order='F')
    return concatenate

In [48]:
df_concat = df['I/Q Data'].apply(complex_str_to_concat_array)

In [49]:
df_concat = pd.DataFrame(df_concat.explode().values.reshape (109324,200))

df_IQ_real2 = df_IQ_real2.rename(columns={'I/Q Data' : 'I/Q Real'})

df_IQ_imag2 = df_IQ_imag2.rename(columns={'I/Q Data' : 'I/Q Imag'})

In [50]:
df_new = df.drop('I/Q Data', axis=1)

In [51]:
cat_columns = ['Device Type','Antenna Type','Interference Type','Device Status']

In [52]:
num_columns = ['Frequency','Signal Strength','Bandwidth','WiFi Strength']

In [54]:
df_new_num = df_new[num_columns]

In [61]:
scaler2 = MinMaxScaler()

df_new_num = scaler2.fit_transform(df_new_num)

In [63]:
df_new_num = pd.DataFrame(df_new_num)

In [66]:
df_new_num_comb = pd.concat([df_new_num, df_concat], axis=1)

In [67]:
df_new_num_comb.shape

(109324, 204)

In [68]:
X_numerical = df_new_num_comb.values

In [69]:
print(X_numerical.shape)

(109324, 204)


In [70]:
label = ['Modulation']

In [71]:
for category in cat_columns:
    df[cat_columns] = df[cat_columns].astype('category')

In [73]:
df.dtypes

Timestamp              object
Frequency             float64
Signal Strength       float64
Modulation             object
Bandwidth             float64
Location               object
Device Type          category
Antenna Type         category
Temperature             int64
Humidity                int64
Wind Speed              int64
Precipitation         float64
Weather Condition      object
Interference Type    category
Battery Level         float64
Power Source             bool
CPU Usage             float64
Memory Usage          float64
WiFi Strength         float64
Disk Usage            float64
System Load           float64
Latitude              float64
Longitude             float64
Altitude(m)           float64
Air Pressure          float64
Device Status        category
I/Q Data               object
dtype: object

df['Device Type'].cat.categories

df['Interference Type'].cat.categories

df['Antenna Type'].cat.categories

df['Device Status'].cat.categories

df['Interference Type'].head().cat.codes

In [74]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

label_encoder = LabelEncoder()

In [75]:
df['Device Type'] = label_encoder.fit_transform(df['Device Type'])

df['Interference Type'] = label_encoder.fit_transform(df['Interference Type'])

df['Antenna Type'] = label_encoder.fit_transform(df['Antenna Type'])

df['Device Status'] = label_encoder.fit_transform(df['Device Status'])

In [76]:
df['Device Type'].head()

0    0
1    0
2    1
3    1
4    0
Name: Device Type, dtype: int64

In [77]:
data_label = df['Modulation'].values.reshape(-1, 1)


In [78]:
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder(handle_unknown='ignore',sparse=False).fit(data_label.reshape(-1, 1))

data_label = ohe.transform(data_label)

In [79]:
print(data_label.shape)

(109324, 6)


In [80]:
X_categorical = df[cat_columns].values

In [81]:
print(X_categorical.shape)

(109324, 4)


In [21]:
data_features_reshape=np.vstack(data_features_reshape).astype(np.float32)

In [117]:
X_numerical_test=np.vstack(X_numerical).astype(np.float32)

In [118]:
X_numerical_test.shape

(109324, 204)

In [119]:
X_numerical_test2 = torch.from_numpy(X_numerical_test)

In [120]:
X_numerical_test2.shape

torch.Size([109324, 204])

In [115]:
X_numerical.shape

(109324, 204)

In [132]:
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, X_numerical_test2, X_categorical, y):
        # convert into PyTorch tensors and remember them
        self.X_numerical = torch.Tensor(X_numerical_test2)
        self.X_categorical = torch.Tensor(X_categorical)
        self.y = torch.Tensor(y)
 
    def __len__(self):
        # this should return the size of the dataset
        return len(self.y)
 
    def __getitem__(self, idx):
        # this should return one sample from the dataset
        numerical_data = self.X_numerical[idx]
        categorical_data = self.X_categorical[idx].long()
        target = self.y[idx]
        return numerical_data, categorical_data, target

In [133]:
# set up DataLoader for data set
dataset = CustomDataset(X_numerical_test2, X_categorical, data_label)


In [134]:

train_set, val_set = train_test_split(dataset, test_size = 0.2, random_state=1)


In [135]:
print(len(train_set))

87459


In [136]:
val_set, test_set = train_test_split(val_set, test_size = 0.5, random_state=1)

In [137]:
print(len(val_set))
print(len(test_set))

10932
10933


In [138]:
# Create DataLoaders
train_loader = DataLoader(train_set, batch_size=50, shuffle=True)
val_loader = DataLoader(val_set, batch_size=50, shuffle=False)
test_loader = DataLoader(test_set, batch_size=50, shuffle=False)


In [139]:
print(len(train_loader))
print(len(val_loader))
print(len(test_loader))

1750
219
219


for batch_idx, (X_num_batch, X_cat_batch, y_batch) in enumerate(test_loader):
    print(f"Batch {batch_idx + 1}:")
    print(f"X_num_batch.shape: {X_num_batch.shape}")
    print(f"X_cat_batch.shape: {X_cat_batch.shape}")
    print(f"y_batch.shape: {y_batch.shape}")

In [156]:
import torch.nn as nn
class ModulationCNN(nn.Module):
    def __init__(self, num_numerical_features, embedding_size):
        super().__init__()
        
        # Embeddings
        #self.embeddings = nn.ModuleList([nn.Embedding(num, emb_dim) for num, emb_dim in zip(num_categories, emb_dims)])
        self.embeddings = nn.ModuleList([nn.Embedding(num, emb_dim) for num, emb_dim in embedding_size])
        #self.embeddings = nn.ModuleList([nn.Embedding(num, emb_dim) for num, emb_dim in zip(num_categories, embedding_size)])

        num_categorical_cols = sum((nf for ni, nf in embedding_size))
        input_size = num_categorical_cols + num_numerical_features
        
        # Now define the fully connected layers using self.fc1_input_size
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=48, kernel_size=2, stride=2)
        self.batchnorm1 = nn.BatchNorm1d(48)

        self.conv2 = nn.Conv1d(in_channels=48, out_channels=48, kernel_size=2, stride=2)
        self.batchnorm2 = nn.BatchNorm1d(48)
        
        self.conv3 = nn.Conv1d(in_channels=48, out_channels=24, kernel_size=1, stride=1)
        self.batchnorm3 = nn.BatchNorm1d(24)
        
        self.flatten = nn.Flatten()

        self.fc1 = nn.Linear(1320, 120)
        self.batchnorm_fc1 = nn.BatchNorm1d(120)
        
        self.fc2 = nn.Linear(120, 6)

        self.batch_norm_num = nn.BatchNorm1d(204)
        self.af = nn.ReLU()
        self.af_out = nn.Softmax(dim=1)

    def forward(self, X_numerical, X_categorical):
        embeddings = []
        
        for i, e in enumerate(self.embeddings):
            #print(self.embeddings)
            #print(X_categorical)
            
            embeddings.append(e(X_categorical[:,i].int()))
            x = torch.cat(embeddings, 1)
        
        X_numerical = self.batch_norm_num(X_numerical)
        x = torch.cat([x.unsqueeze(2), X_numerical.unsqueeze(2)], 1)
        
        x = self.conv1(x.permute(0,2,1))
        #x = self.af(x)
        x = self.batchnorm1(x)
        x = self.conv2(x)
        #x = self.af(x)
        x = self.batchnorm2(x)
        x = self.conv3(x)
        #x = self.af(x)
        x = self.batchnorm3(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc1(x)
        #x = self.af(x)
        x = self.batchnorm_fc1(x)
        x = self.fc2(x)
        x = self.af_out(x)
        return x



In [157]:
num_numerical_features = X_numerical.shape[1]
#num_categories = [2, 4, 4, 2]
#num_categories = [len(set(col)) for col in X_categorical.T]
#emb_dim = [2, 2, 2, 2]
#embedding_size = [5, 5, 5, 5]
embedding_size = [(3, 4), (4, 4), (4, 4), (3, 4)]


In [158]:
model = ModulationCNN(num_numerical_features, embedding_size)

In [159]:
#print(model)

In [168]:
import torch.optim as optim

n_epochs = 100

loss_fn = nn.CrossEntropyLoss()
l1_lambda = 0.005
optimizer = optim.Adam(model.parameters(), lr=0.001)
#optimizer = optim.SGD(model.parameters(), lr=0.05)

In [169]:
train_loss_hist = []
train_acc_hist = []

val_loss_hist = []
val_acc_hist = []

In [170]:
print(X_numerical.shape)

(109324, 204)


In [None]:
# training loop
from tqdm import tqdm
import copy


for epoch in range(n_epochs):
    # set model in training mode and run through each batch
    model.train()
    running_train_loss = 0
    running_train_acc = 0
    progress_bar_train = tqdm(enumerate(train_loader))
    for index, (X_batch_num_train, X_batch_cat_train, y_batch_train) in progress_bar_train:
        optimizer.zero_grad()
        # forward pass
        y_pred_train = model(X_batch_num_train, X_batch_cat_train)
        
        """
        l1_reg = 0
        for param in model.parameters():
            l1_reg += torch.abs(param).sum()
        loss_train = loss_fn(y_pred_train, y_batch_train) + l1_lambda*l1_reg
        """
        loss_train = loss_fn(y_pred_train, y_batch_train)
        running_train_loss += loss_train.item()
        # backward pass
        optimizer.zero_grad()
        loss_train.backward()
        # update weights
        optimizer.step()
        
        # compute accuracy metrics
        correct_pred_train = (torch.argmax(y_pred_train, 1) == torch.argmax(y_batch_train, 1)).float()
        acc_train = correct_pred_train.sum() / len(correct_pred_train)
        running_train_acc += acc_train
         
        
        progress_bar_train.set_description(f'Epoch [{epoch+1}/{n_epochs}] Training Loss: {running_train_loss/(index+1):.4f} Training accuracy: {(running_train_acc/(index+1))*100:.2f}%')
    
     # store metrics
    train_loss_hist.append(running_train_loss/(index+1))
    train_acc_hist.append(running_train_acc/(index+1))
    
    model.eval()
    running_val_loss = 0
    running_val_acc = 0
    progress_bar_val = tqdm(enumerate(val_loader)) 
    for index, (X_batch_num_val, y_batch_num_val, y_batch_val) in progress_bar_val:
        with torch.no_grad():
            y_pred_val = model(X_batch_num_val, y_batch_num_val)
            val_test = loss_fn(y_pred_val, y_batch_val)
            running_val_loss += val_test.item()
            
            # Calculate accuracy metric
            correct_pred_val = (torch.argmax(y_pred_val, 1) == torch.argmax(y_batch_val, 1)).float()
            acc_val = correct_pred_val.sum() / len(correct_pred_val)
            running_val_acc += acc_val
            
            progress_bar_val.set_description(f'Epoch [{epoch+1}/{n_epochs}] Validation Loss: {running_val_loss/(index+1):.4f} Validation accuracy: {(running_val_acc/(index+1))*100:.2f}%')
        
    val_loss_hist.append(running_val_loss/(index+1))
    val_acc_hist.append(running_val_acc/(index+1))
    


Epoch [1/100] Training Loss: 1.7931 Training accuracy: 17.04%: : 1750it [00:56, 30.72it/s]
Epoch [1/100] Validation Loss: 1.7934 Validation accuracy: 16.51%: : 219it [00:02, 101.11it/s]
Epoch [2/100] Training Loss: 1.7926 Training accuracy: 16.81%: : 423it [00:14, 60.52it/s]

# Set the model to evaluation mode
model.eval()

# Initialize variables to keep track of correct predictions and total predictions
correct_predictions = 0
total_predictions = 0

# Pass the test data through the model
with torch.no_grad():
    for inputs, labels in test_loader:
        test_outputs = model(inputs)

        # Convert the model outputs to predicted classes
        predicted_classes = torch.argmax(test_outputs, dim=1)

        # Update correct and total predictions counters
        correct_predictions += (predicted_classes == labels).sum().item()
        total_predictions += labels.size(0)

# Calculate the accuracy
accuracy = correct_predictions / total_predictions
print(f'Test Accuracy: {accuracy * 100:.2f}%')

In [36]:
#1. calculate the right ACC
#2. shuffle the order of batch?
#3. loss function
#4. activation function: softmax for outputlayer

In [37]:
#remove pooli