# Family Classification


The purpose of this notebook is to train our new model (ResNet1-size?) in order to classify signal family from impaired data

### Import Libraries

In [1]:
import torch
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
from tqdm import tqdm
import torch.nn.functional as F
import torchsig.transforms as ST
import numpy as np
import torchsig
import os
import shutil
from torch.utils.data import Subset
from sigfam import Sig53
from torch.utils.data import DataLoader
import torch.nn.functional as F
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from Train_evaluate import *
from modeling import *
from torch import nn, optim
from torchsummary import summary


----
### Data reading

In [2]:
selected_classes = [
    "psk",
    "qam",
    "ofdm"
    ]

modulation_mapping = {
    "psk": 0,
    "qam": 1,
    "ofdm": 2
}

In [3]:
class_family_dict = {
        "bpsk": "psk",
        "qpsk": "psk",
        "8psk": "psk",
        "16psk": "psk",
        "32psk": "psk",
        "64psk": "psk",
        "16qam": "qam",
        "32qam": "qam",
        "32qam_cross": "qam",
        "64qam": "qam",
        "128qam_cross": "qam",
        "256qam": "qam",
        "512qam_cross": "qam",
        "1024qam": "qam",
        "ofdm-64": "ofdm",
        "ofdm-72": "ofdm",
        "ofdm-128": "ofdm",
        "ofdm-180": "ofdm",
        "ofdm-256": "ofdm",
        "ofdm-300": "ofdm",
        "ofdm-512": "ofdm",
        "ofdm-600": "ofdm",
        "ofdm-900": "ofdm",
        "ofdm-1024": "ofdm",
        "ofdm-1200": "ofdm",
        "ofdm-2048": "ofdm",
    }

In [4]:
# Specify Sig53 Options
root = "../../../../data/torchsig/sigfam/"
train = True
impaired = True
class_list = list(Sig53._idx_to_name_dict.values())
transform = ST.Compose([
    ST.RandomPhaseShift(phase_offset=(-1, 1)),
    ST.Normalize(norm=np.inf),
    ST.ComplexTo2D(),
])
target_transform = ST.DescToClassIndex(class_list=class_list)

# Instantiate the Sig53 Clean Training Dataset
sig53_clean_train = Sig53(
    root=root,
    train=train,
    impaired=impaired,
    transform=transform,
    target_transform=target_transform,
    use_signal_data=True,
)

# Instantiate the Sig53 Clean Validation Dataset

train = False
sig53_clean_val = Sig53(
    root=root,
    train=train,
    impaired=impaired,
    transform=transform,
    target_transform=target_transform,
    use_signal_data=True,
)

# Retrieve a sample and print out information to verify
idx = np.random.randint(len(sig53_clean_train))
data, label = sig53_clean_train[idx]
print("Dataset length: {}".format(len(sig53_clean_train)))
print("Data shape: {}".format(data.shape))
print("Label Index: {}".format(label))
print("Label Class: {}".format(Sig53.convert_idx_to_name(label)))

Dataset length: 200000
Data shape: (2, 4096)
Label Index: 18
Label Class: 32qam


In [5]:
# Initialize a list to hold the new dataset
family_train_dataset = []

# Iterate over the training dataset
for i in range(len(sig53_clean_train)):
    # Retrieve data and label for the current index
    data, label = sig53_clean_train[i]

    # Get the class name from the label index
    class_name = Sig53.convert_idx_to_name(label)

    # Map the class name to the family name
    family_name = class_family_dict[class_name]

    # Convert the family name to its corresponding index
    family_index = modulation_mapping[family_name]

    # Append the data and the family index to the new dataset
    family_train_dataset.append((data, family_index))

# Verify the new dataset
print("New dataset length: {}".format(len(family_train_dataset)))
print("First sample data shape: {}".format(family_train_dataset[0][0].shape))

# Retrieve the family name using the index
first_sample_family_index = family_train_dataset[0][1]
family_name_by_index = {v: k for k, v in modulation_mapping.items()}[first_sample_family_index]

print("First sample family name: {}".format(family_name_by_index))
print("First sample family index: {}".format(first_sample_family_index))


New dataset length: 200000
First sample data shape: (2, 4096)
First sample family name: psk
First sample family index: 0


----
### Dataloaders

In [6]:
# Calculate the size of the validation set (10% of the training set)
val_size = int(0.1 * len(sig53_clean_train))
train_size = len(sig53_clean_train) - val_size

# Split the training dataset into training and validation datasets
train_dataset, val_dataset = torch.utils.data.random_split(family_train_dataset, [train_size, val_size])

# Create data loaders for training, validation, and testing
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    num_workers=8,
    shuffle=True,
    drop_last=True,
)
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=16,
    num_workers=8,
    shuffle=False,
    drop_last=True,
)
test_dataloader = DataLoader(
    dataset=sig53_clean_val,
    batch_size=16,
    num_workers=8,
    shuffle=False,
    drop_last=True,
)

In [7]:
inputs, _ = next(iter(train_dataloader))
input_size = inputs.shape[1:]

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model
model = ResNet1D(Bottleneck1D, [3,4,4,3], num_classes=len(selected_classes), in_channels=2).to(device)

# Check if CUDA is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


In [9]:


# Set up the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of epochs for training
num_epochs = 20

# Assuming you have a ModelTrainer class defined that can handle the training process
trainer = ModelTrainer(model, train_dataloader, val_dataloader, criterion, optimizer, device)

summary(model, input_size=input_size, device=str(device))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 32, 2048]             448
       BatchNorm1d-2             [-1, 32, 2048]              64
            Conv1d-3             [-1, 32, 2048]           1,024
       BatchNorm1d-4             [-1, 32, 2048]              64
            Conv1d-5             [-1, 32, 2048]              96
            Conv1d-6             [-1, 32, 2048]           1,024
DepthwiseSeparableConv1d-7             [-1, 32, 2048]               0
       BatchNorm1d-8             [-1, 32, 2048]              64
            Conv1d-9            [-1, 128, 2048]           4,096
      BatchNorm1d-10            [-1, 128, 2048]             256
AdaptiveAvgPool1d-11               [-1, 128, 1]               0
           Linear-12                    [-1, 8]           1,024
             ReLU-13                    [-1, 8]               0
           Linear-14             

In [10]:
trainer.run_training_loop(num_epochs)

Epoch 1/20


Training: 100%|██████████| 5625/5625 [09:34<00:00,  9.79it/s, loss=0.2690, accuracy=89.19%]
                                                                                                     

Epoch 1: Train Loss 0.2690, Acc 89.19%, Val Loss 0.2305, Val Acc 91.00%
Epoch 2/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.78it/s, loss=0.1725, accuracy=93.01%]
                                                                                                     

Epoch 2: Train Loss 0.1725, Acc 93.01%, Val Loss 0.1525, Val Acc 93.86%
Epoch 3/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.78it/s, loss=0.1529, accuracy=93.75%]
                                                                                                     

Epoch 3: Train Loss 0.1529, Acc 93.75%, Val Loss 0.1438, Val Acc 94.24%
Epoch 4/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.77it/s, loss=0.1428, accuracy=94.20%]
                                                                                                     

Epoch 4: Train Loss 0.1428, Acc 94.20%, Val Loss 0.1485, Val Acc 93.80%
Epoch 5/20


Training: 100%|██████████| 5625/5625 [09:34<00:00,  9.79it/s, loss=0.1335, accuracy=94.50%]
                                                                                                     

Epoch 5: Train Loss 0.1335, Acc 94.50%, Val Loss 0.1760, Val Acc 92.64%
Epoch 6/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.78it/s, loss=0.1272, accuracy=94.77%]
                                                                                                     

Epoch 6: Train Loss 0.1272, Acc 94.77%, Val Loss 0.1251, Val Acc 94.79%
Epoch 7/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.78it/s, loss=0.1222, accuracy=95.02%]
                                                                                                     

Epoch 7: Train Loss 0.1221, Acc 95.02%, Val Loss 0.1483, Val Acc 93.83%
Epoch 8/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.78it/s, loss=0.1171, accuracy=95.18%]
                                                                                                     

Epoch 8: Train Loss 0.1171, Acc 95.18%, Val Loss 0.1164, Val Acc 95.25%
Epoch 9/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.77it/s, loss=0.1126, accuracy=95.30%]
                                                                                                     

Epoch 9: Train Loss 0.1126, Acc 95.30%, Val Loss 0.1111, Val Acc 95.61%
Epoch 10/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.77it/s, loss=0.1081, accuracy=95.53%]
                                                                                                     

Epoch 10: Train Loss 0.1081, Acc 95.53%, Val Loss 0.1204, Val Acc 95.01%
Epoch 11/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.78it/s, loss=0.1066, accuracy=95.55%]
                                                                                                     

Epoch 11: Train Loss 0.1066, Acc 95.55%, Val Loss 0.1334, Val Acc 94.31%
Epoch 12/20


Training: 100%|██████████| 5625/5625 [09:35<00:00,  9.78it/s, loss=0.1038, accuracy=95.69%]
                                                                                                     

Epoch 12: Train Loss 0.1038, Acc 95.69%, Val Loss 0.1102, Val Acc 95.40%
Epoch 13/20


Training: 100%|██████████| 5625/5625 [09:34<00:00,  9.79it/s, loss=0.1023, accuracy=95.74%]
                                                                                                     

Epoch 13: Train Loss 0.1023, Acc 95.74%, Val Loss 0.1148, Val Acc 95.36%
Epoch 14/20


Training: 100%|██████████| 5625/5625 [09:34<00:00,  9.79it/s, loss=0.0982, accuracy=95.94%]
                                                                                                     

Epoch 14: Train Loss 0.0982, Acc 95.94%, Val Loss 0.1161, Val Acc 95.08%
Stopping early due to no improvement in validation accuracy.
Loaded best model from best_model_checkpoint.pth




----
### Call and Train Model
Loading the model structure to be trained

----
### Evaluation

In [13]:
results = evaluate_model(trainer.model, trainer.val_dataloader, trainer.criterion, trainer.device)

labels = results['all_labels']
predictions = results['all_predictions']
plot_metrics(trainer, labels, predictions)

plot_conf_matrix( labels, predictions,selected_classes)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [15]:
torch.save(trainer.model, './models/family_im_new_m_1')

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [41]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model
model = ResNet1D(Bottleneck1D, [3,4,4,3], num_classes=len(selected_classes), in_channels=2).to(device)

# Check if CUDA is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

state_dict = torch.load('./models/PSK_im_new_m')
model.load_state_dict(state_dict)

<All keys matched successfully>

### Accuracy based on SNR

In [42]:
def get_snr_bin(snr):
    if snr <= 5:
        return '-2 to 5'
    elif 6 <= snr <= 10:
        return '6 to 10'
    elif 11 <= snr <= 15:
        return '11 to 15'
    elif 16 <= snr <= 20:
        return '16 to 20'
    elif 21 <= snr <= 25:
        return '21 to 25'
    elif 26 <= snr <= 30:
        return '26 to 30'


In [31]:
sig53_metadata = Sig53(
    root=root,
    train=False,
    impaired=impaired,
    transform=None,
    target_transform=None,
)

In [43]:
from collections import defaultdict
# Evaluation per SNR bin
model.eval()
model.to(device)
snr_bins = defaultdict(lambda: {'correct': 0, 'total': 0})

with torch.no_grad():
    for batch_idx, (inputs, labels) in enumerate(test_dataloader):
        # Get corresponding SNR values from the metadata
        start_idx = batch_idx * 16
        end_idx = start_idx + 16
        snrs = [sig53_metadata[idx][1][1] for idx in range(start_idx, end_idx)]

        inputs = inputs.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.long)

        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        for label, pred, snr in zip(labels, predicted, snrs):
            bin_name = get_snr_bin(snr)
            snr_bins[bin_name]['total'] += 1
            if label == pred:
                snr_bins[bin_name]['correct'] += 1

# Define the bin order
bin_order = ['-2 to 5', '6 to 10', '11 to 15', '16 to 20', '21 to 25', '26 to 30']

# Calculate and print accuracy per SNR bin in order
for bin_name in bin_order:
    if bin_name in snr_bins:
        stats = snr_bins[bin_name]
        accuracy = 100 * stats['correct'] / stats['total']
        count = stats['total']
        print(f"SNR Bin {bin_name}: Accuracy = {accuracy:.2f}% (Count = {count})")
    else:
        print(f"SNR Bin {bin_name}: No data available")


SNR Bin -2 to 5: Accuracy = 40.41% (Count = 2316)
SNR Bin 6 to 10: Accuracy = 51.58% (Count = 1140)
SNR Bin 11 to 15: Accuracy = 69.57% (Count = 1288)
SNR Bin 16 to 20: Accuracy = 79.31% (Count = 1252)
SNR Bin 21 to 25: Accuracy = 86.71% (Count = 1204)
SNR Bin 26 to 30: Accuracy = 88.64% (Count = 1312)


: 

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model
model = ResNet1D(Bottleneck1D, [4,5,5,4], num_classes=len(selected_classes), in_channels=2).to(device)

# Check if CUDA is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
