In [38]:
from sklearn.model_selection import train_test_split
from ast import literal_eval
from torch.utils.data import Dataset, DataLoader
import torch
import h5py
from torch.utils.data import DataLoader, TensorDataset, random_split

In [39]:
file_path = 'F:\ISEP_Learning_Document\Semester3\End-of-track Project\dataset\data_raw\GOLD_XYZ_OSC.0001_1024.hdf5'

In [40]:
# Open my file in read mode
file_raw = h5py.File(file_path,'r')

In [41]:
# access the datasets within the HDF5 File
data_raw_x = file_raw['X'][:]
data_raw_y = file_raw['Y'][:]
data_raw_z = file_raw['Z'][:]

In [43]:
# Convert the hdf5 dataset to a PyTorch tensor
X_tensor = torch.from_numpy(data_raw_x).float()
Y_tensor = torch.from_numpy(data_raw_y).long()
Z_tensor = torch.from_numpy(data_raw_z).long()

In [44]:
# The information about dataset X
print("Tensor Dataset_X Information:")
print("--------------------------")

# Check the data type of the tensor dataset
print(f"Data type: {X_tensor.type()}")

# Get the shape (dimensions) of the data
print(f"Data shape: {X_tensor.shape}")

# Check the number of elements in the dataset
print(f"Number of elements: {X_tensor.numel()}")

# Access the first few elements of the data
print("First few data elements:")
print(X_tensor[:20])

Tensor Dataset_X Information:
--------------------------
Data type: torch.FloatTensor
Data shape: torch.Size([2555904, 1024, 2])
Number of elements: 5234491392
First few data elements:
tensor([[[ 0.0420,  0.2348],
         [-0.2729,  0.4051],
         [-0.2671,  0.2275],
         ...,
         [-0.7056, -0.2869],
         [-0.4116,  0.6683],
         [ 0.0649,  0.6358]],

        [[ 1.1986,  0.4494],
         [ 0.4854,  0.2827],
         [ 0.8679, -0.3319],
         ...,
         [-1.2129, -0.6429],
         [-0.3810,  0.7936],
         [ 0.0864,  1.0922]],

        [[-0.2235,  0.1814],
         [ 0.0620, -0.9343],
         [ 0.7195,  0.2179],
         ...,
         [ 0.7599,  0.0082],
         [ 0.3397,  0.1293],
         [ 0.5823,  0.1078]],

        ...,

        [[-0.2001,  1.0368],
         [-0.4378, -0.9239],
         [-1.3281, -0.3239],
         ...,
         [ 0.3032,  1.0953],
         [-1.2550,  0.7407],
         [-0.9081,  0.3837]],

        [[ 0.5385, -0.7164],
         [ 0

In [45]:
# The information about dataset Y
print("Tensor Dataset_Y Information:")
print("--------------------------")

# Check the data type of the tensor dataset
print(f"Data type: {Y_tensor.type()}")

# Get the shape (dimensions) of the data
print(f"Data shape: {Y_tensor.shape}")

# Check the number of elements in the dataset
print(f"Number of elements: {Y_tensor.numel()}")

# Access the first few elements of the data
print("First few data elements:")
print(Y_tensor[:2])

Tensor Dataset_Y Information:
--------------------------
Data type: torch.LongTensor
Data shape: torch.Size([2555904, 24])
Number of elements: 61341696
First few data elements:
tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])


In [46]:
# The information about dataset Z
print("Tensor Dataset_Z Information:")
print("--------------------------")

# Check the data type of the tensor dataset
print(f"Data type: {Z_tensor.type()}")

# Get the shape (dimensions) of the data
print(f"Data shape: {Z_tensor.shape}")

# Check the number of elements in the dataset
print(f"Number of elements: {Z_tensor.numel()}")

# Access the first few elements of the data
print("First few data elements:")
print(Z_tensor[:2])

Tensor Dataset_Z Information:
--------------------------
Data type: torch.LongTensor
Data shape: torch.Size([2555904, 1])
Number of elements: 2555904
First few data elements:
tensor([[-20],
        [-20]])


In [47]:
# Expand Z_tensor to match the dimensions of X_tensor
Z_tensor_expanded = Z_tensor.unsqueeze(-1).expand(-1, X_tensor.shape[1], X_tensor.shape[2])

In [48]:
# Concatenate X_tensor and Z_tensor_expanded along the last dimension
XZ_combined = torch.cat([X_tensor, Z_tensor_expanded], dim=2)

In [50]:
# The information about dataset XZ_combined
print("Tensor XZ_combined Information:")
print("--------------------------")

# Check the data type of the tensor dataset
print(f"Data type: {XZ_combined.type()}")

# Get the shape (dimensions) of the data
print(f"Data shape: {XZ_combined.shape}")

# Check the number of elements in the dataset
print(f"Number of elements: {XZ_combined.numel()}")

# Access the first few elements of the data
print("First few data elements:")
print(XZ_combined[:2])

Tensor XZ_combined Information:
--------------------------
Data type: torch.FloatTensor
Data shape: torch.Size([2555904, 1024, 4])
Number of elements: 10468982784
First few data elements:
tensor([[[  0.0420,   0.2348, -20.0000, -20.0000],
         [ -0.2729,   0.4051, -20.0000, -20.0000],
         [ -0.2671,   0.2275, -20.0000, -20.0000],
         ...,
         [ -0.7056,  -0.2869, -20.0000, -20.0000],
         [ -0.4116,   0.6683, -20.0000, -20.0000],
         [  0.0649,   0.6358, -20.0000, -20.0000]],

        [[  1.1986,   0.4494, -20.0000, -20.0000],
         [  0.4854,   0.2827, -20.0000, -20.0000],
         [  0.8679,  -0.3319, -20.0000, -20.0000],
         ...,
         [ -1.2129,  -0.6429, -20.0000, -20.0000],
         [ -0.3810,   0.7936, -20.0000, -20.0000],
         [  0.0864,   1.0922, -20.0000, -20.0000]]])


In [51]:
# Split the dataset ：60% trainset，40% temporary set
X_train, X_temp, Y_train, Y_temp = train_test_split(XZ_combined, Y_tensor, test_size=0.4, random_state=42)

# Split temporaryset ：50% validation dataset，50% testset
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42)

# check
print("Training set size:", X_train.shape[0])
print("Validation set size:", X_val.shape[0])
print("Test set size:", X_test.shape[0])

Training set size: 1533542
Validation set size: 511181
Test set size: 511181


In [52]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

train_dataset = CustomDataset(X_train, Y_train)
val_dataset = CustomDataset(X_val, Y_val)
test_dataset = CustomDataset(X_test, Y_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [58]:
import torch.nn as nn

class ModulationCNN(nn.Module):
    def __init__(self, num_features, n_classes, input_length=1024):
        super().__init__()

        self.conv1 = nn.Conv1d(in_channels=num_features, out_channels=48, kernel_size=2, stride=2)
        self.batchnorm1 = nn.BatchNorm1d(48)
        self.conv2 = nn.Conv1d(in_channels=48, out_channels=48, kernel_size=2, stride=2)
        self.batchnorm2 = nn.BatchNorm1d(48)
        self.conv3 = nn.Conv1d(in_channels=48, out_channels=24, kernel_size=1, stride=1)
        self.batchnorm3 = nn.BatchNorm1d(24)
        self.flatten = nn.Flatten()

        # Calculate the input size of the fully connected layer
        flattened_size = self._compute_flattened_size(input_length, num_features)

        self.fc1 = nn.Linear(flattened_size, 120)
        self.batchnorm_fc1 = nn.BatchNorm1d(120)
        self.fc2 = nn.Linear(120, n_classes)

        self.af = nn.ReLU()
        self.af_out = nn.Softmax(dim=1)

    def _compute_flattened_size(self, input_length, num_features):
        # Temporary tensor used to calculate the flattened dimensions
        temp_tensor = torch.zeros(1, num_features, input_length)

        # Through convolutional layers but excluding fully connected layers
        temp_tensor = self.conv1(temp_tensor)
        temp_tensor = self.batchnorm1(temp_tensor)
        temp_tensor = self.conv2(temp_tensor)
        temp_tensor = self.batchnorm2(temp_tensor)
        temp_tensor = self.conv3(temp_tensor)
        temp_tensor = self.batchnorm3(temp_tensor)
        temp_tensor = self.flatten(temp_tensor)

        # Returns the total number of features after flattening
        return temp_tensor.nelement()

    def forward(self, X):
        x = self.conv1(X.permute(0,2,1))
        x = self.af(self.batchnorm1(x))
        x = self.conv2(x)
        x = self.af(self.batchnorm2(x))
        x = self.conv3(x)
        x = self.af(self.batchnorm3(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc1(x)
        x = self.af(self.batchnorm_fc1(x))
        x = self.fc2(x)
        x = self.af_out(x)
        return x


model = ModulationCNN(num_features=4, n_classes=24)

In [59]:
import torch
import torch.nn as nn
import torch.optim as optim


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [60]:
from tqdm import tqdm
import torch.nn as nn
# 训练过程
n_epochs = 10  # 设置适当的迭代次数

for epoch in range(n_epochs):
    model.train()
    running_train_loss = 0.0
    running_train_acc = 0.0
    progress_bar_train = tqdm(train_loader, desc=f'Epoch {epoch+1}/{n_epochs} [Train]')

    for inputs, labels in progress_bar_train:
        optimizer.zero_grad()

        # forward propagation
        outputs = model(inputs)
        loss = criterion(outputs, torch.max(labels, 1)[1])
        running_train_loss += loss.item()

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Calculate training accuracy
        _, predicted = torch.max(outputs.data, 1)
        correct = (predicted == torch.max(labels, 1)[1]).float().sum().item()
        running_train_acc += correct / labels.size(0)

        # Update progress bar
        progress_bar_train.set_postfix(loss=running_train_loss/(progress_bar_train.n+1),
                                       acc=running_train_acc/(progress_bar_train.n+1))

    model.eval()
    running_val_loss = 0.0
    running_val_acc = 0.0
    progress_bar_val = tqdm(val_loader, desc=f'Epoch {epoch+1}/{n_epochs} [Val]')

    with torch.no_grad():
        for inputs, labels in progress_bar_val:
            outputs = model(inputs)
            loss = criterion(outputs, torch.max(labels, 1)[1])
            running_val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            correct = (predicted == torch.max(labels, 1)[1]).float().sum().item()
            running_val_acc += correct / labels.size(0)

            progress_bar_val.set_postfix(loss=running_val_loss/(progress_bar_val.n+1),
                                         acc=running_val_acc/(progress_bar_val.n+1))

Epoch 1/10 [Train]: 100%|██████████| 23962/23962 [35:36<00:00, 11.21it/s, acc=0.284, loss=2.96]
Epoch 1/10 [Val]: 100%|██████████| 7988/7988 [03:25<00:00, 38.80it/s, acc=0.259, loss=2.98]
Epoch 2/10 [Train]: 100%|██████████| 23962/23962 [39:26<00:00, 10.13it/s, acc=0.346, loss=2.9]  
Epoch 2/10 [Val]:  62%|██████▏   | 4986/7988 [01:43<01:02, 48.40it/s, acc=0.237, loss=3.01]


KeyboardInterrupt: 