In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import wandb

In [2]:
data1 = np.load('data1.npy')
lab1 = np.load('lab1.npy')
data0 = np.load('data0.npy')
lab0 = np.load('lab0.npy')
data2 = np.load('data2.npy')
lab2 = np.load('lab2.npy')
total_data=np.concatenate((data0,data1,data2),axis=0)
total_lab=np.concatenate((lab0,lab1,lab2),axis=0)

In [3]:
total_size = len(total_data)
train_size = int(0.8 * total_size)
val_size = int(0.1 * total_size)
test_size = total_size - train_size - val_size
train_data = total_data[:train_size]
train_lab=total_lab[:train_size]
val_data = total_data[train_size:train_size + val_size]
val_lab=total_lab[train_size:train_size + val_size]
test_data = total_data[train_size + val_size:]
test_lab=total_lab[train_size + val_size:]


In [4]:
train_data = torch.tensor(train_data, dtype=torch.float32)
train_lab = torch.tensor(train_lab, dtype=torch.long)

val_data = torch.tensor(val_data, dtype=torch.float32)
val_lab = torch.tensor(val_lab, dtype=torch.long)

test_data = torch.tensor(test_data, dtype=torch.float32)
test_lab = torch.tensor(test_lab, dtype=torch.long)

# Add channel dimension (1 for grayscale images)
train_data = train_data.unsqueeze(1)  # Shape becomes [num_samples, 1, height, width]
val_data = val_data.unsqueeze(1)
test_data = test_data.unsqueeze(1)

# Create datasets
train_dataset = TensorDataset(train_data, train_lab)
val_dataset = TensorDataset(val_data, val_lab)
test_dataset = TensorDataset(test_data, test_lab)

# Define data loaders
batch_size = 2048
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Check a single batch
for batch_data, batch_labels in train_loader:
    print("Batch data shape:", batch_data.shape)  # Should be [batch_size, 1, height, width]
    print("Batch labels shape:", batch_labels.shape)  # Should be [batch_size]
    break

Batch data shape: torch.Size([2048, 1, 40, 168])
Batch labels shape: torch.Size([2048])


In [5]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x)

class CNNModel(nn.Module):
    def __init__(self, input_shape):
        super(CNNModel, self).__init__()
        self.block1 = ConvBlock(1, 32)
        self.block2 = ConvBlock(32, 64)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Calculate the flattened size dynamically based on the input shape
        with torch.no_grad():
            dummy_input = torch.zeros(1, *input_shape)  # Batch size of 1 for testing
            flattened_size = self._get_flattened_size(dummy_input)

        self.fc1 = nn.Linear(flattened_size, 128)
        self.fc2 = nn.Linear(128, 1)  # Output a single value for regression

        self.dropout = nn.Dropout(0.5)

    def _get_flattened_size(self, x):
        x = self.pool(self.block1(x))
        x = self.pool(self.block2(x))
        return x.view(-1).size(0)

    def forward(self, x):
        x = self.pool(self.block1(x))
        x = self.pool(self.block2(x))

        # Flatten for fully connected layers
        x = x.view(x.size(0), -1)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)  # Output shape: [batch_size, 1]
        return x


In [6]:
num_classes = len(set(total_lab))  # Replace with the number of unique labels
input_shape = (1, 40, 168) 
model = CNNModel(input_shape)
criterion = nn.MSELoss()  # Mean Squared Error Loss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [7]:

# Evaluate model function for regression
def evaluate_model(model, val_loader):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    running_loss = 0.0
    predictions = []
    targets = []

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device).float()
            outputs = model(images).squeeze(1)  # Flatten outputs to [batch_size]
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            predictions.extend(outputs.cpu().numpy())
            targets.extend(labels.cpu().numpy())

    avg_val_loss = running_loss / len(val_loader)
    return avg_val_loss, predictions, targets

# Train model
def train_model(model, train_loader, val_loader, optimizer, epochs=50):
    # wandb.init(
    #     project="MLNS-pre",
    #     name=f"cnn-training-regression-{wandb.util.generate_id()}",
    #     tags=["cnn", "pytorch", "regression"]
    # )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device).float()

            optimizer.zero_grad()
            outputs = model(images).squeeze(1)  # Flatten outputs to [batch_size]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Average training loss
        train_loss = running_loss / len(train_loader)

        # Validation loss
        val_loss, _, _ = evaluate_model(model, val_loader)

        # Log metrics to wandb
        # wandb.log({
        #     "epoch": epoch + 1,
        #     "train_loss": train_loss,
        #     "val_loss": val_loss
        # })

        print(f"Epoch {epoch + 1}")
        print(f"Training Loss: {train_loss:.4f}")
        print(f"Validation Loss: {val_loss:.4f}")
        print("----------------------------------------")

    # wandb.finish()

# Test model
def test_model(model, test_loader):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    test_loss = 0.0
    predictions = []
    targets = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device).float()
            outputs = model(images).squeeze(1)  # Flatten outputs to [batch_size]
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            predictions.extend(outputs.cpu().numpy())
            targets.extend(labels.cpu().numpy())

    avg_test_loss = test_loss / len(test_loader)
    rounded_predictions = np.round(predictions)  # Round predictions to nearest integer

    # Calculate accuracy
    correct_predictions = np.sum(rounded_predictions == targets)
    accuracy = correct_predictions / len(targets) * 100

    print(f"Test Loss: {avg_test_loss:.4f}")
    print(f"Accuracy: {accuracy:.2f}%")

train_model(model, train_loader, val_loader, optimizer, epochs=25)


Epoch 1
Training Loss: 138.6424
Validation Loss: 47.4990
----------------------------------------
Epoch 2
Training Loss: 53.6347
Validation Loss: 36.3398
----------------------------------------
Epoch 3
Training Loss: 44.8591
Validation Loss: 33.1168
----------------------------------------
Epoch 4
Training Loss: 41.3626
Validation Loss: 32.4657
----------------------------------------
Epoch 5
Training Loss: 39.4821
Validation Loss: 30.9911
----------------------------------------
Epoch 6
Training Loss: 38.5358
Validation Loss: 29.8522
----------------------------------------
Epoch 7
Training Loss: 37.4814
Validation Loss: 29.9637
----------------------------------------
Epoch 8
Training Loss: 36.4803
Validation Loss: 28.6589
----------------------------------------
Epoch 9
Training Loss: 35.5161
Validation Loss: 28.4227
----------------------------------------
Epoch 10
Training Loss: 34.9504
Validation Loss: 27.4909
----------------------------------------
Epoch 11
Training Loss: 33.8

In [8]:
test_model(model, test_loader)


Test Loss: 23.9763
Accuracy: 7.97%


In [9]:
# Save the model after training
torch.save(model.state_dict(), "regression_model.pth")
print("Model saved to regression_model.pth")


Model saved to regression_model.pth
