In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Seed torch's global RNG once, before anything random happens, so that weight
# initialization and the shuffled training batch order are repeatable across
# "Restart Kernel -> Run All". GPU kernels may still add small nondeterminism.
SEED = 42
torch.manual_seed(SEED)

# Set the device to use (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load and preprocess the MNIST dataset.
# ToTensor converts each image to a float tensor; Normalize then applies
# (x - mean) / std per channel. The constants below are presumably the usual
# MNIST training-set mean / std -- confirm if the dataset is swapped out.
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD)
])
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Create data loaders. Only the training loader shuffles; the test loader keeps
# dataset order so per-image outputs line up with dataset indices.
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the CNN model
class Net(nn.Module):
    """Two-stage convolutional classifier producing 10 raw class scores.

    Each stage is a 3x3 same-padding convolution, a ReLU and a 2x2 max-pool
    that halves height and width. The final linear layer expects
    ``32 * 7 * 7`` features, i.e. a 32-channel 7x7 map, which is what two
    halvings of a 28x28 single-channel input produce (28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 16 channels, spatial size preserved by padding=1.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 16 -> 32 channels, same layout as stage 1.
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head over the flattened stage-2 feature map.
        self.fc = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Return unnormalized class scores of shape ``(batch, 10)`` for ``x``."""
        stage1 = self.pool1(self.relu1(self.conv1(x)))
        stage2 = self.pool2(self.relu2(self.conv2(stage1)))
        # Collapse everything except the batch dimension before the linear layer.
        flat = stage2.view(stage2.size(0), -1)
        return self.fc(flat)

# Create an instance of the model and move it to the device
model = Net().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model.
# The loss reported per epoch is the mean over all training samples seen in
# that epoch, not the loss of whichever mini-batch happened to come last; the
# last-batch value is very noisy and makes the training curve look erratic.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss_sum = 0.0  # sum of per-sample losses over the epoch
    samples_seen = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion averages over the batch, so weight by batch size to keep
        # a shorter final batch from being over-represented in the mean.
        batch_count = labels.size(0)
        epoch_loss_sum += loss.item() * batch_count
        samples_seen += batch_count

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    mean_loss = epoch_loss_sum / max(samples_seen, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

# Test the model and collect one output vector per test image.
# NOTE: the "vector representation" stored here is the model's raw 10-element
# output (one unnormalized score per class), not an intermediate feature
# embedding of the image.
model.eval()
image_vectors = []
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # argmax over the class dimension gives the predicted label; no need
        # for the legacy `.data` attribute since autograd is already disabled.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Move the batch of output vectors to host memory as NumPy rows
        vectors = outputs.cpu().numpy()
        image_vectors.extend(vectors)

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

# Print the vector representations of the first 5 images
# (slicing keeps this safe if fewer vectors than requested were collected)
num_images_to_print = 5
for i, vector in enumerate(image_vectors[:num_images_to_print]):
    print(f"Vector representation of image {i+1}: {vector}")

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 78221143.80it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 27764312.13it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 34029874.51it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 9906671.23it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






Epoch [1/10], Loss: 0.2618
Epoch [2/10], Loss: 0.0310
Epoch [3/10], Loss: 0.0163
Epoch [4/10], Loss: 0.2157
Epoch [5/10], Loss: 0.0134
Epoch [6/10], Loss: 0.1160
Epoch [7/10], Loss: 0.0039
Epoch [8/10], Loss: 0.0665
Epoch [9/10], Loss: 0.0072
Epoch [10/10], Loss: 0.0030
Test Accuracy: 98.86%
Vector representation of image 1: [-21.066765   -6.879763   -6.355869    1.9802029 -11.591761   -7.4443054
 -41.06301    19.204853  -10.406628    3.9379368]
Vector representation of image 2: [ -9.252592   -2.394763   14.678845  -13.926844  -18.01803   -16.91106
  -7.3153124 -21.473885   -6.6277795  -8.591769 ]
Vector representation of image 3: [-11.287099    11.813167    -6.1603913   -9.649429     0.17907226
  -2.452532   -10.487567    -1.1455851   -3.6930108   -5.745078  ]
Vector representation of image 4: [ 15.835983  -30.700287   -5.9191003 -11.037349  -16.488546   -5.3912835
   1.7175869  -7.47911    -4.228919   -6.6101165]
Vector representation of image 5: [-13.607168   -22.413736   -14.923964

In [None]:
# Test the model and collect, per test image, the model's output vector, the
# ground-truth label and the predicted label.
# NOTE: the "vector representation" stored here is the model's raw 10-element
# output (one unnormalized score per class), not an intermediate feature
# embedding of the image.
model.eval()
image_vectors = []
actual_labels = []
predicted_labels = []
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # argmax over the class dimension gives the predicted label; no need
        # for the legacy `.data` attribute since autograd is already disabled.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Move the batch of output vectors to host memory as NumPy rows
        vectors = outputs.cpu().numpy()
        image_vectors.extend(vectors)

        # Store the actual labels and predicted labels
        actual_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

# Print the vector representations, actual labels, and predicted labels of the first 5 images
# (the bound keeps this safe if fewer results than requested were collected)
num_images_to_print = 5
for i in range(min(num_images_to_print, len(image_vectors))):
    print(f"Image {i+1}:")
    print(f"  Actual Label: {actual_labels[i]}")
    print(f"  Predicted Label: {predicted_labels[i]}")
    print(f"  Vector Representation: {image_vectors[i]}")
    print()

Test Accuracy: 98.86%
Image 1:
  Actual Label: 7
  Predicted Label: 7
  Vector Representation: [-21.066765   -6.879763   -6.355869    1.9802029 -11.591761   -7.4443054
 -41.06301    19.204853  -10.406628    3.9379368]

Image 2:
  Actual Label: 2
  Predicted Label: 2
  Vector Representation: [ -9.252592   -2.394763   14.678845  -13.926844  -18.01803   -16.91106
  -7.3153124 -21.473885   -6.6277795  -8.591769 ]

Image 3:
  Actual Label: 1
  Predicted Label: 1
  Vector Representation: [-11.287099    11.813167    -6.1603913   -9.649429     0.17907226
  -2.452532   -10.487567    -1.1455851   -3.6930108   -5.745078  ]

Image 4:
  Actual Label: 0
  Predicted Label: 0
  Vector Representation: [ 15.835983  -30.700287   -5.9191003 -11.037349  -16.488546   -5.3912835
   1.7175869  -7.47911    -4.228919   -6.6101165]

Image 5:
  Actual Label: 4
  Predicted Label: 4
  Vector Representation: [-13.607168   -22.413736   -14.923964   -14.275178    15.972704
 -12.452628   -14.9262085  -11.684353    -5.6