In [8]:
import torch
import torch . nn as nn
import torch . optim as optim
from torch . utils . data import DataLoader
import torchvision . transforms as transforms
from torchvision import models
from sklearn.cluster import KMeans
import torchvision . datasets as datasets


In [9]:
print("PyTorch version:", torch.__version__)
print("CUDA version in PyTorch:", torch.version.cuda)
print("CUDA available:", torch.cuda.is_available())
print("Number of GPUs:", torch.cuda.device_count())
print("GPU name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None")

PyTorch version: 2.7.0+cu128
CUDA version in PyTorch: 12.8
CUDA available: True
Number of GPUs: 1
GPU name: NVIDIA GeForce RTX 2060


In [10]:
from datasets import load_dataset

dataset = load_dataset("ylecun/mnist")

train_dataset = dataset["train"]
test_dataset = dataset["test"]

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
print(train_dataset)
print(test_dataset)

Dataset({
    features: ['image', 'label'],
    num_rows: 60000
})
Dataset({
    features: ['image', 'label'],
    num_rows: 10000
})


In [12]:
input_size = 784
hidden_size = 128
output_size = 10 
learning_rate = 0.001
batch_size = 64
epochs = 5

In [13]:
transform = transforms . Compose ([
            transforms . ToTensor () ,
            transforms . Normalize ((0.5 ,) , (0.5 ,) )
])

In [14]:
train_dataset.set_transform(lambda example: {
    "image": transform(example["image"])
})

test_dataset.set_transform(lambda example: {
    "image": transform(example["image"])
})


In [15]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

In [16]:
import matplotlib.pyplot as plt
image = dataset['train'][1]['image']
label = dataset['train'][5]['label']
plt.imshow(image, cmap="grey")
plt.title(f"Label: {label}")
plt.show()

ModuleNotFoundError: No module named 'matplotlib'

In [17]:
class Autoencoder(nn.Module):
    def __init__(self, latent_dim=64):
        super(Autoencoder, self).__init__()

       
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 32, 3, stride=2, padding=1),  # (32, 14, 14)
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),  # (64, 7, 7)
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, stride=2, padding=1), # (128, 4, 4)
            nn.ReLU()
        )
        self.flatten = nn.Flatten()
        self.fc_latent = nn.Linear(128 * 4 * 4, latent_dim)

       
        self.fc_decode = nn.Linear(latent_dim, 128 * 4 * 4)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 1, 3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()
        )

    def encode(self, x):
        x = self.encoder(x)
        x = self.flatten(x)
        z = self.fc_latent(x)
        return z

    def decode(self, z):
        x = self.fc_decode(z)
        x = x.view(-1, 128, 4, 4)
        x = self.decoder(x)
        return x

    def forward(self, x):
        z = self.encode(x)
        x_recon = self.decode(z)
        return x_recon

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Autoencoder(latent_dim=64).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [19]:
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for images, _ in train_loader:
        images = images.to(device)
        outputs = model(images)
        loss = criterion(outputs, images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss/len(train_loader):.4f}")



TypeError: pic should be PIL Image or ndarray. Got <class 'list'>

In [None]:
model.eval()
latent_features = []
labels = []

with torch.no_grad():
    for images, lbls in train_loader:
        images = images.to(device)
        z = model.encode(images)
        latent_features.append(z.cpu())
        labels.append(lbls)

latent_features = torch.cat(latent_features).numpy()
labels = torch.cat(labels).numpy()

In [None]:
kmeans = KMeans(n_clusters=10, random_state=42)
clusters = kmeans.fit_predict(latent_features)

ari = adjusted_rand_score(labels, clusters)
print(f"Adjusted Rand Index (ARI): {ari:.4f}")

  labels.append(torch.tensor(label_batch))


KeyboardInterrupt: 