In [18]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import kagglehub
from tqdm.notebook import trange, tqdm

# Download chest x-ray (will take a minute or two)
data_path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")



In [4]:
train_path = data_path + "/chest_xray/train"
test_path = data_path + "/chest_xray/test"
val_path = data_path + "/chest_xray/test"

In [5]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


transform = transforms.Compose([
    transforms.Grayscale(), #compression 3 rgb matrix into one for runtime
    transforms.Resize((224, 224)), # can downsample here for runtime
    transforms.ToTensor(),
])

#Loading train, test, and validation datasets.
train_dataset = datasets.ImageFolder(train_path, transform=transform)
test_dataset = datasets.ImageFolder(test_path, transform=transform)
val_dataset = datasets.ImageFolder(val_path, transform = transform)

#dataloaders w 32 images each for batches (randomized)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(train_dataset.classes) # classes for the data (train)

  Referenced from: <FB2FD416-6C4D-3621-B677-61F07C02A3C5> /opt/anaconda3/envs/ml-0451/lib/python3.9/site-packages/torchvision/image.so
  warn(


['NORMAL', 'PNEUMONIA']


In [6]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.decomposition import PCA
from ContrastiveVAE import ContrastiveVAE

# contrastive loss function for VAE
def supervised_contrastive_loss(embeddings, labels: torch.Tensor, temperature=0.1):
    device = embeddings.device
    labels = labels.contiguous().view(-1, 1)
    mask = torch.eq(labels, labels.T).float().to(device)

    dot_product = (embeddings @ embeddings.T) / temperature
    logits_max, _ = torch.max(dot_product, dim=1, keepdim=True)
    logits = dot_product - logits_max.detach()

    exp_logits = torch.exp(logits) * (1 - torch.eye(len(labels), device=device))
    log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True) + 1e-8)

    mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)
    loss = -mean_log_prob_pos.mean()
    return loss

In [7]:
def train_vae(model: ContrastiveVAE, dataloader, optimizer, device, epochs=10, beta=1.0, contrastive_weight=1.0):
    model.train()
    recon_loss_fn = nn.MSELoss(reduction='sum')

    for epoch in range(epochs):
        total_loss = 0
        for x, y in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()

            x_recon, mu, logvar = model(x)
            recon_loss = recon_loss_fn(x_recon, x) / x.size(0)

            kl_loss = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())

            contrastive = supervised_contrastive_loss(mu, y)

            loss = recon_loss + beta * kl_loss + contrastive_weight * contrastive
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1} - Loss: {total_loss / len(dataloader):.4f}")


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ContrastiveVAE(latent_dim=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

train_vae(
    model,
    train_loader,
    optimizer,
    device,
    epochs=3,
    beta=1.0,
    contrastive_weight=5 # 5 ~208, 10 ~ 229
)


Epoch 1/3:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 1 - Loss: 2100.6436


Epoch 2/3:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 2 - Loss: 492.1164


Epoch 3/3:   0%|          | 0/163 [00:00<?, ?it/s]

Epoch 3 - Loss: 348.6070


In [9]:
def extract_latent_vectors(model, dataloader, device):
    model.eval()
    all_mu = []
    all_labels = []

    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            mu, _ = model.encode(x)
            all_mu.append(mu.cpu().numpy()) #add all into np array
            all_labels.append(y.numpy())

    X = np.concatenate(all_mu, axis=0) # concatenate all latent vectors
    y = np.concatenate(all_labels, axis=0) # concatenate all labels
    return X, y


In [10]:
X_train, y_train = extract_latent_vectors(model, train_loader, device)
X_test, y_test = extract_latent_vectors(model, test_loader, device)


In [11]:
print(X_train.shape, y_train.shape)

(5216, 64) (5216,)


In [None]:
from Transformer import Transformer
from sklearn.metrics import classification_report, accuracy_score

latent_vectors = X_train.shape[0] # 5216 latent vectors
latent_dim = X_train.shape[1] # 64 dimensions of latent space
output_dim = 2

# Initialize the Transformer model
T = Transformer(input_dim = latent_dim, num_patches = 1, output_dim = output_dim, hidden_dim=latent_dim).to(device)

# Train the Transformer model
def train_transformer(model: Transformer, X_train, y_train, device, epochs=3, batch_size=32):
    X = torch.tensor(X_train, dtype=torch.float32).to(device)  # [vectors, dim]
    y = torch.tensor(y_train, dtype=torch.long).to(device)  # [labels]
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    model.train()

    for epoch in range(epochs):
        total_loss = 0
        i = 0
        while i < X.shape[0]: # this simulates batch loading method without dataloader
            # Create batches
            x_batch = X[i:i + batch_size]
            y_batch = y[i:i + batch_size]

            optimizer.zero_grad()
            outputs = model(x_batch)  
            loss = criterion(outputs, y_batch)  
            loss.backward()  
            optimizer.step()  

            total_loss += loss.item()
            i += batch_size  

        print(f"Epoch {epoch + 1} - Loss: {total_loss / (X.shape[0] // batch_size):.4f}")



In [24]:
train_transformer(T, X_train, y_train, device, epochs=3, batch_size=32)

Epoch 1 - Loss: 0.4442
Epoch 2 - Loss: 0.1556
Epoch 3 - Loss: 0.1438


In [None]:
# # initialize a perceptron 
# p = Perceptron()
# opt = PerceptronOptimizer(p)
# p.loss(X, y)

# loss = 1
# score_vec = [] 

# while loss > 0 and len(score_vec) <= 1000:

#     # save the old value of w for plotting later
#     old_w = torch.clone(p.w)
    
#     # make an optimization step -- this is where the update actually happens
#     # now p.w is the new value 
#     prev_length = len(score_vec)
#     i = torch.randint(n, size = (1,))
#     x_i = X[[i],:]
#     y_i = y[i]
#     local_loss = p.loss(x_i, y_i).item()
#     score = p.score(X).mean()

#     if local_loss > 0:
#         opt.step(x_i, y_i)
    
#     if local_loss > 0:
#         loss = p.loss(X, y).item()
#         score = p.score(X).mean()
#         score_vec.append(score)
    
#     if(len(score_vec) != prev_length):
#         print(f"Iteration {len(score_vec)}: Loss = {loss:.3f}, Score = {score:.3f}")