In [1]:
# mount google drive 
from google.colab import drive
drive.mount('/content/drive')

!tar -xvzf "/content/drive/MyDrive/lens_data_alt.tgz";

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
lens_data/image_129921264748304580224978784123993897887.npy
lens_data/image_60674821045913106252338146417083597669.npy
lens_data/image_7586588992730644946273072343065751609.npy
lens_data/image_256615804082514045226068245569293561600.npy
lens_data/image_331318447763555722421727000564541383144.npy
lens_data/image_306434472647715886273091851647655788036.npy
lens_data/image_153218353373199188551319913929375882134.npy
lens_data/image_11412000761836877732825972164665604473.npy
lens_data/image_266881034011026170985732508767754319849.npy
lens_data/image_322127030264560088986489885939769106814.npy
lens_data/image_103941636199723507484958618254099927151.npy
lens_data/image_182258691333363044109515573847422173318.npy
lens_data/image_317193050927123178027142767517329150638.npy
lens_data/image_7313831936673811745893506016093993651.npy
lens_data/image_123432310684084797411926937999919367992.npy
lens_data/image_2341154685534313694540376

# Importing Libaries

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import DatasetFolder
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import gc
import os

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Data Preparation

In [362]:
# crate a dataset for regression task
class LensDataset(Dataset):
    def __init__(self, path, transform=None):
        self.path = path
        self.transform = transform
        self.file_list = os.listdir(path)
        self.file_list.sort()
        
    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, idx):
        data = np.load(self.path + '/' + self.file_list[idx], allow_pickle=True)
        image = data[0]
        mass = torch.tensor(data[1], dtype=torch.float32)
        
        if self.transform:
            image = self.transform(image)
        
        return image, mass

In [396]:
transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Resize((224,224)),
                transforms.Lambda(lambda x: x.to(torch.float32).repeat(3,1,1)),
                transforms.Lambda(lambda x: x/ x.max()),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.Random
            ])
dataset = LensDataset("lens_data", transform= transform)

train_dataset, test_dataset = random_split(dataset, [int(len(dataset) * 0.9), len(dataset) - int(len(dataset) * 0.9)])
train_dataset, val_dataset = random_split(train_dataset, [int(len(train_dataset) * 0.9), len(train_dataset) - int(len(train_dataset) * 0.9)])

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)


In [397]:
next(iter(train_loader))

RuntimeError: ignored

In [369]:
# Explore the dataset
print('Train dataset size: ', len(train_dataset))
print('Validation dataset size: ', len(val_dataset))
print('Test dataset size: ', len(test_dataset))

# get image shape
print('Image shape: ', train_dataset[0][0].shape)

# type of image tensor
print('Type of image tensor: ', train_dataset[0][0].dtype)

# number of batches in train loader
print('Number of batches in train loader: ', len(train_loader))

Train dataset size:  16200
Validation dataset size:  1800
Test dataset size:  2000
Image shape:  torch.Size([3, 224, 224])
Type of image tensor:  torch.float32
Number of batches in train loader:  507


# Training and Test Functions

In [370]:
def train(model, train_loader, valid_loader, loss_fn, optimizer, n_epochs, scheduler):

    train_losses = []
    val_losses = []
    for epoch in range(1, n_epochs+1):
        # Keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0

        
        # Train the model
        model.train()
        for batch, data in enumerate(tqdm(train_loader)):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()*inputs.size(0)

        # Evaluate the model
        model.eval()
        for data in tqdm(valid_loader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            valid_loss += loss.item()*inputs.size(0)
        
        
        # Calculate average losses
        train_loss = train_loss/len(train_loader.dataset)
        valid_loss = valid_loss/len(valid_loader.dataset)

        scheduler.step()
        
        train_losses.append(train_loss)
        val_losses.append(valid_loss)

        
        print(f'Epoch: {epoch} \tTraining Loss: {train_loss:.6f} | Validation Loss: {valid_loss:.6f}')
    return train_losses, val_losses

In [386]:
def test(model, test_loader, loss_fn):
    test_loss = 0.0
    model.eval()

    y_pred = []
    y_true = []
    with torch.no_grad():
      for data in tqdm(test_loader):
          inputs, labels = data
          inputs = inputs.to(device)
          labels = labels.to(device)
          outputs = model(inputs)
          loss = loss_fn(outputs, labels)

          y_pred.extend(outputs.cpu().detach().numpy())
          y_true.extend(labels.cpu().detach().numpy())

          test_loss += loss.item()*inputs.size(0)
      
    test_loss = test_loss/len(test_loader.dataset)


    # plot label vs prediction
    plt.figure(figsize=(10,10))
    plt.scatter(y_true, y_pred)
    plt.xlabel("True Mass")
    plt.ylabel("Predicted Mass")
    plt.title("True Mass vs Predicted Mass")
    plt.show()
    print(f'Test Loss: {test_loss:.6f}')
    return test_loss

# Model Initialization

## EfficientNet

In [7]:
# EffNetV2 = torchvision.models.efficientnet_b5()
# EffNetV2.classifier[-1] = nn.Sequential(nn.Linear(in_features=2048, out_features=1024, bias=True), nn.Linear(in_features=1024, out_features=3, bias=True))

# # load weights
# EffNetV2.load_state_dict(torch.load('EffNetV2_batch=32_lr=0.001_optim=Adam.pt'))

# EffNetV2.classifier[-1] = nn.Sequential(nn.Linear(in_features=2048, out_features=1024, bias=True), nn.Linear(in_features=1024, out_features=1, bias=True))
# EffNetV2.to(device);

# # freeze all layers except the last one
# for param in EffNetV2.parameters():
#     param.requires_grad = False
# EffNetV2.classifier[-1][-1].weight.requires_grad = True

# MobileNetV3

In [372]:
gc.collect()
torch.cuda.empty_cache()

In [4]:
MobileNetV3 = torchvision.models.mobilenet_v3_small(weights=True)
MobileNetV3.classifier[-1] = nn.Linear(in_features=1024, out_features=1, bias=True)
MobileNetV3.to(device);




In [19]:
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        # depthwise convolutional layers
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1, groups=1) # 32 channels
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1, groups=32) # 64 channels
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1, groups=64) # 128 channels
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1, groups=128) # 256 channels
        # pointwise convolutional layers
        self.point1 = nn.Conv2d(32, 64, 1) # 64 channels
        self.point2 = nn.Conv2d(64, 128, 1) # 128 channels
        self.point3 = nn.Conv2d(128, 256, 1) # 256 channels
        self.point4 = nn.Conv2d(256, 512, 1) # 512 channels
        # max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # fully connected layer
        self.fc = nn.Linear(512 * 9 * 9, 1)

    def forward(self, x):
        # apply depthwise and pointwise convolutions with relu activation
        x = F.relu(self.point1(self.conv1(x)))
        x = F.relu(self.point2(self.conv2(x)))
        x = F.relu(self.point3(self.conv3(x)))
        x = F.relu(self.point4(self.conv4(x)))
        # apply max pooling
        x = self.pool(x)
        # flatten the output
        x = x.view(-1, 512 * 9 * 9)
        # apply fully connected layer with sigmoid activation
        x = self.fc(x)
        return x

# create an instance of the model
model = SimpleModel()
# print the number of parameters
print(sum(p.numel() for p in model.parameters() if p.requires_grad))

221313


# Train and Test Functions

In [375]:
model = MobileNetV3
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
loss_fn = nn.MSELoss()

train_losses, val_losses = train(model, train_loader, val_loader, loss_fn, optimizer, 10, scheduler)
torch.save(model.state_dict(), '/content/drive/MyDrive/MbReg.pth')

  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 507/507 [01:01<00:00,  8.26it/s]
100%|██████████| 57/57 [00:04<00:00, 12.61it/s]


Epoch: 1 	Training Loss: 0.023910 | Validation Loss: 0.001510


100%|██████████| 507/507 [01:01<00:00,  8.27it/s]
100%|██████████| 57/57 [00:05<00:00, 10.26it/s]


Epoch: 2 	Training Loss: 0.003575 | Validation Loss: 0.003120


100%|██████████| 507/507 [01:00<00:00,  8.33it/s]
100%|██████████| 57/57 [00:04<00:00, 12.74it/s]


Epoch: 3 	Training Loss: 0.001279 | Validation Loss: 0.000272


100%|██████████| 507/507 [01:00<00:00,  8.42it/s]
100%|██████████| 57/57 [00:05<00:00, 11.37it/s]


Epoch: 4 	Training Loss: 0.000755 | Validation Loss: 0.000747


100%|██████████| 507/507 [01:01<00:00,  8.29it/s]
100%|██████████| 57/57 [00:05<00:00, 10.18it/s]


Epoch: 5 	Training Loss: 0.000589 | Validation Loss: 0.000325


100%|██████████| 507/507 [00:59<00:00,  8.46it/s]
100%|██████████| 57/57 [00:05<00:00, 10.38it/s]


Epoch: 6 	Training Loss: 0.000337 | Validation Loss: 0.000274


100%|██████████| 507/507 [01:00<00:00,  8.32it/s]
100%|██████████| 57/57 [00:05<00:00, 11.20it/s]


Epoch: 7 	Training Loss: 0.000317 | Validation Loss: 0.000319


100%|██████████| 507/507 [01:06<00:00,  7.66it/s]
100%|██████████| 57/57 [00:05<00:00, 10.11it/s]


Epoch: 8 	Training Loss: 0.000312 | Validation Loss: 0.000238


100%|██████████| 507/507 [01:00<00:00,  8.32it/s]
100%|██████████| 57/57 [00:05<00:00, 10.03it/s]


Epoch: 9 	Training Loss: 0.000314 | Validation Loss: 0.000260


100%|██████████| 507/507 [01:02<00:00,  8.12it/s]
100%|██████████| 57/57 [00:05<00:00, 10.35it/s]


Epoch: 10 	Training Loss: 0.000307 | Validation Loss: 0.000287


In [387]:
test(MobileNetV3, test_loader, nn.MSELoss())

  0%|          | 0/63 [00:00<?, ?it/s]


RuntimeError: ignored