# Prepare

In [1]:
import torch
from torch import nn
import numpy as np
import random
from torch.backends import cudnn

In [2]:
# set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

torch.set_default_dtype(torch.float64)

# Because some mass is a very small value. In order to observe the loss on those small value,
# we set the precision we cared.
cared_precision = 0.00001

# set seed and set deterministic behavior, which ensure the reproducible
SEED = 2022
torch.cuda.manual_seed_all(SEED) if torch.cuda.is_available() else torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
cudnn.deterministic = True
cudnn.benchmark = False

# Data Define

In [3]:
import numpy as np
import os
import pandas as pd
from torch.utils.data import Dataset

## Data Class

In [4]:
class MyData(Dataset):

    def __init__(self, csv_file, data_dir, transform=None):
        self.csv_file = pd.read_csv(csv_file)
        self.data_dir = data_dir
        self.transform = transform
        print(self.transform)

    def __len__(self):
        return len(self.csv_file)

    def __getitem__(self, item):
        img_dir = os.path.join(self.data_dir, self.csv_file.iloc[item, 0])
        img = np.load(img_dir, allow_pickle=True)

        # MaxMin normalization.
        x = (img[0] - img[0].min()) / (img[0].max() - img[0].min())

        if self.transform is not None:
            x = self.transform(x)

        return x, torch.tensor(img[1])

# Config

I have tried batch_size 64, batch_size 128 and batch_size 200 and found that batch_size 100 may be a better choice

In [6]:
class Config:
    lr = 1e-4
    batch_size = 100
    num_epochs = 100

# Model Define

In [7]:
from torch import nn
from torchvision import models
from torch.nn import Conv2d

The model architecture is simple. Two full connection layer with ReLU and Sigmoid activation function are appended to the tail of ResNet18.

In [8]:
class ResNetReg(nn.Module):

    def __init__(self, pre_trained):
        """
        Args:
            pre_trained: True if want to use pretrained weight else false
        """
        super(ResNetReg, self).__init__()
        self.backbone = models.resnet18(pretrained=pre_trained)
        self.reg = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )
        self.backbone.conv1 = Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.backbone.fc = self.reg

    def forward(self, x):
        return self.backbone(x)

# Train

In [9]:
import torch.optim as optim  # optimizer
from torch.utils.data import DataLoader
import torchvision.transforms as transforms  # many transforms used to data set
from tqdm import tqdm
from utils import check_accuracy, save_checkpoint, create_description, diff_rate
from torch.optim.lr_scheduler import ReduceLROnPlateau
import time
import sys

## Train function

- **lr**: Learning rate is initialized with 1e-4. And it will be decayed with a factor of 0.8 if the val loss does not decrease in 3 epochs.

- **cared_precision**: May be you have noticed that the value of the val mse and train mse is bigger than one. I noticed that y is a decimal much smaller than 1. So the mse is also small. In order to observe the change of MSE more clearly, I scaled the value of MSE. If you want to use the original value, you can set the cared_precision with value 1.

- **model save**: After 20 epoch, I will choose a model with the min validation mse to save.

- **r2**: just a reference criterion

In [12]:
def train_fn(cfg, train_loader, val_loader, model, optimizer, criterion, flatten=False):
    """
    train function
    """

    min_loss = 0
    for epoch in range(cfg.num_epochs):
        losses_per_batch = []
        y_true_list = []
        y_pred_list = []

        scheduler = ReduceLROnPlateau(
            optimizer,
            mode="min",
            factor=0.8,
            patience=3,
            verbose=True,
            min_lr=1e-5
        )

        start_time = time.time()
        p_bar = tqdm(enumerate(train_loader), leave=False, total=len(train_loader), file=sys.stdout)
        for batch_idx, (x_batch, y_batch) in p_bar:
            # move data to gpu if cuda is available
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            if flatten:
                x_batch = x_batch.reshape((x_batch.shape[0], -1))

            # forward
            scores = model.forward(x_batch)

            loss = criterion(scores.ravel(), y_batch)
            losses_per_batch.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # used to compute diff rate
            y_true_list.extend(list(y_batch.cpu().numpy()))
            y_pred_list.extend(list(scores.detach().ravel().cpu().numpy()))

        # compute acc and loss
        val_loss, r2, _ = check_accuracy(val_loader, model, criterion, device, flatten)
        train_loss = sum(losses_per_batch) / len(losses_per_batch)

        scheduler.step(val_loss)

        # checkpoint
        if epoch == 20:
            min_loss = val_loss

        if epoch > 20 and val_loss < min_loss:
            check_point = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict()
            }
            print(f"epoch {epoch+1}: ", end="")
            save_checkpoint(check_point, "./checkpoint/lowest_loss_model.pth.tar")
            min_loss = val_loss

        # train_diff_rate = diff_rate(y_pred_list, y_true_list)
        end_time = time.time()
        print(f"epoch [{epoch + 1}/{cfg.num_epochs}], "
              f"train mse: {train_loss / cared_precision:.8f}, "
              f"val mse: {val_loss / cared_precision:.8f}, "
              f"val r2 score: {r2:.4f}, "
              # f"train diff rate: {train_diff_rate:.4f}, "
              # f"val diff rate: {rate:.4f}, "
              f"time used: {(end_time - start_time)/60:.4f}m")

## Training

### Data prepare

**Before run this part, please download data from <a href="https://drive.google.com/file/d/1hu472ALwGPBcTCXSAM0VoCWmTktg9j-j/view">here</a>**.

After you download data, the file should be organized as following:

```
|--- Test III
    |--- checkpoint
    |--- lens_data
        |--- xxx.npy
        |--- xxx.npy
        ...
    |--- test description.csv
    |--- Test III.ipynb
    |--- train description.csv
    |--- utils.py
    |--- val description.csv
    |--- y.npy
```

**The file `test description.csv`, `train description.csv` and `val description.csv` which i have provided. And you do not need to run the code `create_description(data_dir, [0.8, 0.1])`. But if you want to create new `xxx description.csv` file, you can open the comment.**

**The argument [0.8, 0.1] represents the trainset_rate 0.8 and valset_rate 0.1. The testset_rate will be induced.**

In [11]:
data_dir = "./lens_data"
# create_description(data_dir, [0.8, 0.1])

train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomAffine(degrees=(0, 180), translate=(0.2, 0.2)),
    transforms.Resize((150, 150))
])
val_transform = train_transform

# get data set
train_set = MyData("./train description.csv", data_dir, train_transform)
val_set = MyData("./val description.csv", data_dir, val_transform)

Compose(
    ToTensor()
    RandomAffine(degrees=[0.0, 180.0], translate=(0.2, 0.2))
    Resize(size=(150, 150), interpolation=bilinear, max_size=None, antialias=None)
)
Compose(
    ToTensor()
    RandomAffine(degrees=[0.0, 180.0], translate=(0.2, 0.2))
    Resize(size=(150, 150), interpolation=bilinear, max_size=None, antialias=None)
)


### train model

In [13]:
cfg = Config()
train_loader = DataLoader(train_set, batch_size=cfg.batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=cfg.batch_size)

# define model, loss function and optimizer.
model = ResNetReg(pre_trained=True).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=cfg.lr)

train_fn(cfg, train_loader, val_loader, model, optimizer, criterion)

epoch [1/100], train mse: 488.92054504, val mse: 23.23353733, val r2 score: -0.1010, time used: 4.5347m
epoch [2/100], train mse: 23.35984664, val mse: 21.87382848, val r2 score: -0.0365, time used: 4.4552m
epoch [3/100], train mse: 22.28330613, val mse: 21.72169860, val r2 score: -0.0293, time used: 4.4615m
epoch [4/100], train mse: 21.86350809, val mse: 21.74953078, val r2 score: -0.0307, time used: 4.4202m
epoch [5/100], train mse: 21.45100737, val mse: 21.28794895, val r2 score: -0.0088, time used: 4.3763m
epoch [6/100], train mse: 21.34558314, val mse: 20.66337529, val r2 score: 0.0208, time used: 4.4063m
epoch [7/100], train mse: 20.75944963, val mse: 20.37258441, val r2 score: 0.0346, time used: 4.4357m
epoch [8/100], train mse: 20.45769606, val mse: 20.81315133, val r2 score: 0.0137, time used: 4.4096m
epoch [9/100], train mse: 20.41810515, val mse: 20.07014532, val r2 score: 0.0489, time used: 4.3871m
epoch [10/100], train mse: 19.99689106, val mse: 19.57481426, val r2 score: 

# evaluate

## simple baseline

In [14]:
from sklearn.metrics import r2_score

In [15]:
def baseline():
    y = np.load("y.npy")
    y_mean = [float(y.mean())] * len(y)
    print(f"mse: {((y - y.mean())**2).mean()/cared_precision:.8f}, ", end="")
    print(f"r2 score: {r2_score(list(y), y_mean):.4f}, ", end="")
    print(f"diff rate: {diff_rate(list(y), y_mean):.4f}.")

baseline()

mse: 21.34528930, r2 score: 0.0000, diff rate: 0.2795.


In [16]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms  # many transforms used to data set
from utils import load_checkpoint

**If you want to use my pretrained model, you can download <a href="https://drive.google.com/file/d/1FF7Zp6OjJVVq4hyp7wbdueRg6XwSaKL_/view?usp=sharing">here</a>. After you download it, put it into the './checkpoint'.**

In [17]:
checkpoint_file = "./checkpoint/lowest_loss_model.pth.tar"
data_dir = "./lens_data"
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((150, 150))
])
test_set = MyData("./test description.csv", data_dir, val_transform)
test_loader = DataLoader(test_set, batch_size=cfg.batch_size)

Compose(
    ToTensor()
    RandomAffine(degrees=[0.0, 180.0], translate=(0.2, 0.2))
    Resize(size=(150, 150), interpolation=bilinear, max_size=None, antialias=None)
)


In [18]:
model = ResNetReg(pre_trained=False).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=cfg.lr)
load_checkpoint(checkpoint_file, model, optimizer)

==> Loading checkpoint


In [19]:
test_time = 5

**If you do not want to use the cared_precision, you can remove it.**

In [22]:
%%time
test_loss_list = []
for i in range(test_time):
    test_loss, r2, _ = check_accuracy(test_loader, model, criterion, device)
    test_loss_list.append(test_loss)
    print(f"test mse loss: {test_loss/cared_precision}, r2: {r2}")

test mse loss: 7.306139547242726, r2: 0.6810318628581902
test mse loss: 7.2235091088115615, r2: 0.6846393051807977
test mse loss: 6.890963728087273, r2: 0.6991574210638662
test mse loss: 6.9443447187824, r2: 0.6968269379064189
test mse loss: 7.223070315506167, r2: 0.6846584618205398
CPU times: user 2min 56s, sys: 1.76 s, total: 2min 58s
Wall time: 1min 7s


In [23]:
print(f"mean: {np.mean(test_loss_list)/cared_precision}, std: {np.std(test_loss_list)/cared_precision}")

mean: 7.1176054836860265, std: 0.16689474510760155


**So here the mse on the test set is about 7.11761e-5**