# ResNet-Based mmFace

In [1]:
import torch
import torch.nn as nn
from torchvision.transforms import ToTensor
from torchvision.models import resnet18, ResNet18_Weights

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Size of the classifier output layer used by every model in this notebook.
num_classes = 17

class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped in a skip connection.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input so that the skip
            path matches the shape of the convolutional path. ``None`` means
            the input is added unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels)
        )
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(x)) + skip(x))``."""
        out = self.conv2(self.conv1(x))
        # Compare against None explicitly: truthiness of a module goes through
        # __len__ (when defined), so a valid projection module could otherwise
        # be skipped silently.
        residual = x if self.downsample is None else self.downsample(x)
        out += residual
        return self.relu(out)
    
class ResNet(nn.Module):
    """ResNet-style classifier for 3-channel 2-D inputs.

    Args:
        block: Residual block class, constructed as
            ``block(in_channels, out_channels, stride, downsample)`` for the
            first block of a stage and ``block(channels, channels)`` after.
        layers: Sequence of four ints, the number of blocks in each stage.
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, block, layers, num_classes=50):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer0 = self._make_layer(block, 64, layers[0], stride=1)
        self.layer1 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer2 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer3 = self._make_layer(block, 512, layers[3], stride=2)
        # Global average pooling collapses any spatial size to 1x1 so the
        # flattened features always have the 512 values `fc` expects. The
        # previous AvgPool2d(1, stride=1) was a no-op and only worked when the
        # feature map was already 1x1. This layer has no parameters, so
        # existing checkpoints still load.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of `blocks` residual blocks producing `planes` channels."""
        downsample = None
        # A 1x1 projection is needed on the skip path whenever the first block
        # changes the spatial size or the channel count.
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes)
            )
        stage = [block(self.inplanes, planes, stride, downsample)]
        stage.extend(block(planes, planes) for _ in range(blocks - 1))
        self.inplanes = planes

        return nn.Sequential(*stage)

    def forward(self, x):
        """Map a ``(N, 3, H, W)`` batch to ``(N, num_classes)`` logits."""
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        # TODO: MAYBE STOP HERE FOR 512D EMBEDDING???
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

class MMFace3D(nn.Module):
    """Four-stage 3-D convolutional classifier.

    Each stage is Conv3d (kernel 3, stride 2, padding 1) + BatchNorm3d + ReLU,
    growing the channels 3 -> 64 -> 128 -> 256 -> 512. The result is globally
    average-pooled and passed through a single linear layer.

    Args:
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, num_classes=50):
        super(MMFace3D, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv3d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv3d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm3d(128),
            nn.ReLU()
        )
        self.conv3 = nn.Sequential(
            nn.Conv3d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm3d(256),
            nn.ReLU()
        )
        self.conv4 = nn.Sequential(
            nn.Conv3d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm3d(512),
            nn.ReLU()
        )
        # Parameter-free global pooling: guarantees exactly 512 features reach
        # `fc1` whatever the input volume size (identity when conv4 already
        # yields a 1x1x1 volume).
        self.pool = nn.AdaptiveAvgPool3d(1)
        self.fc1 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a ``(N, 3, D, H, W)`` batch to ``(N, num_classes)`` logits."""
        # The previous version called `self.maxpool`, which this class never
        # defines, and printed the tensor shape after every layer.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)

        return x

cuda


## Loading Dataset and Model

In [3]:
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms
import os
import numpy as np
from glob import glob
import json
from utils import get_crd_data, by_experiment
from tqdm import tqdm

def collate_crds(path, subjects, frames):
    """Cache each subject's radar recordings as one ``.npy`` array.

    For every subject without an existing ``data/{frames}/crd{frames}_{subject}.npy``
    file, all ``*_radar.json`` recordings under ``{path}/{subject}`` are parsed
    with `get_crd_data`, truncated to the first `frames` frames, converted to
    float32 magnitudes, concatenated along the first axis and saved. The frame
    count of every newly written file is appended, one per line, to
    ``data/{frames}/exact_frames.txt``.

    Args:
        path: Root directory holding one sub-directory per subject.
        subjects: Iterable of subject identifiers (used in directory and file names).
        frames: Maximum number of frames kept per recording; also names the
            cache directory.

    Raises:
        FileNotFoundError: If a subject that still needs caching has no
            ``*_radar.json`` recordings.

    NOTE(review): counts are appended only for newly cached subjects, in
    processing order, so the line order of ``exact_frames.txt`` matches the
    subject order only when the cache is built in one pass - confirm.
    """
    out_dir = f"data/{frames}"
    os.makedirs(out_dir, exist_ok=True)

    exact_frames = []
    for subject in tqdm(subjects):
        crd_subject_path = f"{out_dir}/crd{frames}_{subject}.npy"
        if os.path.exists(crd_subject_path):
            continue

        # os.path.join keeps the pattern portable; the previous hard-coded
        # backslashes only matched on Windows.
        pattern = os.path.join(path, str(subject), "*_radar.json")
        subject_radars = sorted(glob(pattern), key=by_experiment)
        if not subject_radars:
            # Fail before writing anything, so no junk cache file is left
            # behind to be mistaken for valid data on the next run.
            raise FileNotFoundError(f"No radar recordings match {pattern}")

        experiments = []
        for file in subject_radars:
            with open(file, 'r') as f:
                # Presumably (frame, channel, range, doppler) -> (frame, range,
                # doppler, channel), going by the einsum subscripts - confirm
                # against get_crd_data.
                crd = get_crd_data(json.load(f), num_chirps_per_burst=16)[:frames]
            experiments.append(np.einsum("fcrd->frdc", np.abs(crd).astype(np.float32)))

        # Concatenate once rather than re-copying the growing array per file.
        data = np.concatenate(experiments)
        np.save(crd_subject_path, data)
        exact_frames.append(str(data.shape[0]))
        del experiments, data

    if len(exact_frames) > 0:
        frames_file = f"{out_dir}/exact_frames.txt"
        # A file written without a trailing newline would otherwise have its
        # last count fused with the first count appended here.
        needs_separator = False
        if os.path.exists(frames_file) and os.path.getsize(frames_file) > 0:
            with open(frames_file, 'rb') as ef:
                ef.seek(-1, os.SEEK_END)
                needs_separator = ef.read(1) != b"\n"
        with open(frames_file, 'a') as ef:
            if needs_separator:
                ef.write('\n')
            ef.write(''.join(f"{count}\n" for count in exact_frames))

class MMFaceDataset(torch.utils.data.Dataset):
    """Frame-level dataset over the per-subject arrays cached in ``data/{frames}``.

    Item ``i`` is a single frame; frames are numbered consecutively across
    subjects using the per-subject counts listed in `frames_file`.

    Args:
        data_path: Root directory of the raw recordings (only used when the
            cache has to be built).
        frames_file: Text file with one frame count per line.
        subjects: Subject identifiers expected in the cache.
        frames: Frames-per-recording setting that names the cache directory.
        transform: Optional callable applied to each frame.
        target_transform: Optional callable applied to each label.

    NOTE(review): frames are read from ``crd{frames}_{k}.npy`` where ``k`` is
    the position of the count in `frames_file`, not an entry of `subjects`;
    the two agree when `subjects` is ``range(n)`` - confirm for other inputs.
    """

    def __init__(self, data_path, frames_file, subjects=[0], frames=250, transform=None, target_transform=None):
        # Make sure the cache directory exists so listing it cannot fail on a
        # fresh checkout.
        os.makedirs(f"data/{frames}", exist_ok=True)
        # The directory is expected to hold one array per subject plus the
        # frame-count file, hence the "- 1".
        if len(os.listdir(f"data/{frames}"))-1 != len(subjects):
            collate_crds(data_path, subjects, frames)

        with open(frames_file, 'r') as f:
            self.exact_frames = [int(x) for x in f.read().splitlines() if x.strip()]

        # Running totals: _offsets[k] is the number of frames in files 0..k.
        self._offsets = np.cumsum(self.exact_frames)

        self.subjects = subjects
        self.frames = frames
        self.data_path = data_path
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return sum(self.exact_frames)

    # TODO: MAY NEED TO TRAIN AS "VIDEO DATA" USING ALL FRAMES INSTEAD OF INDIVIDUAL
    def __getitem__(self, idx):
        """Return ``(frame, label)`` for the global frame index `idx`.

        Raises:
            IndexError: If `idx` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        idx = int(idx)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError(f"index {idx} out of range for dataset of {total} frames")

        # First file whose running total exceeds idx; side="right" also steps
        # over files that contribute zero frames.
        subject = int(np.searchsorted(self._offsets, idx, side="right"))
        mod_idx = idx - (int(self._offsets[subject - 1]) if subject > 0 else 0)

        # Memory-map the file and copy out one frame instead of reading the
        # whole per-subject array for every sample.
        subject_frames = np.load(f"data/{self.frames}/crd{self.frames}_{subject}.npy", mmap_mode="r")
        crd = np.array(subject_frames[mod_idx])
        label = subject
        if self.transform:
            crd = self.transform(crd)
        if self.target_transform:
            label = self.target_transform(label)

        return crd, label

def normalise(x):
    """Min-max scale `x` into ``[0, 1]``.

    Args:
        x: Non-empty array-like of numbers.

    Returns:
        Array of the same shape with the minimum mapped to 0 and the maximum
        to 1. A constant input (max == min) yields all zeros rather than the
        NaNs a division by zero would produce.
    """
    x = np.asarray(x)
    lowest = np.min(x)
    span = np.max(x) - lowest
    shifted = x - lowest
    if span == 0:
        # `shifted / 1` has the dtype true division would produce, so the
        # result type matches the normal path.
        return np.zeros_like(shifted / 1)
    return shifted / span

def load_dataset(path, subjects=[0], frames=250, batch_size=64, train_split=0.8, test_split=0.1, shuffle=True):
    """Build train / validation / test loaders over one `MMFaceDataset`.

    The frame indices are (optionally) shuffled with a fixed seed and cut into
    three consecutive slices: the first ``train_split`` fraction for training,
    the next ``test_split`` fraction for validation, and whatever remains for
    testing. Note that `test_split` therefore sizes the *validation* slice.

    Args:
        path: Root directory of the raw recordings.
        subjects: Subject identifiers to include.
        frames: Frames-per-recording setting that names the cache directory.
        batch_size: Batch size shared by all three loaders.
        train_split: Fraction of frames used for training.
        test_split: Fraction of frames used for validation.
        shuffle: Whether to shuffle the indices before splitting. Shuffling
            reseeds NumPy's global RNG with 333.

    Returns:
        ``(train_loader, val_loader, test_loader)``.
    """
    pipeline = transforms.Compose([normalise, ToTensor()])
    dataset = MMFaceDataset(path, f"data/{frames}/exact_frames.txt", subjects, frames, transform=pipeline)

    n_samples = len(dataset)
    indices = list(range(n_samples))
    train_end = int(train_split * n_samples)
    val_end = train_end + int(test_split * n_samples)

    if shuffle:
        np.random.seed(333)
        np.random.shuffle(indices)

    # NOTE(review): the original comment here quoted 16 subjects x 15 scenarios
    # x 250 frames = 60,000 frames; the notebook passes 17 subjects, which
    # would give 63,750 - confirm.
    def _loader(subset):
        return DataLoader(dataset, batch_size=batch_size, sampler=SubsetRandomSampler(subset))

    train_loader = _loader(indices[:train_end])
    val_loader = _loader(indices[train_end:val_end])
    test_loader = _loader(indices[val_end:])

    return train_loader, val_loader, test_loader

def load_model(name, model, optimiser):
    """Restore training state from ``models/{name}`` if a checkpoint exists.

    Args:
        name: Checkpoint file name inside the ``models`` directory.
        model: Module whose weights are restored in place.
        optimiser: Optimiser whose state is restored in place.

    Returns:
        ``(epoch, loss)`` as stored in the checkpoint, or ``(0, None)`` when
        there is no checkpoint or it cannot be restored. Failures are printed
        rather than raised so a fresh run can proceed.
    """
    epoch, loss = 0, None
    checkpoint_path = f"models/{name}"
    if not os.path.exists(checkpoint_path):
        print(f"No checkpoint at {checkpoint_path}; starting from scratch")
        return epoch, loss

    try:
        # Load onto the CPU so a checkpoint written on a GPU machine can be
        # read anywhere; load_state_dict copies the values onto whichever
        # device the model/optimiser parameters already live on.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        # Read every entry before mutating anything, so a checkpoint with a
        # missing key cannot leave the model half-restored.
        model_state = checkpoint["model_state_dict"]
        optimiser_state = checkpoint["optimiser_state_dict"]
        saved_epoch = checkpoint["epoch"]
        saved_loss = checkpoint["loss"]

        model.load_state_dict(model_state)
        optimiser.load_state_dict(optimiser_state)
        epoch, loss = saved_epoch, saved_loss
    except Exception as ex:
        print(f"Could not restore {checkpoint_path}: {ex!r}")

    return epoch, loss

# Frames kept per recording; also selects the data/{frames} cache directory.
frames = 250

# One class per subject: subject ids 0 .. num_classes-1.
train, validation, test = load_dataset(
    path=os.path.relpath("../../Soli/soli_realsense/data"),
    subjects=list(range(num_classes)),
    frames=frames,
    batch_size=32,
)

## Hyperparameters + Loss + Optimiser

In [10]:
num_epochs = 20
learning_rate = 0.01

# model = ResNet(ResidualBlock, [3, 4, 6, 3], num_classes).to(device)
# model = MMFace3D(num_classes).to(device)

# torchvision ResNet-18 with its default pretrained weights; the final linear
# layer is replaced so the network outputs `num_classes` logits.
model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Loss + Optimiser
criterion = nn.CrossEntropyLoss()
optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.001, momentum=0.9)

# Resume from the last checkpoint when one exists (epoch 0 / no loss otherwise).
model_name = "mmFace250-resnet18.pt"
cur_epoch, cur_loss = load_model(model_name, model, optimiser)

# Fine-tune every layer, not only the new classifier head.
for param in model.parameters():
    param.requires_grad = True

# Compare against None explicitly: truthiness of a tensor would hide a stored
# loss of exactly zero.
if cur_loss is not None:
    print(cur_epoch, cur_loss.item())

11 3.2780444622039795


## Training
- Load training data in ***batches*** for every epoch, moving to `device`
  - `train_loader` = `[([data*], [labels*])*]`
- `model(data)` to predict label, then calculate loss between predictions and ground truth labels using `criterion(preds, labels)`
- Backpropagate with `loss.backward()`, then update the weights with `optimiser.step()`. Gradients must be reset with `optimiser.zero_grad()` before each backward pass; otherwise they accumulate across batches (PyTorch's default behaviour).
- After every epoch, evaluate the model on the validation set. Gradient tracking can be switched off with `with torch.no_grad()` for faster evaluation.

In [11]:
import gc

for epoch in range(cur_epoch, num_epochs):
    print(f"\nEpoch [{epoch}/{num_epochs-1}]:")
    model.train()

    # Running Loss and Accuracy
    running_loss = 0.
    running_acc = 0.
    total = 0.

    for data, labels in tqdm(train):
        data = data.to(device)
        labels = labels.to(device)

        # Forward Pass
        outputs = model(data)
        loss = criterion(outputs, labels)

        # Backward Pass and Optimise
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

        running_loss += loss.item()
        total += labels.size(0)
        running_acc += (outputs.detach().argmax(dim=1) == labels).sum().item()

    # Release cached memory once per epoch. Doing this after every batch, as
    # before, forces a full garbage collection and allocator flush on each
    # step and slows training without changing the result.
    gc.collect()
    torch.cuda.empty_cache()

    print(f"\tLoss: {loss.item():.4f}")
    print(f"\tTrain Loss: {running_loss / len(train)}")
    print(f"\tTrain Accuracy: {100*running_acc / total}%")

    # Checkpoint after every epoch so an interrupted run can resume. The loss
    # is stored detached and on the CPU so the file does not depend on the
    # autograd graph or on the training device.
    os.makedirs("models", exist_ok=True)
    torch.save({"epoch": epoch+1,
                "model_state_dict": model.state_dict(),
                "optimiser_state_dict": optimiser.state_dict(),
                "loss": loss.detach().cpu()},
                f"models/{model_name}")

    # Validation
    print("Validation")
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, labels in tqdm(validation):
            data = data.to(device)
            labels = labels.to(device)
            outputs = model(data)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"\tAccuracy of mmFace: {100*correct/total:.2f}%")


Epoch [11/19]:


100%|██████████| 1594/1594 [13:40<00:00,  1.94it/s]


	Loss: 2.8566
	Train Loss: 2.835771550946732
	Train Accuracy: 9.706453320783575%
Validation


100%|██████████| 200/200 [01:11<00:00,  2.80it/s]


	Accuracy of mmFace: 10.37%

Epoch [12/19]:


100%|██████████| 1594/1594 [13:28<00:00,  1.97it/s]


	Loss: 2.7980
	Train Loss: 2.7598005039629103
	Train Accuracy: 10.033923564131223%
Validation


100%|██████████| 200/200 [01:08<00:00,  2.92it/s]


	Accuracy of mmFace: 11.01%

Epoch [13/19]:


100%|██████████| 1594/1594 [12:58<00:00,  2.05it/s]


	Loss: 2.6304
	Train Loss: 2.7319253463218622
	Train Accuracy: 10.34766750985352%
Validation


100%|██████████| 200/200 [01:08<00:00,  2.91it/s]


	Accuracy of mmFace: 9.48%

Epoch [14/19]:


100%|██████████| 1594/1594 [13:11<00:00,  2.01it/s]


	Loss: 2.6234
	Train Loss: 2.7188341293011886
	Train Accuracy: 10.547679275251486%
Validation


100%|██████████| 200/200 [01:09<00:00,  2.89it/s]


	Accuracy of mmFace: 10.65%

Epoch [15/19]:


100%|██████████| 1594/1594 [13:12<00:00,  2.01it/s]


	Loss: 2.7622
	Train Loss: 2.7081953999089774
	Train Accuracy: 11.269290350412769%
Validation


100%|██████████| 200/200 [01:09<00:00,  2.89it/s]


	Accuracy of mmFace: 11.11%

Epoch [16/19]:


100%|██████████| 1594/1594 [13:18<00:00,  2.00it/s]


	Loss: 2.6189
	Train Loss: 2.6942078056120065
	Train Accuracy: 11.734023570013923%
Validation


100%|██████████| 200/200 [01:09<00:00,  2.86it/s]


	Accuracy of mmFace: 12.72%

Epoch [17/19]:


100%|██████████| 1594/1594 [13:19<00:00,  1.99it/s]


	Loss: 2.6517
	Train Loss: 2.684222459942663
	Train Accuracy: 12.00266682353864%
Validation


100%|██████████| 200/200 [01:09<00:00,  2.86it/s]


	Accuracy of mmFace: 12.32%

Epoch [18/19]:


100%|██████████| 1594/1594 [13:58<00:00,  1.90it/s]


	Loss: 2.5177
	Train Loss: 2.6720423904835355
	Train Accuracy: 12.44779104653215%
Validation


100%|██████████| 200/200 [01:19<00:00,  2.51it/s]


	Accuracy of mmFace: 12.77%

Epoch [19/19]:


100%|██████████| 1594/1594 [13:39<00:00,  1.95it/s]


	Loss: 2.7929
	Train Loss: 2.6643466918052665
	Train Accuracy: 12.543875129909603%
Validation


100%|██████████| 200/200 [01:10<00:00,  2.85it/s]

	Accuracy of mmFace: 12.91%





## Testing

In [5]:
# Held-out evaluation: count correct top-1 predictions over the test loader.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for data, labels in tqdm(test):
        data, labels = data.to(device), labels.to(device)
        outputs = model(data)
        # Index of the largest logit per sample.
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del data, labels, outputs

print(f"Test Accuracy of mmFace: {100*correct/total:.4f}%")

100%|██████████| 376/376 [01:42<00:00,  3.68it/s]

Test Accuracy of mmFace: 12.2480%



