<a href="https://colab.research.google.com/github/AlmTechSoftware/piper/blob/main/training_data/research/feynman_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prerequisites

## Dataset download from temporary 0x0.st

In [21]:
!curl http://0x0.st/HLtv.zip > dataset.zip
!rm -r dataset
!unzip dataset.zip -d dataset

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 12.1M  100 12.1M    0     0  8200k      0  0:00:01  0:00:01 --:--:-- 8205k
rm: cannot remove 'dataset': No such file or directory
Archive:  dataset.zip
 extracting: dataset/README.dataset.txt  
 extracting: dataset/README.roboflow.txt  
   creating: dataset/test/
 extracting: dataset/test/Adam-still_jpeg.rf.90fbaea30ed3e09fd7fda5aa92ea6a68.jpg  
 extracting: dataset/test/Adam-still_jpeg.rf.90fbaea30ed3e09fd7fda5aa92ea6a68_mask.png  
 extracting: dataset/test/Board_jpg.rf.4d5d6a6f3c89eab87661fa09120877ef.jpg  
 extracting: dataset/test/Board_jpg.rf.4d5d6a6f3c89eab87661fa09120877ef_mask.png  
 extracting: dataset/test/MPC_2_jpg.rf.8263fb9cbb0b86857de4519a380ac872.jpg  
 extracting: dataset/test/MPC_2_jpg.rf.8263fb9cbb0b86857de4519a380ac872_mask.png  
 extracting: dataset/test/_classes.csv  
 extracting: dataset/test/falling_mp4-

## Dependencies

In [2]:
!pip install  opencv-python \
              scikit-image \
              python-dotenv \
              pycocotools \
              dataclasses-json \
              supervision \
              colored \
              wandb \
              torchvision \
              torchviz -Uq

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m72.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.3/59.3 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.5/188.5 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.6/215.6 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for torchviz (set

## WandB login

In [17]:
import wandb
wandb.login()



True

# Definitions

## Dataset Handler Definition

In [18]:
import os
import numpy as np

from torch import Tensor
from torch.utils.data import Dataset
from torchvision import transforms

from PIL import Image

from typing import Tuple

from glob import glob

class PNGMaskDataset(Dataset):
    def __init__(
        self,
        dataset_dir: str,
    ):
        self.dataset_dir = dataset_dir
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
            ]
        )

        # Example _classes.csv
        """
        Pixel Value, Class
        0, background
        1, sghug
        2, w%
        """

        # Parse _classes.csv
        fh = open(os.path.join(self.dataset_dir, "_classes.csv"))
        lines = fh.readlines()
        fh.close()

        self.classes = map(lambda csv: csv.split(", "), lines)
        self.classes = map(lambda csv: (csv[1], int(csv[0])), self.classes)


        # Load images & masks
        self.mask_files = glob(os.path.join(self.dataset_dir, "*_mask.png"))

        all_files =  glob(os.path.join(self.dataset_dir, "*"))
        self.images = {}
        for mask_file in self.mask_files:
            img_file = mask_file.replace("_mask.png", ".jpg")
            base, ext = os.path.splitext(img_file)

            if img_file in all_files:
                self.images[base] = {
                    "image": img_file,
                    "mask": mask_file,
                }

        self.tensors = {}
        self.max_width = 0
        self.max_height = 0
        for base, image_info in self.images.items():
            # Load and preprocess the image
            image_path = image_info["image"]
            image = Image.open(image_path).convert("RGB")
            self.max_width = max(image.size[0], self.max_width)
            image = self.transform(image)

            # Load and preprocess the segmentation mask
            mask_path = image_info["mask"]
            mask = Image.open(mask_path).convert("RGB")
            self.max_height = max(image.size[1], self.max_height)
            mask = self.transform(mask)

            self.tensors[base] = {
                "image": image,
                "mask": mask,
            }

    @staticmethod
    def scale_and_pad_tensor(input_tensor: Tensor,
                            target_size: Tuple[int, int],
                            padding_value: float = 0) -> Tensor:

        input_size = input_tensor.size()
        scale_factor = (target_size[0] / input_size[0], target_size[1] / input_size[1])

        scaled_tensor = torch.nn.functional.interpolate(
            input_tensor.unsqueeze(0),
            scale_factor=scale_factor,
            mode='bilinear',
            align_corners=False,
        ).squeeze(0)

        pad_h = target_size[0] - scaled_tensor.size(1)
        pad_w = target_size[1] - scaled_tensor.size(2)

        if pad_h > 0 or pad_w > 0:
            padded_tensor = torch.nn.functional.pad(
                scaled_tensor,
                (0, pad_w, 0, pad_h),
                value=padding_value,
            )
        else:
            padded_tensor = scaled_tensor

        return padded_tensor

    def __str__(self):
        return str(self.images)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_info = list(self.tensors.values())[idx]
        image, mask = image_info["image"], image_info["mask"]

        image = self.scale_and_pad_tensor(image, (self.max_height, self.max_width))
        mask = self.scale_and_pad_tensor(mask, (self.max_height, self.max_width))

        return image, mask

In [19]:
import torchvision.transforms as T
_tensor_pil_transform = T.ToPILImage()

def tensor_to_pil(tensor):
    return _tensor_pil_transform(tensor)

### Example

In [23]:
dataset = PNGMaskDataset("dataset/train")

img, mask = dataset[0]

img = tensor_to_pil(img)
mask = tensor_to_pil(mask * 255)

display(img)
display(mask)

del dataset, img, mask

TypeError: ignored

## FeynMAN Model Definition

In [7]:
# Import needed libs

import torch
import torch.nn as nn

In [9]:
import torch
import torch.nn as nn

from torch import Tensor

class FeynMAN(nn.Module):
    def __init__(self, in_channels: int = 3, num_classes: int = 2):
        super(__class__, self).__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )

        self.classifier = nn.Conv2d(256, num_classes, kernel_size=1)

    def forward(self, x: torch.Tensor) -> Tensor:
        x = self.conv_layers(x)
        x = self.classifier(x)
        return x

# Training

In [10]:
!nvidia-smi && echo "YES YOU CAN TRAIN WITH CUDA!"

Sun Aug 27 20:29:37 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8    11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [11]:
LEARNING_RATE = 0.001
EPOCHS = 100
BATCH_SIZE = 8
DATASET_DIR = "dataset/train/"

In [12]:
from torch.utils.data import DataLoader

import torch
import torch.nn as nn
import torch.optim as optim

def train_model(
    model: nn.Module,
    dataset_dir: str = DATASET_DIR,
    epochs: int = EPOCHS,
    batch_size: int = BATCH_SIZE,
    learning_rate: float = LEARNING_RATE,
    device: torch.device = torch.device("cpu"),
):
    # start a new wandb run to track this script
    wandb.init(
        # set the wandb project where this run will be logged
        project="feynman_2",

        # track hyperparameters and run metadata
        config={
        "learning_rate": learning_rate,
        "epochs": epochs,
        "batch_size": batch_size,
        "architecture": "FeynMAN",
        "dataset": "feynman_v7i",
        }
    )

    # Watch the model for cool weights & biases
    wandb.watch(model)

    criterion = nn.CrossEntropyLoss() # nn.BCELoss() #### TODO: change?
    params = model.parameters()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    dataset = PNGMaskDataset(dataset_dir)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    print("BEGIN TRAINING!")
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        print(epoch)

        for batch_idx, (images, masks) in enumerate(dataloader):
            images, masks = images.to(device), masks.to(device) #.float()
            print("#", batch_idx)

            # Forward pass
            outputs = model(images)
            print("post forward pass")
            loss = criterion(outputs, masks.squeeze(1))
            print("loss done")

            # Backpropagation and optimization
            optimizer.zero_grad()
            print("zero grad done")
            loss.backward()
            print("backward done")
            optimizer.step()
            print("step done")

            running_loss += loss.item()
            wandb.log({"Loss": loss})

            print(
                f"Epoch [{epoch+1}/{epochs}] Batch [{batch_idx+1}/{len(dataloader)}] - Loss: {loss:.4f}"
            )

        wandb.log({"Running loss": running_loss})

    print("END TRAINING!")
    wandb.finish()

### Model creation

In [13]:
model = FeynMAN()
device_type = "cuda" if torch.cuda.is_available() else "cpu"
#device_type = "cpu"
device = torch.device(device_type)

# Put the model on the GPU
model = model.to(device)

#### Debug

In [14]:
# Test the input etc.

input_height = 256
input_width = 256
random_input = torch.randn(1, 3, input_height, input_width)

# Forward pass
segmentation_map = model(random_input)
print("Segmentation map shape:", segmentation_map.shape)

Segmentation map shape: torch.Size([1, 3, 256, 256])


In [15]:
from torchviz import make_dot

ds = PNGMaskDataset("./dataset/test/")
x_tmp, _ = ds[0]
x_tmp = x_tmp.to(device)
y_tmp = model(x_tmp.unsqueeze(0))

make_dot(y_tmp, params=dict(list(model.named_parameters()))).render("model.png", format="png")

del model, ds, x_tmp, y_tmp, _

In [23]:
!pip install torchsummary


model = model.to(device)

from torchsummary import summary
summary(model, input_size=(3, 128, 128), batch_size=8, device="cuda")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [8, 64, 128, 128]           1,792
              ReLU-2          [8, 64, 128, 128]               0
            Conv2d-3         [8, 128, 128, 128]          73,856
              ReLU-4         [8, 128, 128, 128]               0
            Conv2d-5         [8, 256, 128, 128]         295,168
              ReLU-6         [8, 256, 128, 128]               0
            Conv2d-7           [8, 3, 128, 128]             771
Total params: 371,587
Trainable params: 371,587
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.50
Forward/backward pass size (MB): 899.00
Params size (MB): 1.42
Estimated Total Size (MB): 901.92
----------------------------------------------------------------


### Start training loop

#### FCN ResNet 50 test

In [20]:
from torchvision.models.segmentation import fcn_resnet50

model = fcn_resnet50(num_classes=3, pretrained=False)
model = model.to(device)



#### Train the model

In [14]:
# Fix for some CUDA bullshit

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [15]:
print(f"Doing training on device type '{device_type}'!")

train_model(model, dataset_dir="./dataset/train/", device=device, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, epochs=EPOCHS)

[34m[1mwandb[0m: Currently logged in as: [33mwych[0m ([33malmtech[0m). Use [1m`wandb login --relogin`[0m to force relogin


Doing training on device type 'cuda'!


BEGIN TRAINING!
0


RuntimeError: ignored