In [1]:
import numpy as np
import matplotlib.pyplot as plt
from preprocess_images import data_from_folder
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.v2 as T
from torchsummary import summary
import cv2 
import wandb
from config import LMDB_USE_COMPRESSION

import lmdb
import os
import msgpack
import lz4.frame

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)

cuda


In [2]:
# data = data_from_folder("H:/latest_real_data/real_data/real", grayscale=True, target_size=(125, 125))
# data = data_from_folder("./data/laser_x4_y6")
# data = data_from_folder("./data/test")

In [3]:
# for k in data:
#     print(k)
#     plt.imshow(data[k], vmin=0, vmax=255)
#     # plt.show()
#     break

In [4]:
def imname_to_target(name:str) -> tuple[float]:
    """Parses image names of format x{x_value}_y{y_value}.jpg"""
    name = name.split('.jpg')[0]
    x, y = name.split("_")
    x = float(x[1:])
    y = float(y[1:5])
    return x, y

In [5]:
config = {
    "experiment_name": "mixed_002step_smallext1_512bs_00001lr_aug+",
    "batch_size": 512,
    "lr": 0.001,
    "lr_scheduler_loop": 7,
    "epochs": 28,
    "use_amp": False,

    "data_folder": "/mnt/h/real_512_0_001step.lmdb",
    "dataset_type": "LMDBImageDataset",
    "dataset_config_flatten": False,
    "dataset_train_keys_fname": "002_mixed_keys_train.txt",
    "dataset_val_keys_fname": "002_mixed_keys_val.txt",

    "noise_level": 0.1,
    "jitter_brightness": 0.4, 
    "jitter_contrast": 0.1, 
    "jitter_saturation": 0.1, 
    "jitter_hue": 0.2,

    "data_collection_step": 0.001,
    "starting_checkpoint_fname": None,
    "checkpoint_folder": "./saved_models/real",

    "conv_stride": 2,
    "conv_kernel": 3,
    "conv_depth": 1,
}

In [6]:
# Prepare image filenames for FilesImageDataset
FilesImageDataset_fnames = []
if config["dataset_type"] == "FilesImageDataset":
    for file in os.listdir(config["data_folder"]):
            if not file.endswith((".png", ".jpg", ".jpeg")):
                  continue
            if config["data_collection_step"] == 0.002:
                # TODO: Attention! Mimmicking 0.002 step
                x, y = imname_to_target(file)
                if int(x*100)%2 != 0:
                    continue
                if int(y*100)%2 != 0:
                    continue
            FilesImageDataset_fnames.append(file)

In [7]:
class FlatGrayImageDataset(Dataset):
    def __init__(self, images, targets, exclude=True):
        self.images = images
        self.targets = targets

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        image = self.images[index]  # Get image (NumPy array)
        label = self.targets[index]  # Get corresponding tuple
        
        # Convert image to Tensor, flatten, and normalize [0,1]
        image = torch.from_numpy(image).flatten()
        image = image.float() / 255.0
        
        # Convert label tuple to Tensor
        label = torch.tensor(label, dtype=torch.float32)

        return image, label
    

class FilesImageDataset(Dataset):
    def __init__(self, data_dir, filenames):
        self.data_dir = data_dir
        self.filenames = filenames
        self.targets = [imname_to_target(s) for s in filenames]

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        fname = self.filenames[index]
        image = cv2.imread(os.path.join(self.data_dir, fname), cv2.IMREAD_GRAYSCALE)
        label = self.targets[index]  # Get corresponding tuple
        
        # Convert image to Tensor, flatten, and normalize [0,1]
        image = torch.from_numpy(image).flatten()
        image = image.float() / 255.0
        
        # Convert label tuple to Tensor
        label = torch.tensor(label, dtype=torch.float32)

        return image, label
    

class LMDBImageDataset(Dataset):
    def __init__(self, lmdb_path, transforms=None, keys_fname="keys.txt", flatten_data=True):
        self.keys = None

        # Data augmentation
        self.transforms = transforms

        # Read text keys from file
        with open(os.path.join(lmdb_path, keys_fname)) as f:
            self.keys = f.readlines()
            if self.keys[-1] == '':
                self.keys = self.keys[:-1]
        for i in range(len(self.keys)):
            self.keys[i] = self.keys[i].replace("\n", "")

        # Get labels from text keys
        self.labels = []
        # self.labels = [imname_to_target(key) for key in self.keys]
        for i, key in enumerate(self.keys):
            try:
                self.labels.append(imname_to_target(key))
            except Exception as e:
                print("i:", i)
                print("name:", key)
                raise e

        # Encode keys
        for i in range(len(self.keys)):
            self.keys[i] = self.keys[i].encode()

        self.lmdb_path = lmdb_path
        self.flatten_data = flatten_data

    def open_lmdb(self):
        self.env = lmdb.open(self.lmdb_path, readonly=True, create=False, lock=False, readahead=False, meminit=False)
        self.txn = self.env.begin()

    def close(self):
        self.env.close()

    def __len__(self):
        return len(self.keys)
    
    def add_coord_channels(self, image_tensor):
        _, H, W = image_tensor.shape

        x_coords = torch.linspace(-1, 1, W, device=image_tensor.device)
        y_coords = torch.linspace(-1, 1, H, device=image_tensor.device)

        x_coords = x_coords.view(1, 1, W).expand(1, H, W)
        y_coords = y_coords.view(1, H, 1).expand(1, H, W)

        return torch.cat([image_tensor, x_coords, y_coords], dim=0)
    
    def __getitem__(self, index):
        if not hasattr(self, 'txn'):
            print("Opening lmdb txn")
            self.open_lmdb()
        key = self.keys[index]  # Get corresponding tuple
        label = self.labels[index]
        
        img_bytes = self.txn.get(key)
        
        if img_bytes is None:
            raise KeyError(f"Image {key} not found in LMDB!")

        if LMDB_USE_COMPRESSION:
            img_bytes = lz4.frame.decompress(img_bytes)

        image = np.array(msgpack.unpackb(img_bytes, raw=False), dtype=np.uint8)

        # Convert image to Tensor, and normalize [0,1]
        
        if not self.flatten_data:
            image = torch.from_numpy(np.array([image])).float()
        image = image / 255.0

        # Augmenation
        if self.transforms is not None:
            image = self.transforms(image)

        if self.flatten_data:
            image = torch.from_numpy(image).flatten().float()

        # image = self.add_coord_channels(image)

        # Convert label tuple to Tensor
        x, y = label
        x = (x + 2) / 5.7
        y = (y + 2) / 4
        label = (x, y)
        label = torch.tensor(label, dtype=torch.float32)

        return image, label

In [8]:
train_transforms = T.Compose([
            T.ColorJitter(config["jitter_brightness"],
                          config["jitter_contrast"],
                        #   config["jitter_saturation"],
                        #   config["jitter_hue"]
                        ),
            T.GaussianNoise(sigma=config["noise_level"]),
        ])


match config["dataset_type"]:
    case "LMDBImageDataset":
        train_dataset = LMDBImageDataset(config["data_folder"], transforms=train_transforms, flatten_data=config["dataset_config_flatten"], keys_fname=config["dataset_train_keys_fname"])
        val_dataset = LMDBImageDataset(config["data_folder"], flatten_data=config["dataset_config_flatten"], keys_fname=config["dataset_val_keys_fname"])
    case "FilesImageDataset":
        train_dataset = FilesImageDataset(config["data_folder"], FilesImageDataset_fnames)
        val_dataset = FilesImageDataset(config["data_folder"], FilesImageDataset_fnames)
    case _ :
        raise("Wrong dataset type")
train_data_loader = DataLoader(train_dataset, 
                         batch_size=config["batch_size"], 
                         shuffle=True, 
                         num_workers=8, 
                         pin_memory=True, 
                         prefetch_factor=4, 
                         persistent_workers=True
                        )
val_data_loader = DataLoader(val_dataset,
                             batch_size=config["batch_size"],
                             shuffle=False,
                             num_workers=4,
                             persistent_workers=True,
                             pin_memory=True
                            )

In [9]:
print(train_dataset[0][0].shape)
print(len(train_dataset))

Opening lmdb txn
torch.Size([1, 512, 512])
136800


In [None]:
class SimpleCNN(nn.Module):
    def __init__(self, output_size, in_channels):
        super(SimpleCNN, self).__init__()
        self.sec1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, 5, 2), # 3, 250 -> 32, 125
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 32, 125 -> 32, 62

            nn.Conv2d(32, 64, 3, 2), # 32, 62 -> 64, 31
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 64, 31 -> 64, 15

            nn.Conv2d(64, 128, 3, 2), # 64, 15 -> 128, 7
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 128, 7 -> 128, 3
        )

        self.sec2 = nn.Sequential(
            nn.Linear(128*3*3, 256),
            nn.ReLU(),
            nn.Linear(256, output_size),
        )

    def forward(self, x):
        x = self.sec1(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.sec2(x)

        return x
    
class SimpleFC(nn.Module):
    def __init__(self, in_features, out_features):
        super(SimpleFC, self).__init__()
        self.relu = nn.ReLU()
        self.layers = nn.Sequential(
            nn.Linear(in_features, 1024), # 262,144 -> 1024
            nn.BatchNorm1d(1024),
            self.relu,
            nn.Linear(1024, 256),
            nn.BatchNorm1d(256),
            self.relu,
            nn.Linear(256, 32),
            nn.BatchNorm1d(32),
            self.relu,
            nn.Linear(32, out_features),
        )
    def forward(self, x):
        return self.layers.forward(x)
    
class GradientSimpleFC(nn.Module):
    def __init__(self, in_features, out_features):
        super(GradientSimpleFC, self).__init__()
        self.relu = nn.ReLU()
        self.layers = nn.Sequential(
            nn.Linear(in_features, 1024), # 262,144 -> 1024
            nn.BatchNorm1d(1024),
            self.relu,
            nn.Linear(1024, 256),
            nn.BatchNorm1d(256),
            self.relu,
            nn.Linear(256, 32),
            nn.BatchNorm1d(32),
            self.relu,
            nn.Linear(32, out_features),
        )
    def forward(self, x):
        return self.layers.forward(x)
    
class SmallFC(nn.Module):
    def __init__(self, in_features, out_features):
        super(SmallFC, self).__init__()
        self.relu = nn.ReLU()
        self.layers = nn.Sequential(
            nn.Linear(in_features, 128), # 512*512 -> 128
            nn.BatchNorm1d(128),
            self.relu,
            nn.Linear(128, 32),
            nn.BatchNorm1d(32),
            self.relu,
            nn.Linear(32, out_features),
        )
    def forward(self, x):
        return self.layers.forward(x)
    
class SmallExt(nn.Module):
    def __init__(self, in_features, out_features):
        super(SmallExt, self).__init__()
        k_size = config["conv_kernel"]
        step = config["conv_stride"]
        pad = k_size//2
        depth = config["conv_depth"]
        flat = int((in_features-k_size + 2*pad)//step + 1)
        self.relu = nn.ReLU()
        self.layers = nn.Sequential(
            nn.Conv2d(1, depth, k_size, step, padding=pad),
            self.relu,
            nn.Flatten(),
            nn.Linear(flat*flat * depth, 128), # 512*512 -> 128
            nn.BatchNorm1d(128),
            self.relu,
            nn.Linear(128, 32),
            nn.BatchNorm1d(32),
            self.relu,
            nn.Linear(32, out_features),
        )
    def forward(self, x):
        return self.layers.forward(x)
    

# model = CoordWideConv().to(DEVICE)
# model = SimpleFC(512*512, 2).to(DEVICE)
# model = SmallFC(512*512, 2).to(DEVICE)
# model = SmallExt(512, 2).to(DEVICE)
model = GradientSimpleFC(512, 2).to(DEVICE)

summary(model, (1,512,512), config["batch_size"])
        

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [512, 1, 256, 256]              10
              ReLU-2         [512, 1, 256, 256]               0
              ReLU-3         [512, 1, 256, 256]               0
           Flatten-4               [512, 65536]               0
            Linear-5                 [512, 128]       8,388,736
       BatchNorm1d-6                 [512, 128]             256
              ReLU-7                 [512, 128]               0
              ReLU-8                 [512, 128]               0
            Linear-9                  [512, 32]           4,128
      BatchNorm1d-10                  [512, 32]              64
             ReLU-11                  [512, 32]               0
             ReLU-12                  [512, 32]               0
           Linear-13                   [512, 2]              66
Total params: 8,393,260
Trainable param

- ansemble: 
- transfer learning

In [11]:
optimizer = optim.AdamW(model.parameters(), config["lr"], weight_decay=0.001)
criterion = nn.MSELoss()
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, config["lr_scheduler_loop"], eta_min=0.00001)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)
# scaler = torch.cuda.amp.GradScaler("cuda", enabled=config["use_amp"])

In [12]:
def save_model(model:torch.nn.Module, fname="best_model.pth", path="./saved_models/real"):
    torch.save(model.state_dict(), os.path.join(path,fname))

def load_model(model:torch.nn.Module, fname="best_model.pth", path="./saved_models/real"):
    model.load_state_dict(torch.load(os.path.join(path,fname), weights_only=False))
    return model

In [1]:
wandb.login(key="a41d74c58ab2f0d2c2bbdb317450ab14a8ad9d4e")
wandb.init(
    project="multireflection",
    name=config["experiment_name"],
    config=config,
)
wandb.watch(model, log='all', log_freq=100)

NameError: name 'wandb' is not defined

In [14]:
def train(model, train_loader, val_loader, optimizer:optim.Optimizer, criterion, scheduler:optim.lr_scheduler.CosineAnnealingWarmRestarts, best_loss=None):
    
    if best_loss is None:
        best_loss = 1000000000
    best_model = None

    for epoch in range(config['epochs']):
        # Training Loop
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader):
            # with torch.autocast(device_type=DEVICE, dtype=torch.float16, enabled=config["use_amp"]):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad(set_to_none=True)
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            
            # scaler.scale(loss).backward()
            # scaler.step(optimizer)
            # scaler.update()
            running_loss += loss.item()
            
        last_lr = scheduler.get_last_lr()[0]
        

        avg_train_loss = running_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss = 0.0
        for images, labels in tqdm(val_loader):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            with torch.inference_mode():
                out = model(images)
                loss = criterion(out, labels)
            val_loss += loss.item()
        avg_val_loss = val_loss/len(val_loader)

        scheduler.step()

        if avg_val_loss < best_loss:
            best_model = model
            best_loss = avg_val_loss
            save_model(model, fname=config["experiment_name"]+"_best_model.pth")

        print(f"Epoch {epoch + 1}/{config['epochs']}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

        # ✅ Log Training Loss
        wandb.log({"Train Loss": avg_train_loss, "Val Loss": avg_val_loss, "LR": last_lr, "best_loss":best_loss})

    print("Best loss:", best_loss)
    return model, best_model, best_loss


In [15]:
if config["starting_checkpoint_fname"] is not None:
    model = load_model(model, fname=config["starting_checkpoint_fname"], path=config["checkpoint_folder"])

In [16]:
best_loss = None
# best_loss = 0.0002398806914137568

In [None]:
model, best_model, best_loss = train(model, train_data_loader, val_data_loader, optimizer, criterion, scheduler, best_loss)

  7%|▋         | 19/268 [01:30<19:49,  4.78s/it] 


AttributeError: 'NoneType' object has no attribute '_log'

: 

In [18]:
wandb.finish()

0,1
LR,██▇▅▃▂▁██▇▅▃▂▁██▇▅▃▂▁██▇▅▃▂▁
Train Loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val Loss,█▄▄▂▂▂▁▄▇▁▂▂▁▁▆▂▂▃▂▁▁▃▅▂▂▃▁▁
best_loss,█▄▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
LR,6e-05
Train Loss,0.00062
Val Loss,0.0007
best_loss,0.0007


In [19]:
def prepare_test_input(data_folder:str, fnames:list[str], device):
    tensors = []
    targets = []
    original_images = [] # for visualization
    for fname in fnames:
        img = cv2.imread(data_folder+fname, cv2.IMREAD_GRAYSCALE)
        original_images.append(img)
        img = torch.from_numpy(img).flatten()
        img = img.float() / 255.0
        tensors.append(img)

        targets.append(imname_to_target(fname))
    result_tensor = torch.stack(tensors).to(device)

    return result_tensor, targets, original_images

In [20]:
# Prepare test images
data_folder = "data/125x125_laser_x4_y6/"
fnames = [
    "x-3.00_y-2.10.jpg",
    "x-2.90_y1.40.jpg",
    "x-1.70_y2.00.jpg",

    "x-1.10_y1.40.jpg",
    "x-0.60_y2.10.jpg",
    "x1.30_y-0.90.jpg",

    "x-0.00_y-0.10.jpg",
    "x-0.00_y0.00.jpg",
    "x0.80_y0.10.jpg",
]
test_input, keys, original_images = prepare_test_input(data_folder, fnames, DEVICE)
print(test_input.shape)
print(keys)


torch.Size([9, 15625])
[(-3.0, -2.1), (-2.9, 1.4), (-1.7, 2.0), (-1.1, 1.4), (-0.6, 2.1), (1.3, -0.9), (-0.0, -0.1), (-0.0, 0.0), (0.8, 0.1)]


In [21]:
# Try best model
best.eval()

predictions:torch.Tensor = best.forward(test_input)
predictions = predictions.detach().cpu().numpy()
print(predictions)

NameError: name 'best' is not defined

In [None]:
# Visualize
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    ax.imshow(original_images[i], cmap='gray')
    ax.set_title(fnames[i])
    ax.axis("off")

    # Display prediction
    ax.text(35,15,f"x{predictions[i][0]:.2f}_y{predictions[i][1]:.2f}", color="white")

plt.show()

In [None]:
ntargets = np.array(train_dataset.targets)
print(ntargets.shape)

evaluate_on_train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=False)