In [None]:
from PIL import Image
import torch
# Enumerate the CUDA devices PyTorch can see and print each device's name.
cudas = torch.cuda.device_count()
for i in range(cudas):
    print(torch.cuda.get_device_properties(i).name)
if cudas < 1:
    print("No GPU found")
    # Raise SystemExit directly rather than calling exit(): exit() is an
    # interactive-shell helper (added by the `site` module and overridden by
    # IPython) that is not meant for program code. The exit status stays 1.
    raise SystemExit(1)

# Best-effort diagnostics: each probe is isolated so that an attribute missing
# from the installed torch build is reported instead of aborting the cell.
try:
    print("Is cuda available? ", torch.cuda.is_available())
except Exception as e:
    print("Error checking cuda availability: ", e)

try:
    print("Is MPS? ", torch.backends.mps.is_available())
except Exception as e:
    print("Error checking MPS availability: ", e)

try:
    print("Is using HIP version? ", torch.version.hip)
except Exception as e:
    print("Error checking hip availability: ", e)

try:
    print("Is using CUDA version? ", torch.version.cuda)
except Exception as e:
    print("Error checking cuda availability: ", e)

# cuDNN configuration: keep cuDNN enabled but turn the benchmark mode off.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = False
# NOTE(review): `workspace_limit` is not among the documented
# torch.backends.cudnn flags, so this assignment (256 MiB) probably has no
# effect -- confirm against the installed PyTorch version or remove it.
torch.backends.cudnn.workspace_limit = 256 * 1024 * 1024

In [None]:
from TranslatedLoader import GetDataset

# Directory containing the CSV listings of the dataset splits.
# NOTE(review): the '~' is stored verbatim; nothing in this cell expands it,
# so presumably the loader does -- confirm.
csv_dir = '~/KNN/data/data_list/data_list/'

# Path-prefix -> local-directory mapping that is passed to GetDataset together
# with a CSV listing (presumably the keys are the prefixes used inside those
# CSV files -- verify against TranslatedLoader).
translation_table = dict([
    ('./val/', '~/KNN/data/valRGB/'),
    ('./train/', '~/KNN/data/trainRGB/train/'),
    ('./train_normal/', '~/KNN/data/trainNorm/'),
    ('./val_normal/', '~/KNN/data/valNorm/'),
])


In [None]:
from model_unet import UNet
from train_model import TrainModel, LoadModel, visualize_predictions, ValidateModel
from vgg_model import VGGNormal
from dataset import load_data
from WriteWrapper import WriteWrapper
from torch.utils.data import DataLoader
import os

# Run switches and checkpoint naming pieces.
# NOTE(review): none of these five names is referenced in the cells visible
# here; they may be leftovers or used further down -- confirm before removing.
FORCE_TRAIN, FORCE_FULL_TRAIN = True, False
full_model_name, mini_model_name = 'model', 'model_mini'
model_version = '_epoch_1.pth'

# Log sink shared by the model and the training helpers; writes to a file.
file_out = WriteWrapper('train_log.txt')
#file_out = WriteWrapper(None) # This will print all to stdout

# Training and validation datasets, each built from a CSV listing under
# csv_dir plus the path translation table.
# NOTE(review): the meaning of the trailing 128 is defined by GetDataset and is
# not visible here (presumably an image size) -- confirm.
train_dataset = GetDataset(f"{csv_dir}train_indoors.csv", translation_table, 128)
validator = GetDataset(f"{csv_dir}val_indoors.csv", translation_table, 128)

def TryModel(model_name, criterion, useLarge, useGlobalFeatures, finisherLR, epochs=16, batch_size=32):
    """Train, visualize and validate one UNet configuration, logging any failure.

    Builds a fresh ``UNet``, trains it on the module-level ``train_dataset``,
    renders predictions for a few fixed sample indices of both datasets and
    finally runs ``ValidateModel`` over the module-level ``validator`` set.
    Any ``Exception`` raised along the way is printed, written to ``file_out``
    (message and full traceback) and then swallowed, so a caller sweeping over
    many configurations can carry on with the next one.

    Args:
        model_name: Name forwarded to ``TrainModel``.
        criterion: Loss object forwarded to ``TrainModel``.
        useLarge: Forwarded to the ``UNet`` constructor.
        useGlobalFeatures: Forwarded to the ``UNet`` constructor.
        finisherLR: Forwarded to ``TrainModel``.
        epochs: Epoch count forwarded to ``TrainModel``.
        batch_size: Batch size of the training ``DataLoader``.

    Returns:
        None, both on success and on failure.
    """
    try:
        model = UNet(file_out, useLarge=useLarge, useGlobalFeatures=useGlobalFeatures)
        model.InitWeights()
        model.SetActive(True)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=6, pin_memory=True)
        # NOTE(review): the keyword is named ``train_dataset`` yet receives the
        # validation set. TrainModel is not visible here, so this is left as
        # is -- confirm it is intentional.
        model = TrainModel(model, model_name, train_loader, file_out, epochs=epochs, train_dataset=validator, LR=1e-3, lrShrink=0.9, criterion=criterion, finisherLR=finisherLR)

        # Fixed sample indices: 50, 60, ..., 100 from the training set.
        image_ids = [50 + 10 * i for i in range(6)]
        visualize_predictions(image_ids, train_dataset, model, 3)

        # Fixed sample indices: 10, 30, ..., 110 from the validation set.
        image_ids = [10 + 20 * i for i in range(6)]
        visualize_predictions(image_ids, validator, model, 4)

        validator_loader = DataLoader(validator, batch_size=1, shuffle=False, num_workers=0, pin_memory=True)
        ValidateModel(model, validator_loader)
    except Exception as e:
        # Deliberately best-effort: report the failure and return normally.
        import traceback  # local import, only needed on the failure path

        print("Error in model training: ", e)
        file_out("Error in model training: " + str(e))
        # The bare message rarely identifies the failing call; keep the stack.
        file_out(traceback.format_exc())
        file_out('=' * 20)
        file_out('\n' * 4)

In [None]:
import torch.nn as nn
from model_unet import AngularLoss, CombinedLoss

# Exhaustive sweep over 3 losses x 3 finisherLR values x 2 x 2 flags (36 runs),
# iterated with the loss outermost and useGlobalFeatures innermost.
# The three loss objects are created once and each is reused for all of its runs.
for criterion, finisherLR, useLarge, useGlobalFeatures in (
    (loss, lr, large, with_global)
    for loss in [nn.MSELoss(), AngularLoss(), CombinedLoss()]
    for lr in [0.2, 0.9, 1]
    for large in [True, False]
    for with_global in [True, False]
):
    print(f"Training with criterion: {criterion}, useLarge: {useLarge}, useGlobalFeatures: {useGlobalFeatures}, finisherLR: {finisherLR}")
    # The model name encodes the full configuration of the run.
    modelName = f"model_{criterion.__class__.__name__}_L_{useLarge}_G_{useGlobalFeatures}_f_{finisherLR}"
    TryModel(modelName, criterion, useLarge, useGlobalFeatures, finisherLR)