In [1]:
import matplotlib.pyplot as plt
from pathlib import Path
from torch.utils.data import DataLoader
import torch as th
from data_preprocessing.ImageDataset import CustomImageDataset
from models.autoencoder import Autoencoder_conv
from models.losses import batch_MSE_loss
import torch.optim as optim
from tqdm.auto import trange

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# --- Training hyperparameters -------------------------------------------
batch_size = 32
epochs = 8
learning_rate = 0.01

# --- Output locations ---------------------------------------------------
figure_folder = Path("../figs/autoencoder/")
model_folder = Path("models/")

# --- Input datasets -----------------------------------------------------
train_dataset_path = Path("../data/datasets/train/dataset_2000_100_5.pth")
test_dataset_path = Path("../data/datasets/test/dataset_500_100_5.pth")

# Report (but do not abort on) any dataset file that is missing, so the
# problem is visible before the loading cell is run.
for dataset_path in (train_dataset_path, test_dataset_path):
    if not dataset_path.exists():
        print(f"Path {dataset_path} does not exist")

In [15]:
print("Loading datasets")

# NOTE(review): th.load unpickles the file, which can execute arbitrary code.
# Only point these paths at dataset files you produced yourself or trust.
#
# map_location="cpu" places every stored tensor on the CPU regardless of the
# device it was saved from. The model in this notebook is never moved to
# another device, so the batches have to live on the CPU as well.
train_dataset = th.load(train_dataset_path, map_location="cpu")
print("Train dataset loaded")
th_train = CustomImageDataset(train_dataset)
# Training batches are reshuffled every epoch.
train_loader = DataLoader(th_train, batch_size=batch_size, shuffle=True)
print("Train dataLoader created")


test_dataset = th.load(test_dataset_path, map_location="cpu")
print("Test dataset loaded")
th_test = CustomImageDataset(test_dataset)
# Keep a fixed order for evaluation so the per-epoch test loss is comparable.
test_loader = DataLoader(th_test, batch_size=batch_size, shuffle=False)
print("Test dataLoader created")


Loading datasets
Train dataset loaded
Train dataLoader created
Test dataset loaded
Test dataLoader created


In [16]:
# Diagnostic: list the top-level entries of the object returned by th.load.
train_dataset.keys()

dict_keys(['images', 'labels', 'targets', 'masks'])

In [17]:
# Diagnostic: confirm which class train_loader is an instance of.
type(train_loader)

torch.utils.data.dataloader.DataLoader

In [19]:
# Diagnostic: len() of a DataLoader is the number of batches per pass,
# not the number of samples.
len(test_loader)

16

In [7]:
# Sanity check: fetch one batch and print the shape of what it contains.
#
# The training loop unpacks every batch into four items, so a batch is a
# sequence of fields rather than a single tensor; printing `data.shape`
# directly would raise AttributeError on such a batch.
#
# NOTE(review): the recorded run of this cell failed inside default_collate
# with "found <class 'NoneType'>", i.e. a sample handed to the collate step
# contained None. That presumably originates in CustomImageDataset — confirm
# and fix there; this cell cannot work around it.
data = next(iter(train_loader), None)

if data is None:
    print("train_loader yielded no batches")
elif isinstance(data, th.Tensor):
    print(data.shape)
else:
    for field in data:
        # Fall back to the type for any field that is not array-like.
        print(getattr(field, "shape", type(field)))

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

In [15]:
model = Autoencoder_conv()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print("Starting training")
# One entry per epoch: the mean of the per-batch losses (averaged over
# batches, not over samples).
train_losses = []
test_losses = []
for epoch in trange(epochs):
    # ---- training pass ----
    model.train()
    train_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        # NOTE(review): the raw dataset lists its keys as
        # images, labels, targets, masks. If CustomImageDataset yields fields
        # in that order, the third item here is `targets`, not `masks` —
        # confirm against CustomImageDataset.__getitem__.
        img, _, mask, _ = batch
        output = model(img)

        # Reconstruction objective: the input image is also the target.
        loss = batch_MSE_loss(output, img, mask)
        train_loss += loss.item()

        loss.backward()
        optimizer.step()

    train_losses.append(train_loss / len(train_loader))

    # ---- evaluation pass ----
    model.eval()
    test_loss = 0
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every test batch and the memory that goes with it.
    with th.no_grad():
        for batch in test_loader:
            img, _, mask, _ = batch
            output = model(img)

            loss = batch_MSE_loss(output, img, mask)
            test_loss += loss.item()
    test_losses.append(test_loss / len(test_loader))

Starting training


  0%|          | 0/8 [00:00<?, ?it/s]


TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

In [None]:
print("Training finished")

# Create both output directories up front. mkdir(..., exist_ok=True) is a
# no-op when the directory already exists, so no separate exists() check is
# needed. th.save does not create missing parent directories, so
# model_folder has to exist before the checkpoint is written.
figure_folder.mkdir(parents=True, exist_ok=True)
model_folder.mkdir(parents=True, exist_ok=True)

print("Saving model")
# Only the parameters (state_dict) are stored, not the model object itself.
th.save(model.state_dict(), model_folder / "autoencoder.pth")
print("Model saved")

print("Saving training loss plot")
fig, ax = plt.subplots()
# Label both curves so the saved figure can be read on its own.
ax.plot(train_losses, label="train")
ax.plot(test_losses, label="test")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss")
ax.legend()
fig.savefig(figure_folder / "losses.png")