In [None]:
# comet_ml stays first: Comet asks for it to be imported before torch so its
# framework auto-logging can attach.
from comet_ml import Experiment

import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

from concrete_scars_dataset import ConcreteScarsDataset
from concrete_scars_model import ConcreteScarsModel

# The Comet API key is read from the environment so it never lands in the notebook;
# a missing COMETKEY variable raises KeyError here.
API_KEY = os.environ['COMETKEY']
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Tunable settings for this run; the whole dict is logged to Comet in a later cell.
hyper_params = dict(
    learning_rate=0.001,
    epochs=50,
    batch_size=10,
    negative_samples=300,
    positive_samples=700,
)

# Training set; the default distribution is 300 negative / 700 positive samples.
train_dataset = ConcreteScarsDataset(
    transform=ToTensor(),
    n_negative=hyper_params['negative_samples'],
    n_positive=hyper_params['positive_samples'],
    skip=10,
)
# Small fixed-size set (5 negative + 5 positive) used for per-epoch preview images.
test_dataset = ConcreteScarsDataset(
    transform=ToTensor(),
    n_negative=5,
    n_positive=5,
    skip=0,
)

# Both loaders share the same batching options.
_loader_options = dict(batch_size=hyper_params['batch_size'], shuffle=True, num_workers=1)
train_loader = DataLoader(dataset=train_dataset, **_loader_options)
test_loader = DataLoader(dataset=test_dataset, **_loader_options)

# Number of training samples (not batches); the training cell uses it as the
# denominator of its progress print-out.
steps = len(train_dataset)

model = ConcreteScarsModel().to(DEVICE)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=hyper_params['learning_rate'])


In [None]:
# Open the Comet experiment for this run and attach the hyper-parameters to it.
experiment = Experiment(api_key=API_KEY, project_name='concrete-scars-detection', workspace='my-projects')
experiment.log_parameters(hyper_params)

In [None]:
# Train for hyper_params['epochs'] epochs. After every epoch the test images,
# their masks and the model predictions are logged to Comet together with
# statistics of the per-batch training loss.
use_cuda = DEVICE.type == 'cuda'
n_epochs = hyper_params['epochs']

# Allocated once, before the loop, so the time-left estimate averages over
# every finished epoch instead of only the current one.
# The first epoch is always a few seconds longer; no warm-up is done.
s_per_epoch = np.zeros(n_epochs)
total_time = 0

for epoch in range(n_epochs):
    # One slot per batch. Sizing this by the number of samples would leave
    # unused zeros that drag down the mean/min and distort the std.
    error = np.zeros(len(train_loader))

    # Wall-clock timing also works on CPU; synchronising first keeps pending
    # GPU work from being attributed to the wrong epoch.
    if use_cuda:
        torch.cuda.synchronize()
    epoch_start = time.perf_counter()

    model.train()
    for i, (images, masks) in enumerate(train_loader):
        images = images.to(DEVICE)
        masks = masks.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)

        loss.backward()
        optimizer.step()

        error[i] = loss.item()
        # min() keeps the counter from overshooting on a partial last batch.
        seen = min((i+1)*hyper_params["batch_size"], steps)
        print(f'Step:  {seen}/{steps} epoch: {epoch+1}', end='\r', flush=True)

    # Preview pass over the test set: inference only, so switch to eval mode
    # and disable autograd (no graph is built and nothing is updated).
    step = (epoch+1)*len(train_dataset)
    model.eval()
    with torch.no_grad():
        for i in range(len(test_dataset)):
            # Fetch the sample once so image, mask and prediction all refer to
            # the same item.
            sample = test_dataset[i]
            # The image is passed without a batch dimension, as before --
            # assumes the model accepts unbatched input (TODO confirm).
            output = model(sample[0].to(DEVICE))

            #converting dimensions to HWC
            img = sample[0].permute(1,2,0)
            msk = sample[1].permute(1,2,0)
            predict = output.permute(1,2,0).cpu()

            #log images to comet
            experiment.log_image(img, f'image{i+1}', step=step)
            experiment.log_image(msk, f'mask{i+1}', step=step)
            experiment.log_image(predict*255, f'predict{i+1}', step=step) #*255 makes it more visible in comet

    if use_cuda:
        torch.cuda.synchronize()
    # Seconds spent on training plus preview logging for this epoch.
    s_per_epoch[epoch] = round(time.perf_counter() - epoch_start, 3)

    error_avg = np.mean(error)
    error_std = np.std(error)
    error_min = np.min(error)
    error_max = np.max(error)
    metrics = { 'error_avg': error_avg,
                'error_std': error_std,
                'error_min': error_min,
                'error_max': error_max}
    experiment.log_metrics(metrics, epoch=epoch+1)

    # Epochs still to run once this one has finished.
    epochs_left = n_epochs - (epoch+1)
    est_min_left = round(s_per_epoch[:epoch+1].mean()*epochs_left/60, 2)
    print(f'Epoch: {epoch+1} error: {error_avg}')
    print(f'Finished in: {s_per_epoch[epoch]}s, est. time left: {est_min_left} min')
    total_time += s_per_epoch[epoch]
print('Total time:', round(total_time/60, 2), 'min')
    

Showing results

In [None]:
# Run the trained model on one training batch and one test batch so the next
# cell can plot the results. This is inference only: eval mode plus no_grad
# avoids building autograd graphs that would keep GPU memory alive.
model.eval()
with torch.no_grad():
    image, mask = next(iter(train_loader))
    image = image.to(DEVICE)
    mask = mask.to(DEVICE)
    predict = model(image)

    image2, mask2 = next(iter(test_loader))
    image2 = image2.to(DEVICE)
    mask2 = mask2.to(DEVICE)
    predict2 = model(image2)

In [None]:
# Show input, prediction and ground-truth mask for one item of the test batch.
# no_grad is kept so the tensors handed to matplotlib never require grad, even
# if predict2 was produced with autograd enabled.
with torch.no_grad():
    i = 2 # index within the batch: 0 <= i < len(image2)
    # CHW -> HWC, moved to the CPU for matplotlib
    img_dim = image2[i].permute(1, 2, 0).cpu()
    predict_dim = predict2[i].permute(1,2,0).cpu()
    msk_dim = mask2[i].permute(1,2,0).cpu()

    panels = (
        ('Input image', img_dim),
        ('Prediction (x255)', predict_dim*255),
        ('Ground-truth mask', msk_dim),
    )
    for title, panel in panels:
        plt.imshow(panel)
        plt.title(title)
        # Explicit show() after every figure, including the last, so no
        # AxesImage repr is left in the cell output.
        plt.show()

In [None]:
# Save the trained weights locally and upload the file to the Comet experiment.
model_state_file = 'model_versions/filename'

# torch.save does not create missing parent directories, so make sure the
# target folder exists (skipped when the path has no directory part).
model_state_dir = os.path.dirname(model_state_file)
if model_state_dir:
    os.makedirs(model_state_dir, exist_ok=True)

torch.save(model.state_dict(), model_state_file)
experiment.log_model('name on comet', model_state_file)

In [None]:
# Mark the Comet experiment as finished; run this last, after all logging cells.
experiment.end()