In [1]:
from utils import iou_score
# from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import torch
import argparse
import pandas as pd
from tqdm import tqdm
import numpy as np
import wandb
from model import get_model
from dataloader import CorroSeg

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def _run_epoch(model, loader, criterion, device, threshold, optimizer=None):
    """Run one full pass over ``loader`` and return per-sample mean loss and IoU.

    When ``optimizer`` is given the model is put in train mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled.

    Args:
        model: Network called as ``model(image.repeat(1, 3, 1, 1))``.
        loader: Iterable yielding ``(image, mask, well)`` batches and exposing
            a ``dataset`` attribute that supports ``len()``.
        criterion: Loss callable taking ``(outputs, mask)``.
        device: Device the batches are moved to.
        threshold: Cut-off applied to the outputs to obtain binary predictions.
        optimizer: Optional optimizer; its presence selects training mode.

    Returns:
        ``(mean_loss, mean_iou)``, both divided by ``len(loader.dataset)``.
        ``(nan, nan)`` is returned when the dataset is empty.
    """
    is_training = optimizer is not None
    if is_training:
        model.train()
    else:
        model.eval()

    running_loss = 0.0
    running_iou = 0.0

    with torch.set_grad_enabled(is_training):
        for image, mask, _well in tqdm(loader):
            # Masks are reshaped to one-channel 36x36 maps to line up with the model output.
            mask = mask.view(-1, 1, 36, 36)
            image = image.to(device)
            mask = mask.to(device)

            if is_training:
                optimizer.zero_grad()

            # The channel axis is tiled 3 times (presumably single-channel input
            # fed to an RGB backbone -- TODO confirm against `get_model`).
            outputs = model(image.repeat(1, 3, 1, 1))
            loss = criterion(outputs, mask)

            if is_training:
                loss.backward()
                optimizer.step()

            # Weight batch statistics by batch size so the final average is per sample.
            n_in_batch = image.size(0)
            running_loss += loss.item() * n_in_batch
            preds = outputs > threshold  # Apply threshold to get binary predictions
            running_iou += iou_score(preds, mask).item() * n_in_batch

    n_samples = len(loader.dataset)
    if n_samples == 0:
        # An empty split (e.g. valid_ratio == 0) has no meaningful average.
        return float("nan"), float("nan")
    return running_loss / n_samples, running_iou / n_samples


def main(args):
    """Train, validate and run test inference for the corrosion segmentation model.

    Steps: optionally start a Weights & Biases run, build the model and the
    data loaders, train for ``args.num_epochs`` epochs with progressive layer
    unfreezing, then threshold the test predictions and write them to
    ``predicted_masks.csv`` (one flattened mask per row, stored as booleans).

    Args:
        args: Namespace-like object providing ``wandb``, ``experiment_name``,
            ``wandb_id``, ``wandb_entity``, ``model_name``, ``num_epochs``,
            ``learning_rate``, ``weight_decay``, ``batch_size``,
            ``valid_ratio``, ``threshold``, ``unfreeze_at_epoch`` and
            ``layers_to_unfreeze_each_time``. An optional ``device`` attribute
            (e.g. ``"cuda"``) selects the compute device; it defaults to CPU.
            ``unfreeze_at_epoch`` of 0 or None disables progressive unfreezing.
    """
    if args.wandb:
        # Pass the hyperparameters through `config=` so they are attached to the
        # run; rebinding `wandb.config` after init only replaces a module attribute.
        wandb.init(
            name=args.experiment_name,
            id=args.wandb_id,
            entity=args.wandb_entity,
            project="corroseg",
            config={
                "architecture": args.model_name,
                "epochs": args.num_epochs,
                "learning_rate": args.learning_rate,
            },
        )

    # CPU stays the default; callers may opt in to another device via `args.device`.
    device = torch.device(getattr(args, "device", None) or "cpu")
    model = get_model(args.model_name).to(device)

    corro_seg = CorroSeg(
        'data', 'y_train.csv', shuffle=True,
        batch_size=args.batch_size, valid_ratio=args.valid_ratio,
        transform_img=None, transform_mask=None, transform_test=None,
        test_params={'batch_size': args.batch_size, 'shuffle': False},
    )
    train_loader, val_loader, test_loader = corro_seg.get_loaders()

    # Loss function and optimizer definition
    # BCELoss expects probabilities, so the model is assumed to end with a
    # sigmoid -- TODO confirm against `get_model`.
    criterion = nn.BCELoss()  # Binary cross-entropy loss
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)

    for epoch in tqdm(range(args.num_epochs)):
        # Defreezing strategy: every `unfreeze_at_epoch` epochs ask the model to
        # unfreeze more layers. The truthiness guard avoids a modulo-by-zero
        # when the schedule is disabled (0 or None).
        if args.unfreeze_at_epoch and epoch % args.unfreeze_at_epoch == 0:
            layers_to_unfreeze = (epoch // args.unfreeze_at_epoch) * args.layers_to_unfreeze_each_time
            model.unfreeze_layers(layers_to_unfreeze)

        # Training phase
        train_loss, train_iou = _run_epoch(
            model, train_loader, criterion, device, args.threshold, optimizer=optimizer
        )

        # Validation phase (no optimizer -> eval mode, gradients disabled)
        val_loss, val_iou = _run_epoch(
            model, val_loader, criterion, device, args.threshold
        )

        # Logging to Weights and Biases
        if args.wandb:
            wandb.log({'Train Loss': train_loss, 'Train IoU': train_iou,
                       'Validation Loss': val_loss, 'Validation IoU': val_iou}, step=epoch)

        print(f'Epoch {epoch+1}/{args.num_epochs}, Train Loss: {train_loss:.4f}, Train IoU: {train_iou:.4f}, Validation Loss: {val_loss:.4f}, Validation IoU: {val_iou:.4f}')

    # Testing phase
    model.eval()
    predicted_masks = []  # List to store predicted masks
    with torch.no_grad():
        for image, _ in test_loader:  # Ignore the masks in the test loader
            # Forward pass; no detach needed because gradients are disabled here.
            image = image.to(device)  # Move image to device
            output = model(image.repeat(1, 3, 1, 1))
            pred = output > args.threshold  # Apply threshold to get binary predictions
            pred = pred.cpu().numpy()

            # Flatten each predicted mask into a 1D row (one row per sample)
            flattened_mask = pred.reshape(pred.shape[0], -1)
            predicted_masks.extend(flattened_mask)

    # Save predicted masks to a CSV file
    predicted_masks = np.array(predicted_masks)
    df = pd.DataFrame(predicted_masks)
    df.to_csv("predicted_masks.csv", index=False)

    print("Predicted masks saved to predicted_masks.csv")

In [3]:
# Experiment configuration consumed by `main`, built from a plain mapping so the
# settings can be read group by group.
args = argparse.Namespace(**{
    # Weights & Biases tracking
    "wandb": True,
    "experiment_name": "test1",
    "output_dir": "wandb",
    "wandb_id": None,
    "wandb_entity": "lucasgascon",
    # Training schedule and data split
    "num_epochs": 20,
    "batch_size": 1,
    "valid_ratio": 0.1,
    # Model and optimisation
    "model_name": "resnet18",
    "learning_rate": 2e-5,
    "threshold": 0.5,
    # Progressive unfreezing
    "unfreeze_at_epoch": 3,
    "layers_to_unfreeze_each_time": 1,
    "weight_decay": 0.01,
})

In [4]:
# Run the full pipeline (training, per-epoch validation, test inference) with the
# configuration held in `args`; predictions are written to predicted_masks.csv.
main(args)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlucasgascon[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|██████████| 8397/8397 [10:18<00:00, 13.57it/s]
100%|██████████| 933/933 [00:18<00:00, 49.17it/s]
  5%|▌         | 1/20 [10:37<3:22:00, 637.94s/it]

Epoch 1/20, Train Loss: 0.2460, Train IoU: 0.9106, Validation Loss: 0.6216, Validation IoU: 0.9111


100%|██████████| 8397/8397 [09:06<00:00, 15.36it/s]
100%|██████████| 933/933 [00:17<00:00, 53.00it/s]
 10%|█         | 2/20 [20:02<2:58:23, 594.65s/it]

Epoch 2/20, Train Loss: 0.2492, Train IoU: 0.9106, Validation Loss: 0.7908, Validation IoU: 0.9111


100%|██████████| 8397/8397 [09:33<00:00, 14.64it/s]
100%|██████████| 933/933 [00:16<00:00, 58.20it/s]
 15%|█▌        | 3/20 [29:51<2:47:48, 592.29s/it]

Epoch 3/20, Train Loss: 0.2492, Train IoU: 0.9106, Validation Loss: 0.5091, Validation IoU: 0.9111


100%|██████████| 8397/8397 [07:32<00:00, 18.57it/s]
100%|██████████| 933/933 [00:17<00:00, 54.74it/s]
 20%|██        | 4/20 [37:41<2:24:59, 543.74s/it]

Epoch 4/20, Train Loss: 0.2454, Train IoU: 0.9106, Validation Loss: 0.5359, Validation IoU: 0.9111


100%|██████████| 8397/8397 [07:27<00:00, 18.77it/s]
100%|██████████| 933/933 [00:16<00:00, 56.39it/s]
 25%|██▌       | 5/20 [45:25<2:08:44, 514.96s/it]

Epoch 5/20, Train Loss: 0.2429, Train IoU: 0.9106, Validation Loss: 0.5057, Validation IoU: 0.9111


100%|██████████| 8397/8397 [07:35<00:00, 18.45it/s]
100%|██████████| 933/933 [00:16<00:00, 55.76it/s]
 30%|███       | 6/20 [53:16<1:56:43, 500.27s/it]

Epoch 6/20, Train Loss: 0.2421, Train IoU: 0.9106, Validation Loss: 0.3463, Validation IoU: 0.9111


100%|██████████| 8397/8397 [08:53<00:00, 15.74it/s]
100%|██████████| 933/933 [00:18<00:00, 51.20it/s]
 35%|███▌      | 7/20 [1:02:28<1:52:01, 517.07s/it]

Epoch 7/20, Train Loss: 0.2403, Train IoU: 0.9106, Validation Loss: 0.3003, Validation IoU: 0.9111


100%|██████████| 8397/8397 [09:47<00:00, 14.30it/s]
100%|██████████| 933/933 [00:17<00:00, 52.50it/s]
 40%|████      | 8/20 [1:12:33<1:49:00, 545.07s/it]

Epoch 8/20, Train Loss: 0.2373, Train IoU: 0.9106, Validation Loss: 0.4949, Validation IoU: 0.9111


