In [1]:
import pandas as pd
import sys
from sklearn.model_selection import train_test_split
import time
import torch.optim as optim
import torch.nn as nn
import numpy as np
import os
import matplotlib.pyplot as plt
from pathlib import Path
import torch
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader,Dataset
from tqdm.notebook import tqdm # Progress bar
from PIL import Image


In [2]:
# Make the sibling ../src directory importable from this notebook.
# The membership guard keeps repeated runs of this cell from appending
# the same entry to sys.path more than once.
src_path = os.path.abspath(os.path.join(os.pardir, 'src'))
if src_path not in sys.path:
    sys.path.append(src_path)

In [3]:
# Seed NumPy and PyTorch with one shared value.
random_seed = 33
torch.manual_seed(random_seed)
np.random.seed(random_seed)

# All split lists live in <data_dir>/metadata.
data_dir = os.path.join('..', 'data')
metadata_dir = os.path.join(data_dir, 'metadata')
train_val_path, test_path, train_path, val_path = (
    os.path.join(metadata_dir, list_name)
    for list_name in (
        'train_val_list.txt',
        'test_list.txt',
        'train_list.txt',
        'val_list.txt',
    )
)

# The list files have no header row; each row is read into a single
# 'Image Index' column.
train_val = pd.read_csv(train_val_path, names=['Image Index'], header=None)
test = pd.read_csv(test_path, names=['Image Index'], header=None)

# 80/20 train/validation split with its own fixed random_state (42, not
# random_seed).
# NOTE(review): the split is made per row of the list; if several rows can
# belong to the same subject they may land on both sides — confirm this is
# acceptable.
train, val = train_test_split(train_val, random_state=42, test_size=0.2)
train_size, val_size = len(train), len(val)


In [4]:
from sklearn.preprocessing import MultiLabelBinarizer

# Load the per-image metadata table.
metadata_file = os.path.join(metadata_dir, 'Data_Entry_2017_v2020.csv')
metadata_df = pd.read_csv(metadata_file)

# Drop rows labelled exactly 'No Finding'. The explicit .copy() makes the
# filtered frame independent of the one read from disk, so the column
# assignment below does not raise SettingWithCopyWarning (pandas < 3) and
# unambiguously modifies this frame.
metadata_df = metadata_df[metadata_df['Finding Labels'] != 'No Finding'].copy()

# 'Finding Labels' holds '|'-separated label names; turn each into a list.
metadata_df['Finding Labels'] = metadata_df['Finding Labels'].str.split('|')

# Apply one-hot (multi-label) encoding: one 0/1 column per distinct label.
mlb = MultiLabelBinarizer()
one_hot = mlb.fit_transform(metadata_df['Finding Labels'])

# Convert to a DataFrame aligned on metadata_df's index and append the
# indicator columns to `metadata_df`.
one_hot_df = pd.DataFrame(one_hot, columns=mlb.classes_, index=metadata_df.index)
metadata_df = pd.concat([metadata_df, one_hot_df], axis=1)

# Restrict the metadata to each split. The join key is spelled out so the
# merge cannot silently pick up an extra shared column; inner joins drop
# images that were filtered out above.
val_df = pd.merge(metadata_df, val, how='inner', on='Image Index')
train_df = pd.merge(metadata_df, train, how='inner', on='Image Index')
test_df = pd.merge(metadata_df, test, how='inner', on='Image Index')

In [5]:
# Path of the 'images' folder under the shared data directory.
images_dir = os.path.join(data_dir, 'images')

In [6]:
# NOTE(review): the star import hides where make_data_loaders (and any other
# helper used later) comes from; consider importing the names explicitly
# once the contents of utils are confirmed.
from utils import *

# Build the loaders from the three split frames and the image folder.
# The call returns the loaders (indexed by split name further down), the
# dataset sizes and the per-class counts.
dataloaders, dataset_sizes, class_counts = make_data_loaders(
    train_df, val_df, test_df,
    images_dir,
    32,   # NOTE(review): presumably the batch size — confirm against utils
    224,  # NOTE(review): presumably the input image size — confirm against utils
)

In [7]:
import torchvision.models as models
from torchvision.models import ResNet50_Weights

# ResNet-50 initialised with the IMAGENET1K_V1 weights.
model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

# Swap the final fully connected layer for a fresh 14-output linear layer
# that keeps the backbone's feature width.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=14)

In [8]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Per-class counts from the data-loader step, as a NumPy array for the loss.
counts = np.array(class_counts)

# Loss, optimizer and scheduler come from project helpers selected by name.
criterion = get_loss('bce_w', counts, device)
optimizer = get_optimizer(
    model.parameters(),
    optimizer='Adam',
    lr=1e-4,
    weight_decay=1e-5,
)
scheduler = get_scheduler(optimizer, name='cyclic')

In [11]:
from train import *

# Training-run settings, passed positionally to train_model below.
# NOTE(review): the recorded output of this cell reports epochs as "N/12"
# after loading a checkpoint from epoch 2, while num_epochs is 10 — confirm
# how train_model derives the total when resuming.
num_epochs = 10
steps = None
s_patience = 3
patience = 15

# Directory handed to train_model as its model location.
model_dir = os.path.join('..', 'models')

# train_model returns the model, which replaces the `model` defined earlier.
model = train_model(
    device, model, model_dir,
    dataloaders['train'], dataloaders['val'],
    criterion, optimizer, scheduler,
    num_epochs, steps, s_patience, patience,
)

  checkpoint = torch.load(checkpoint_path)


Loaded checkpoint from epoch 2.
Starting epoch 3/12


Training: 100%|██████████| 899/899 [03:32<00:00,  4.22it/s]
Validating: 100%|██████████| 228/228 [02:14<00:00,  1.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [3/12], Validation Loss: 0.1048, AUC: 0.5062, Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000, Training Time: 212.81s, Validation Time: 134.58s, Total Time: 347.39s
No improvement in validation loss for 1 epoch(s).
Model checkpoint saved at epoch 3.
Starting epoch 4/12


Training: 100%|██████████| 899/899 [03:31<00:00,  4.25it/s]
Validating: 100%|██████████| 228/228 [02:16<00:00,  1.67it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [4/12], Validation Loss: 0.0392, AUC: 0.5784, Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000, Training Time: 211.30s, Validation Time: 136.32s, Total Time: 347.62s
Model checkpoint saved at epoch 4.
Starting epoch 5/12


Training: 100%|██████████| 899/899 [03:27<00:00,  4.33it/s]
Validating: 100%|██████████| 228/228 [02:14<00:00,  1.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [5/12], Validation Loss: 0.0395, AUC: 0.5755, Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000, Training Time: 207.71s, Validation Time: 134.72s, Total Time: 342.44s
No improvement in validation loss for 1 epoch(s).
Model checkpoint saved at epoch 5.
Starting epoch 6/12


Training: 100%|██████████| 899/899 [03:25<00:00,  4.37it/s]
Validating: 100%|██████████| 228/228 [02:12<00:00,  1.72it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [6/12], Validation Loss: 0.0391, AUC: 0.5862, Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000, Training Time: 205.65s, Validation Time: 132.75s, Total Time: 338.40s
Model checkpoint saved at epoch 6.
Starting epoch 7/12


Training: 100%|██████████| 899/899 [03:25<00:00,  4.38it/s]
Validating: 100%|██████████| 228/228 [02:16<00:00,  1.66it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [7/12], Validation Loss: 0.0390, AUC: 0.5916, Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000, Training Time: 205.03s, Validation Time: 137.00s, Total Time: 342.03s
Model checkpoint saved at epoch 7.
Starting epoch 8/12


Training: 100%|██████████| 899/899 [03:25<00:00,  4.38it/s]
Validating: 100%|██████████| 228/228 [02:15<00:00,  1.68it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [8/12], Validation Loss: 0.0390, AUC: 0.5890, Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000, Training Time: 205.04s, Validation Time: 135.98s, Total Time: 341.02s
No improvement in validation loss for 1 epoch(s).
Model checkpoint saved at epoch 8.
Starting epoch 9/12


Training: 100%|██████████| 899/899 [03:21<00:00,  4.47it/s]
Validating: 100%|██████████| 228/228 [02:11<00:00,  1.73it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [9/12], Validation Loss: 0.0391, AUC: 0.5966, Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000, Training Time: 201.31s, Validation Time: 131.99s, Total Time: 333.30s
No improvement in validation loss for 2 epoch(s).
Model checkpoint saved at epoch 9.
Starting epoch 10/12


Training:  78%|███████▊  | 704/899 [02:35<00:43,  4.53it/s]


KeyboardInterrupt: 