<a href="https://colab.research.google.com/github/ElenaBianchini/ColoringGrayscaleImages/blob/main/ProgettoLabIA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Progetto di Laboratorio di Intelligenza Artificiale e Grafica Interattiva**

# Import

In [10]:
%matplotlib inline

In [None]:
# Mount Google Drive at /content/drive (requires the google.colab module).
from google.colab import drive
drive.mount('/content/drive')

In [12]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
import torchvision 
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from skimage import color
from PIL import Image

# Impostazione dei parametri

In [13]:
# Training hyper-parameters.
num_epochs, batch_size = 50, 64
learning_rate = 0.001
# True when PyTorch reports an available CUDA device.
use_gpu = torch.cuda.is_available()

In [14]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if use_gpu else "cpu")

# Paths

In [20]:
# Root folder of the COCO data, located under the Google Drive mount point.
root_path = "/content/drive/MyDrive/COCO"

In [21]:
# One folder per dataset split; every path ends with a trailing slash.
train_folder = "{}/train2014/".format(root_path)
val_folder = "{}/val2014/".format(root_path)
test_folder = "{}/test2014/".format(root_path)

# Dataset

In [22]:
def loadImagesName(dir_path):
    """Return the full paths of the regular files found directly in ``dir_path``.

    Sub-directories (for instance the ``.ipynb_checkpoints`` folders that
    notebook tooling may create) are skipped so that every returned path can be
    opened as a file. The result is sorted by file name so the listing does not
    depend on the arbitrary order in which the file system reports entries.

    Args:
        dir_path: Directory to scan (not recursive).

    Returns:
        list[str]: ``os.path.join(dir_path, name)`` for each file, sorted by name.

    Raises:
        OSError: If ``dir_path`` cannot be listed (propagated from ``os.scandir``).
    """
    with os.scandir(dir_path) as entries:
        # scandir exposes the entry type, which usually avoids one extra stat
        # call per file compared with os.path.isfile.
        file_names = sorted(entry.name for entry in entries if entry.is_file())

    images_list = []
    for count, image_name in enumerate(file_names, start=1):
        images_list.append(os.path.join(dir_path, image_name))
        # '\r' rewrites the same output line to show a running counter.
        print("\rImage num: {}".format(count), end='')

    return images_list


In [23]:
# Collect the file paths found in the training folder.
train_list = loadImagesName(train_folder)

KeyboardInterrupt: ignored

In [None]:
# Collect the file paths found in the validation folder.
val_list = loadImagesName(val_folder)

In [None]:
# Collect the file paths found in the test folder.
test_list = loadImagesName(test_folder)

In [None]:
class ImageDataset(torch.utils.data.Dataset):
  """Dataset that loads images from disk and returns them as scaled CIE-Lab tensors.

  Each item is a float32 tensor of shape (3, 224, 224): channel 0 is L divided
  by ``L_SCALE``, channels 1 and 2 are a and b divided by ``AB_SCALE``.

  The ImageNet RGB mean/std normalisation used previously was applied to Lab
  values, for which those statistics are meaningless, and ``ToTensor`` left the
  float64 array produced by ``rgb2lab`` as a float64 tensor, which a float32
  model rejects. Both steps are replaced by ``lab_to_tensor``.

  Args:
    images_list: Sequence of image file paths.
  """

  # Divisors that bring the Lab channels to roughly unit range
  # (skimage's rgb2lab yields L in [0, 100]; a/b are treated as within about +-128).
  L_SCALE = 100.0
  AB_SCALE = 128.0

  def __init__(self, images_list):
    self.images_list = images_list
    self.img_transform = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.Lambda(self.import_image),   # PIL RGB -> H x W x 3 Lab array
      transforms.Lambda(self.lab_to_tensor),  # Lab array -> scaled float32 C x H x W tensor
    ])

  def __len__(self):
    """Number of image paths in the dataset."""
    return len(self.images_list)

  def __getitem__(self, idx):
    """Load image ``idx``, force RGB, and return its transformed Lab tensor."""
    # convert() returns a fully loaded copy, so the file can be closed right away.
    with Image.open(self.images_list[idx]) as img_file:
      img = img_file.convert('RGB')
    return self.img_transform(img)

  def import_image(self, img):
    """Convert an RGB image to a Lab array using ``skimage.color.rgb2lab``."""
    return color.rgb2lab(img)

  @staticmethod
  def lab_to_tensor(lab):
    """Turn an H x W x 3 Lab array into a scaled float32 tensor of shape (3, H, W).

    Args:
      lab: Array-like of shape (H, W, 3) holding L, a, b in the last axis.

    Returns:
      torch.Tensor: float32 tensor with L / L_SCALE in channel 0 and
      a / AB_SCALE, b / AB_SCALE in channels 1 and 2. The input is not modified.
    """
    lab = np.asarray(lab, dtype=np.float32)
    divisors = np.array(
      [ImageDataset.L_SCALE, ImageDataset.AB_SCALE, ImageDataset.AB_SCALE],
      dtype=np.float32,
    )
    # The division broadcasts over the last (channel) axis and allocates a new array.
    scaled = lab / divisors
    return torch.from_numpy(np.ascontiguousarray(scaled.transpose(2, 0, 1)))



In [None]:
# Training split: shuffled mini-batches of `batch_size` items.
train_dataset = ImageDataset(images_list=train_list)
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Validation split: shuffled mini-batches of `batch_size` items.
val_dataset = ImageDataset(images_list=val_list)
val_dataloader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Test split: shuffled mini-batches of `batch_size` items.
test_dataset = ImageDataset(images_list=test_list)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Sanity check: print the shape of the first test batch, then stop.
for data in test_dataloader:
  print(data.shape)
  break

# Regressione

## Modello

In [None]:
class ColorizationRNet(nn.Module):
  """Regression network: a truncated ResNet-18 front-end followed by a
  convolution/upsampling decoder that ends in 2 output channels."""

  def __init__(self):
    super().__init__()

    # ResNet-18 (built with default arguments) used to extract image features.
    backbone = torchvision.models.resnet18()
    # Sum the first convolution's kernel over its input-channel axis so the
    # layer accepts single-channel input.
    backbone.conv1.weight = nn.Parameter(backbone.conv1.weight.sum(dim=1, keepdim=True))
    # Keep only the first six child modules of the ResNet as feature extractor.
    self.midlevel_resnet = nn.Sequential(*list(backbone.children())[:6])

    # Decoder: 3x3 convolutions interleaved with three x2 upsampling steps.
    decoder_layers = [
        nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Upsample(scale_factor=2),

        nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Upsample(scale_factor=2),

        nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.Conv2d(32, 2, kernel_size=3, stride=1, padding=1),
        nn.Upsample(scale_factor=2),
    ]
    self.deconv = nn.Sequential(*decoder_layers)

  def forward(self, x):
    """Apply the feature extractor and then the decoder to ``x``."""
    features = self.midlevel_resnet(x)
    return self.deconv(features)

In [None]:
# Move the model to the selected device: the training and validation loops send
# every batch to `device`, so the weights must live there too.
reg_net = ColorizationRNet().to(device)

## Funzione di costo e di ottimizzazione

In [78]:
# Mean-squared-error loss, optimised with Adam over all model parameters.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(reg_net.parameters(), lr=learning_rate)

## Train

In [77]:
def train(epoch, loss_avg):
  """Run one training epoch of ``reg_net`` and record its average loss.

  Uses the notebook-level ``reg_net``, ``train_dataloader``, ``criterion``,
  ``optimizer`` and ``device``.

  Args:
    epoch: Epoch number, used only in the progress messages.
    loss_avg: List to which this epoch's average loss is appended (mutated in place).
  """
  # Set model to training mode
  reg_net.train()

  print('Starting training epoch {}'.format(epoch))

  num_batches = len(train_dataloader)
  loss_avg.append(0)

  for batch_idx, img in enumerate(train_dataloader):
    img = img.to(device)

    # Network input: channel 0 of the batch (the lightness / grayscale channel)
    img_grayscale = img[:, 0:1, :, :]

    # Target: channels 1 and 2 of the batch (the ab colour channels)
    img_ab = img[:, 1:3, :, :]

    # Predict the ab channels from the grayscale input
    predicted = reg_net(img_grayscale)

    # L2 error between predicted and true colours
    loss = criterion(predicted, img_ab)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()

    # Update the weights
    optimizer.step()

    loss_avg[-1] += loss.item()

    if batch_idx % 12 == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx, num_batches, 100. * batch_idx / num_batches, loss.item()))

  # Each accumulated value is already a per-batch mean, so the epoch average is
  # the sum divided by the number of batches (not by the batch size).
  loss_avg[-1] /= max(num_batches, 1)
  print('Finished training epoch {}'.format(epoch))


## Validation

In [None]:
def validation(epoch, val_loss_avg):
  """Evaluate ``reg_net`` on the validation set and record the average loss.

  Uses the notebook-level ``reg_net``, ``val_dataloader``, ``criterion`` and
  ``device``.

  Args:
    epoch: Epoch number (currently unused, kept for the existing call signature).
    val_loss_avg: List to which this epoch's average validation loss is
      appended (mutated in place).

  Returns:
    float: The average validation loss that was just appended.
  """
  # Set model to evaluation mode
  reg_net.eval()

  num_batches = len(val_dataloader)
  val_loss_avg.append(0)

  # No gradients are needed for evaluation; skipping them saves memory and time.
  with torch.no_grad():
    for batch_idx, img in enumerate(val_dataloader):
      img = img.to(device)

      # Network input: channel 0 of the batch (the lightness / grayscale channel)
      img_grayscale = img[:, 0:1, :, :]

      # Target: channels 1 and 2 of the batch (the ab colour channels)
      img_ab = img[:, 1:3, :, :]

      # Predict the ab channels from the grayscale input
      predicted = reg_net(img_grayscale)

      # L2 error between predicted and true colours
      loss = criterion(predicted, img_ab)
      val_loss_avg[-1] += loss.item()

  # Average of the per-batch mean losses: divide by the number of batches.
  val_loss_avg[-1] /= max(num_batches, 1)
  # Format the scalar of this epoch; formatting the whole list with '{:.4f}' raises TypeError.
  print('\nValidation set: Average loss: {:.4f}\n'.format(val_loss_avg[-1]))
  return val_loss_avg[-1]


## Allenamento

In [None]:
# Folder for the saved weights; created up front because torch.save does not
# create missing directories.
checkpoint_dir = './checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# Start from +inf so the first epoch always yields a checkpoint, whatever the
# scale of the loss.
best_losses = float('inf')
train_loss_avg = []
val_loss_avg = []

for epoch in range(1, num_epochs+1):
  train(epoch, train_loss_avg)
  losses = validation(epoch, val_loss_avg)

  # Save the weights whenever the validation loss improves.
  if losses < best_losses:
    best_losses = losses
    # `epoch` is already 1-based, so it is used as-is in the file name.
    checkpoint_name = 'model-epoch-{}-losses-{:.3f}.pth'.format(epoch, losses)
    torch.save(reg_net.state_dict(), os.path.join(checkpoint_dir, checkpoint_name))


## Grafico della curva di apprendimento

In [None]:
# Learning curve: average training loss, one point per epoch.
plt.ion()
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_loss_avg)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Training loss')
plt.show()

In [None]:
# Learning curve: average validation loss, one point per epoch.
plt.ion()
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(val_loss_avg)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Validation loss')
plt.show()

## Risultato su un'immagine del Test Set

# Classificazione 

## Modello

In [None]:
class ColorizationCNet(nn.Module):
  """Fully convolutional colorization network with a 313-way softmax stage.

  Takes a single-channel input of shape (N, 1, H, W) and returns a 2-channel
  map. Fixes three typos that made construction fail: ``dilatation`` ->
  ``dilation``, ``nn.ConvTransposed2d`` -> ``nn.ConvTranspose2d`` and
  ``kenel_size`` -> ``kernel_size``.

  NOTE(review): the four stride-2 convolutions reduce the resolution by 16,
  the transposed convolution doubles it and the final Upsample multiplies it by
  4, so the output is (N, 2, H/2, W/2), i.e. half the input resolution.
  Confirm this is intended before training against full-resolution targets.
  """

  def __init__(self):
    super(ColorizationCNet, self).__init__()

    self.network = nn.Sequential(
        # Block 1: 1 -> 64 channels, resolution / 2
        nn.Conv2d(1,64,kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(64,64,kernel_size=3, stride=2, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(64),

        # Block 2: 64 -> 128 channels, resolution / 2
        nn.Conv2d(64,128,kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(128,128,kernel_size=3, stride=2, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(128),

        # Block 3: 128 -> 256 channels, resolution / 2
        nn.Conv2d(128,256,kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(256,256,kernel_size=3, stride=2, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(256),

        # Block 4: 256 -> 512 channels, resolution / 2
        nn.Conv2d(256,512,kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(512,512,kernel_size=3, stride=2, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(512),

        # Blocks 5 and 6: dilated 3x3 convolutions; padding=2 with dilation=2
        # keeps the spatial size unchanged.
        nn.Conv2d(512,512,kernel_size=3, stride=1, padding=2, dilation=2),
        nn.ReLU(),
        nn.Conv2d(512,512,kernel_size=3, stride=1, padding=2, dilation=2),
        nn.ReLU(),
        nn.BatchNorm2d(512),

        nn.Conv2d(512,512,kernel_size=3, stride=1, padding=2, dilation=2),
        nn.ReLU(),
        nn.Conv2d(512,512,kernel_size=3, stride=1, padding=2, dilation=2),
        nn.ReLU(),
        nn.BatchNorm2d(512),

        # Block 7: plain 3x3 convolutions
        nn.Conv2d(512,512,kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(512,512,kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(512),

        # Block 8: transposed convolution doubles the resolution
        nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1),
        nn.ReLU(),
        nn.Conv2d(256,256, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),

        # Per-pixel distribution over 313 classes (presumably quantised ab
        # colour bins; confirm), then a 1x1 convolution mapping it to 2 channels.
        nn.Conv2d(256, 313, kernel_size=1, stride=1, padding=0),
        nn.Softmax(dim=1),
        nn.Conv2d(313, 2, kernel_size=1, padding=0, dilation=1, stride=1),
        nn.Upsample(scale_factor=4)
    )


  def forward(self, x):
    """Run ``x`` through the sequential network and return its 2-channel output."""
    output = self.network(x)
    return output

In [None]:
# Place the model on the selected device, where the notebook sends its batches.
clas_net = ColorizationCNet().to(device)

## Funzione di costo e di ottimizzazione

## Train

## Grafico della curva di apprendimento

## Valutazione sul Test Set

## Risultato su un'immagine del Test Set