<a href="https://colab.research.google.com/github/PepiMartin28/FootballLogos-VAE/blob/develop/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Notebook utilizada para crear el modelo y entrenarlo

In [None]:
import os
import pandas as pd
import torch;
import torch.nn as nn
import torch.nn.functional as F
import torch.utils
from torch.utils.data import Dataset
import torch.distributions
import torchvision
from torchvision import transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

#Descargamos los logos de los equipos

In [None]:
# Fetch two files from Google Drive by id, then extract logos.zip into the
# working directory.
# NOTE(review): presumably one download is logos.zip and the other the
# pretrained weights loaded later in the notebook -- confirm.
!gdown https://drive.google.com/uc?id=1yXdPhk-OZ-lhGMYbsP_FHc-22Xhsu5Ux
!gdown https://drive.google.com/uc?id=1x-CtdUFfrhAqsHAILxdqkAj9fUG-xTgQ
!unzip logos.zip

In [None]:
# Mount Google Drive at /content/drive; the training loop writes its
# checkpoints under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#Armamos el dataset

In [None]:
class LogoDataset(Dataset):
    """Dataset of logo images described by a CSV annotations file.

    The CSV is read with pandas and must provide the columns used below:
    ``id`` (only used to count rows), ``name`` (returned as the label) and
    ``img_dir`` (path of the image file to open).
    """

    def __init__(self, annotations_file):
        """Read the annotations table and build the image preprocessing.

        Args:
            annotations_file: Path of the CSV file handed to ``pd.read_csv``.
        """
        self.archivo = pd.read_csv(annotations_file)
        # Every image is resized to 200x200 and then converted to a tensor.
        resize_step = transforms.Resize((200, 200))
        tensor_step = transforms.ToTensor()
        self.transform = transforms.Compose([resize_step, tensor_step])

    def __len__(self):
        """Return the number of entries in the ``id`` column."""
        id_column = self.archivo["id"]
        return len(id_column)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row whose index label is ``idx``.

        The image is opened from the row's ``img_dir`` path, converted to
        RGBA and passed through ``self.transform``; the label is the row's
        ``name`` value.
        """
        table = self.archivo
        label = table.loc[idx, "name"]
        img_path = table.loc[idx, "img_dir"]
        rgba_image = Image.open(img_path).convert("RGBA")
        return self.transform(rgba_image), label

# Build the dataset from the CSV index under /content/logos.
dataset = LogoDataset("/content/logos/logos.csv")

# Serve shuffled mini-batches of 128 samples.
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=128, shuffle=True)

#Definimos el modelo

In [None]:
class AUG_block(nn.Module):
    """Upsampling stage: transposed convolution -> batch norm -> ReLU."""

    def __init__(self, out_channels, in_channels=4, kernel_size=5, strides=2,
                 padding=1, **kwargs):
        """Create the three layers of the stage.

        Args:
            out_channels: Number of channels produced by the stage.
            in_channels: Number of channels of the incoming tensor.
            kernel_size: Kernel size of the transposed convolution.
            strides: Stride of the transposed convolution.
            padding: Padding of the transposed convolution.
            **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
        """
        super().__init__(**kwargs)
        # The convolution has no bias term; the BatchNorm2d that follows it
        # carries its own learnable shift.
        self.conv2d_trans = nn.ConvTranspose2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=strides,
            padding=padding,
            bias=False,
        )
        self.batch_norm = nn.BatchNorm2d(num_features=out_channels)
        self.activation = nn.ReLU()

    def forward(self, X):
        """Apply transposed convolution, batch norm and ReLU to ``X`` in that order."""
        upsampled = self.conv2d_trans(X)
        normalized = self.batch_norm(upsampled)
        return self.activation(normalized)

In [None]:
class DEC_block(nn.Module):
    """Downsampling stage: strided convolution -> batch norm -> LeakyReLU."""

    def __init__(self, out_channels, in_channels=4, kernel_size=5, strides=2,
                padding=1, alpha=0.2, **kwargs):
        """Create the three layers of the stage.

        Args:
            out_channels: Number of channels produced by the stage.
            in_channels: Number of channels of the incoming tensor.
            kernel_size: Kernel size of the convolution.
            strides: Stride of the convolution.
            padding: Padding of the convolution.
            alpha: Negative slope of the LeakyReLU activation.
            **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
        """
        super().__init__(**kwargs)
        # The convolution has no bias term; the BatchNorm2d that follows it
        # carries its own learnable shift.
        self.conv2d = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=strides,
            padding=padding,
            bias=False,
        )
        self.batch_norm = nn.BatchNorm2d(num_features=out_channels)
        # inplace=True makes the activation overwrite the batch-norm output.
        self.activation = nn.LeakyReLU(negative_slope=alpha, inplace=True)

    def forward(self, X):
        """Apply convolution, batch norm and LeakyReLU to ``X`` in that order."""
        downsampled = self.conv2d(X)
        normalized = self.batch_norm(downsampled)
        return self.activation(normalized)

In [None]:
# Base channel width; the encoder stages use multiples of it.
n_G = 32


class Variational_Encoder(nn.Module):
    """Convolutional encoder that outputs a sampled latent vector and its statistics."""

    def __init__(self, latent_dims, n_channels=4):
        """Build the convolutional trunk and the two statistics heads.

        Args:
            latent_dims: Size of the latent vector.
            n_channels: Number of channels of the input images.
        """
        super().__init__()
        # Channel widths of the five downsampling stages, input first.
        widths = [n_channels, n_G, n_G * 2, n_G * 4, n_G * 8, n_G * 16]
        stages = [
            DEC_block(in_channels=c_in, out_channels=c_out)
            for c_in, c_out in zip(widths, widths[1:])
        ]
        self.conv_seq = nn.Sequential(
            *stages,
            # Collapse each feature map to one value, flatten, then project.
            nn.AdaptiveMaxPool2d((1, 1)),
            nn.Flatten(),
            nn.LazyLinear(latent_dims),
        )
        self.linear3 = nn.LazyLinear(latent_dims)  # head producing the mean
        self.linear4 = nn.LazyLinear(latent_dims)  # head producing the log-variance

    def forward(self, x):
        """Encode ``x`` and draw a latent sample with the reparameterization trick.

        Returns:
            Tuple ``(latente, media, log_var)``: the sampled latent vector
            ``media + eps * std`` with ``eps`` drawn from a standard normal,
            the mean, and the log-variance.
        """
        features = self.conv_seq(x)
        media = self.linear3(features)
        # NOTE(review): the ReLU clamps log_var to >= 0, so std is never
        # below 1. A VAE usually leaves the log-variance unconstrained;
        # confirm this is intended before changing it, since existing
        # checkpoints were trained with this clamp.
        log_var = F.relu(self.linear4(features))
        std = torch.exp(log_var * 0.5)
        noise = torch.randn_like(std)
        latente = media + noise * std
        return latente, media, log_var

In [None]:
class Decoder(nn.Module):
    """Transposed-convolution decoder that maps a latent tensor to an image.

    The trailing comments give the spatial size after each stage when the
    input has a 1x1 spatial extent (shape ``(N, latent_dims, 1, 1)``).
    """

    def __init__(self, latent_dims, n_channels=4):
        """Build the upsampling stack.

        Args:
            latent_dims: Number of channels of the incoming latent tensor.
            n_channels: Number of channels of the generated image. The
                original code ignored this argument and always emitted 4
                channels; it is now honoured, so a model built with a
                different channel count produces outputs that match its
                inputs. The default keeps the previous behaviour.
        """
        super(Decoder, self).__init__()
        self.seq = nn.Sequential(
            AUG_block(in_channels=latent_dims, out_channels=n_G*16, strides=1, padding=0), #[5, 5]
            AUG_block(in_channels=n_G*16, out_channels=n_G*8), #[11, 11]
            AUG_block(in_channels=n_G*8, out_channels=n_G*4), #[23, 23]
            AUG_block(in_channels=n_G*4, out_channels=n_G*2), #[47, 47]
            AUG_block(in_channels=n_G*2, out_channels=n_G, strides=3, padding = 9), #[125, 125]
            # Final projection to image channels: (125 - 1) * 2 - 2 * 25 + 2 = 200.
            nn.ConvTranspose2d(in_channels=n_G, out_channels=n_channels, kernel_size=2, stride=2, padding=25, bias=False), #[200, 200]
            # Sigmoid keeps every output value in (0, 1).
            nn.Sigmoid()
        )

    def forward(self, z):
        """Run ``z`` through the upsampling stack and return the result."""
        return self.seq(z)

In [None]:
class Variational_Autoencoder(nn.Module):
    """Variational autoencoder that chains ``Variational_Encoder`` and ``Decoder``."""

    def __init__(self, latent_dims, n_channels=4):
        """Create the encoder and the decoder.

        Args:
            latent_dims: Size of the latent vector shared by both halves.
            n_channels: Channel count forwarded to both halves.
        """
        super().__init__()
        self.encoder = Variational_Encoder(latent_dims=latent_dims, n_channels=n_channels)
        self.decoder = Decoder(latent_dims=latent_dims, n_channels=n_channels)

    def forward(self, x):
        """Encode ``x``, decode the sampled latent and return the statistics.

        Returns:
            Tuple ``(reconstruction, media, log_var)``.
        """
        latent, media, log_var = self.encoder(x)
        # Insert two singleton axes after dim 1 so the decoder receives a
        # 4-D tensor.
        latent_map = latent[:, :, None, None]
        reconstruction = self.decoder(latent_map)
        return reconstruction, media, log_var

In [None]:
def vae_loss(x, x_hat, media, log_var):
    """Return the VAE loss: summed reconstruction error plus the KL term.

    Args:
        x: Target tensor with values in [0, 1].
        x_hat: Reconstruction with values in [0, 1], same shape as ``x``.
        media: Mean of the approximate posterior.
        log_var: Log-variance of the approximate posterior.

    Returns:
        Scalar tensor: binary cross-entropy summed over all elements plus
        ``-0.5 * sum(1 + log_var - exp(log_var) - media**2)``.
    """
    reconstruction_loss = F.binary_cross_entropy(input=x_hat, target=x, reduction='sum')
    # Per-element term of the closed-form KL divergence between
    # N(media, exp(log_var)) and a standard normal.
    kl_terms = 1 + log_var - log_var.exp() - media.pow(2)
    latent_loss = -0.5 * kl_terms.sum()
    return reconstruction_loss + latent_loss

#Entrenamos el modelo

In [None]:
def train(autoencoder, data, epochs=10000):
    """Train ``autoencoder`` with Adam on the batches yielded by ``data``.

    Every 50 epochs the mean per-batch loss of that epoch is printed, and
    every 100 epochs the model's ``state_dict`` is saved to Google Drive.

    Args:
        autoencoder: Model called as ``autoencoder(x)`` that returns
            ``(x_hat, media, log_var)``.
        data: Iterable yielding ``(images, labels)`` batches; the labels are
            ignored.
        epochs: Number of passes over ``data``.
    """
    opt = torch.optim.Adam(autoencoder.parameters())

    for epoch in range(epochs):
        epoch_loss = 0.0
        num_batches = 0
        # Iterate the iterable that was passed in, not the module-level
        # ``dataloader`` the original loop reached for.
        for x, _ in data:
            x = x.to(device)  # move the batch to the device selected at the top of the notebook
            opt.zero_grad()
            x_hat, media, log_var = autoencoder(x)
            loss = vae_loss(x, x_hat, media, log_var)
            loss.backward()
            opt.step()
            # .item() stores a plain float, so the running total does not
            # hold on to tensors or their autograd history.
            epoch_loss += loss.item()
            num_batches += 1

        completed = epoch + 1
        if completed % 50 == 0:
            # An empty ``data`` reports NaN instead of dividing by zero.
            mean_loss = epoch_loss / num_batches if num_batches else float('nan')
            print(f'epoch {completed}, loss {mean_loss:f}')

        if completed % 100 == 0:
            # Save the model being trained, not the global ``vae``.
            torch.save(autoencoder.state_dict(), f'/content/drive/MyDrive/vae-weights-700D-epoch({completed}).params')


In [None]:
latent_dims = 700
vae = Variational_Autoencoder(latent_dims).to(device) # GPU
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved from a CUDA model still loads on a CPU-only runtime.
checkpoint = torch.load('/content/vae-weights-700DV5.params', map_location=device)
vae.load_state_dict(checkpoint)
# Put the model in training mode before resuming training.
vae.train()

In [None]:
# Resume training of the checkpoint-initialised model on the logo batches.
train(vae, dataloader)

epoch 50, loss 4611406.500000
epoch 100, loss 4608636.500000
epoch 150, loss 4607030.000000
epoch 200, loss 4604271.000000
epoch 250, loss 4604197.500000
epoch 300, loss 4603245.500000
epoch 350, loss 4601725.000000
epoch 400, loss 4599759.500000
epoch 450, loss 4600329.000000
epoch 500, loss 4598127.500000
epoch 550, loss 4598709.500000
epoch 600, loss 4596523.000000
epoch 650, loss 4595637.000000
epoch 700, loss 4596529.500000
epoch 750, loss 4593616.500000
epoch 800, loss 4591520.000000
epoch 850, loss 4592373.000000
epoch 900, loss 4592435.500000
