### Dataset - https://www.kaggle.com/datasets/tongpython/cat-and-dog

In [1]:
import numpy as np
import pandas as pd
import os
import torch
import torchvision
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
import torch.optim as optim
from PIL import Image
import time

# Usa gpu ou cpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
from sklearn.model_selection import train_test_split
dataset = ImageFolder("training_set/")

# Separa os sets
train_data, test_data, train_label, test_label = train_test_split(dataset.imgs, dataset.targets, test_size=0.2)

In [3]:
# CLasse que irá auxiliar e simplificar o processo de transformação

class ImageLoader(Dataset):
    def __init__(self, dataset, transform=None):
        self.transform = transform
        self.dataset = self.check(dataset)
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, item):
        image = Image.open(self.dataset[item][0])
        classCategory = self.dataset[item][1]
        if self.transform:
            image = self.transform(image)
        return image, classCategory
    
    def check(self, dataset):
        dataset_size = []
        for index in range(len(dataset)):
            dataset_size.append(dataset[index])
        return dataset_size

In [4]:
# Prepara o tratamento dos datasets
train_transform = transforms.Compose([
    transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize([0.5]*3, [0.5]*3)
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize([0.5]*3, [0.5]*3)
])

train_dataset = ImageLoader(train_data, train_transform)
test_dataset = ImageLoader(test_data, test_transform)

## Transform:

- ToTensor(): transforma em array;
- Normalize(mean, standard deviantion): normaliza baseado na média e desvio padrão

In [5]:
# Carrega os dados
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [6]:
from tqdm import tqdm
from torchvision import models

# Carrega um modelo já treinado
model = models.resnet50(weights='ResNet50_Weights.DEFAULT')

# Mantêm os parâmetros do modelo
for param in model.parameters():
    param.requires_grad = False

# Número de inputs e outputs
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## RESNET50

É uma rede ConvNet (Convolutional Neural Network) com 50 camadas, capaz de classificar entre 1000 categorias.
O tamanho dos inputs são
224x224

In [7]:
# Criterion calculará a Loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.04)

def train(num_epoch, model):
    for epoch in range(0, num_epoch):
        losses = []
        model.train()
        # tqdm cria barra de progresso
        loop = tqdm(enumerate(train_loader), total=len(train_loader))
        for batch_idx, (data, targets) in loop:
            data = data.to(device=device)
            targets = targets.to(device=device)
            scores = model(data)
            loss = criterion(scores, targets)
            optimizer.zero_grad()
            losses.append(loss)
            loss.backward()
            optimizer.step()
            _, preds = torch.max(scores, 1)
            loop.set_description(f"Epoch {epoch+1}/{num_epoch} process: {int((batch_idx / len(train_loader)) * 100)}")
            loop.set_postfix(loss=loss.data.item())
        
        # Salva o modelo quando acaba cada epoch
        torch.save({ 
                    "model_state_dict": model.state_dict(), 
                    "optimizer_state_dict": optimizer.state_dict(), 
                    }, f"save_epoch_{epoch}.pt")


def test():
    model.eval()
    test_loss = 0
    correct = 0
    # Prática comum, para tirar os gradientes do calculo
    with torch.no_grad():
        for x, y in test_loader:
            x = x.to(device)
            y = y.to(device)
            output = model(x)
            _, predictions = torch.max(output, 1)
            correct += (predictions == y).sum().item()
            test_loss = criterion(output, y)
            
    test_loss /= len(test_loader.dataset)
    print("Average Loss: ", test_loss, "  Accuracy: ", correct, " / ",
    len(test_loader.dataset), "  ", int(correct / len(test_loader.dataset) * 100), "%")

In [8]:
# Treina e valida
train(7, model)
test()

Epoch 1/7 process: 99: 100%|██████| 101/101 [25:11<00:00, 14.97s/it, loss=3.91]
Epoch 2/7 process: 99: 100%|██| 101/101 [23:18<00:00, 13.85s/it, loss=0.000112]
Epoch 3/7 process: 99: 100%|██████| 101/101 [18:56<00:00, 11.25s/it, loss=0.04]
Epoch 4/7 process: 99: 100%|█████████| 101/101 [17:44<00:00, 10.54s/it, loss=0]
Epoch 5/7 process: 99: 100%|█████████| 101/101 [16:56<00:00, 10.06s/it, loss=0]
Epoch 6/7 process: 99: 100%|████| 101/101 [16:27<00:00,  9.78s/it, loss=0.0734]
Epoch 7/7 process: 99: 100%|█████████| 101/101 [17:05<00:00, 10.15s/it, loss=0]


Average Loss:  tensor(0.)   Accuracy:  1582  /  1601    98 %


In [9]:
# Carrega o modelo
load = torch.load("./save_epoch_6.pt")
model.load_state_dict(load["model_state_dict"]) 
optimizer.load_state_dict(load["optimizer_state_dict"])

In [10]:
def cat_dog_pred(filepath):
    img = Image.open(filepath)
    img.show()
    img_array = Image.open(filepath).convert("RGB")
    data_transforms=transforms.Compose([
        transforms.Resize((224, 224)), 
        transforms.ToTensor(), 
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])
    img = data_transforms(img_array).unsqueeze(dim=0)
    load = DataLoader(img)
    
    for x in load:
        x=x.to(device)
        pred = model(x)
        _, preds = torch.max(pred, 1)
        if preds[0] == 1: print(f"Dog")
        else: print(f"Cat")

In [11]:
rg = range(30)
for i in rg:
    cat_dog_pred(f"pred/pred{i}.jpeg")
    time.sleep(1)

Cat
Cat
Dog
Cat
Cat
Dog
Cat
Cat
Cat
Dog
Cat
Dog
Dog
Cat
Dog
Dog
Dog
Dog
Dog
Cat
Dog
Cat
Cat
Cat
Cat
Cat
Dog
Dog
Cat
Dog
