# Projeto final - Redes Neurais
Aluno: Luiz Felipe Barbosa


Dataset escolhido: https://www.kaggle.com/datasets/mloey1/ahcd1

## Importação das bibliotecas

In [9]:
from os.path import exists

import pandas as pd
import numpy as np

import torch

In [2]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset, DataLoader

## Criação da rede

In [3]:
class Modelo(nn.Module):
    """Fully connected classifier mapping 1024 input features to 29 scores.

    The network is five hidden ``Linear`` layers, each followed by ``ReLU``,
    and a final ``Linear`` layer without activation, so ``forward`` returns
    raw (unnormalised) scores.
    """

    def __init__(self):
        super().__init__()
        # Feature widths in order, input first; the last entry feeds the
        # output layer.
        widths = [1024, 4096, 2096, 1024, 1024, 512]
        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 29))
        # Positions inside the Sequential are Linear at even indices and
        # ReLU at odd ones, so parameter names are "dense.0.weight",
        # "dense.2.weight", ..., "dense.10.bias".
        self.dense = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the dense stack and return its output."""
        return self.dense(x)

# Instantiate the network with freshly initialised weights.
# NOTE: `model` is re-created (and moved to the selected device) in the
# training cell further below, so this instance is not the one trained.
model = Modelo()

## Criação da base de dados

In [4]:
class CustomDataset(Dataset):
    """In-memory dataset built from a pandas feature table and label table.

    Args:
        data: pandas object whose ``.values`` holds one sample per row; it is
            converted to a ``float32`` tensor.
        labels: pandas object whose ``.values`` holds one label per sample;
            singleton dimensions are squeezed away and the result is stored
            as a ``long`` tensor.
        transform: optional callable applied to each sample tensor when it
            is fetched.
    """

    def __init__(self, data, labels, transform=None):
        self.data = torch.tensor(data.values, dtype=torch.float32)
        label_tensor = torch.tensor(labels.values.squeeze(), dtype=torch.long)
        # squeeze() collapses a single-row label table to a 0-d value, which
        # cannot be indexed in __getitem__; restore the sample axis.
        if label_tensor.dim() == 0:
            label_tensor = label_tensor.reshape(1)
        self.labels = label_tensor
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of ``data``)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        sample = self.data[idx]
        label = self.labels[idx]
        # Compare against None so that a callable which evaluates as falsy
        # (e.g. an empty container module) is still applied.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, label

### Criando o objeto de treinamento

In [5]:
def train(log_interval, dry_run, model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        log_interval: print a progress line every this many batches
            (must be non-zero).
        dry_run: if true, stop right after the first progress line.
        model: module to optimise; it is switched to training mode.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches that supports
            ``len()`` and exposes a ``dataset`` attribute.
        optimizer: optimiser that updates the model parameters.
        epoch: epoch number, used only in the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Tensors carry autograd state themselves, so the batch is passed to
        # the model directly instead of through the deprecated Variable
        # wrapper.
        output = model(data)
        # cross_entropy takes raw scores and applies log-softmax internally.
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if dry_run:
                break

In [6]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

Classe p

## Avaliação

Lendo os arquivos CSV com os datasets

In [7]:
# Load the test split. header=None makes pandas read the first row as data
# rather than as column names.
# NOTE(review): the file names suggest 3360 samples x 1024 features and one
# label per sample - confirm against the actual files.
x_test = pd.read_csv('/content/csvTestImages_3360x1024.csv', header=None)
y_test = pd.read_csv('/content/csvTestLabel_3360x1.csv', header=None)

# Load the training split in the same way (file names suggest 13440 samples).
x_train = pd.read_csv('/content/csvTrainImages_13440x1024.csv', header=None)
y_train = pd.read_csv('/content/csvTrainLabel_13440x1.csv', header=None)

Treinamento

In [8]:
# Train on the GPU when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()

device = torch.device("cuda" if use_cuda else "cpu")

# DataLoader settings used on every run; both loaders shuffle.
train_kwargs = {'batch_size': 64, 'shuffle': True}
test_kwargs = {'batch_size': 1000, 'shuffle': True}
if use_cuda:
    # Extra settings that only apply when batches are copied to a GPU.
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

# NOTE(review): this pipeline is not applied anywhere in this cell - the
# datasets below are built without a transform. It cannot simply be passed
# to CustomDataset either, because the samples are already tensors and
# ToTensor expects a PIL image or ndarray. Decide whether to drop it or
# replace it with a normalisation suited to this data.
transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])

# Create custom datasets
train_dataset = CustomDataset(x_train, y_train)
test_dataset = CustomDataset(x_test, y_test)

# Create DataLoaders. The full kwargs dicts are passed through so the CUDA
# settings above actually take effect instead of only the batch size.
train_loader = DataLoader(train_dataset, **train_kwargs)
test_loader = DataLoader(test_dataset, **test_kwargs)

model = Modelo().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=1)

epochs = 14
# Multiply the learning rate by 0.7 after every epoch.
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

for epoch in range(1, epochs + 1):
    # Log every 28 batches; dry_run is disabled so the whole epoch runs.
    train(28, False, model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    scheduler.step()

# Persist only the learned parameters, not the module object itself.
torch.save(model.state_dict(), "arabic_modelo.pt")


Test set: Average loss: -2.7721, Accuracy: 969/3360 (29%)


Test set: Average loss: -4.1024, Accuracy: 1635/3360 (49%)


Test set: Average loss: -4.8633, Accuracy: 2089/3360 (62%)


Test set: Average loss: -5.1331, Accuracy: 2324/3360 (69%)


Test set: Average loss: -6.1038, Accuracy: 2501/3360 (74%)


Test set: Average loss: -7.3311, Accuracy: 2566/3360 (76%)


Test set: Average loss: -8.1858, Accuracy: 2606/3360 (78%)


Test set: Average loss: -8.8244, Accuracy: 2626/3360 (78%)


Test set: Average loss: -9.3137, Accuracy: 2637/3360 (78%)


Test set: Average loss: -9.5726, Accuracy: 2628/3360 (78%)


Test set: Average loss: -9.7217, Accuracy: 2639/3360 (79%)


Test set: Average loss: -9.8333, Accuracy: 2633/3360 (78%)


Test set: Average loss: -9.9006, Accuracy: 2639/3360 (79%)


Test set: Average loss: -9.9412, Accuracy: 2634/3360 (78%)



**Conclusão:**

O projeto final consistiu na criação de uma rede neural para classificar 16.800 caracteres árabes escritos por 60 pessoas, distribuídos em 28 classes. Dos 16.800 caracteres, 13.440 foram utilizados para treino e 3.360 para teste.

Utilizamos seis camadas lineares com a função de ativação ReLU entre elas. Além disso, utilizamos o otimizador Adadelta e a função de perda cross entropy. Usamos um batch de 64 para o treino e de 1.000 para o teste, ambos com shuffle, e um learning rate inicial de 1, que foi reduzido durante o treinamento ao longo de 14 épocas.

A acurácia no conjunto de teste subiu de 29% na 1ª época até estabilizar entre 78% e 79% a partir da 7ª época, alcançando 79% na 13ª época e 78% na época final.


