In [1]:
# OpenCV (cv2) is used further down to read the image files from disk.
%pip install opencv-python

Note: you may need to restart the kernel to use updated packages.




In [2]:
# tqdm provides the progress bars shown while the image folders are loaded.
%pip install tqdm

Note: you may need to restart the kernel to use updated packages.




In [1]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118




In [None]:
# Optional: uncomment to install Hugging Face `datasets` if it is missing from the environment.
# %pip install datasets

In [2]:
import numpy as np
import cv2
import os

from transformers import ViTImageProcessor, TrainingArguments, ViTForImageClassification, Trainer
import torch 

from string import ascii_uppercase

from tqdm import tqdm

from datasets import Dataset, load_metric, ClassLabel

from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Check whether PyTorch can use CUDA in this environment.
torch.cuda.is_available()


False

In [None]:
# Show the CUDA version reported by the installed PyTorch build.
torch.version.cuda

In [None]:
# Only query the GPU name when CUDA is usable: `get_device_name` raises on
# machines without CUDA, which would break a top-to-bottom run of the notebook.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("CUDA is not available; running on CPU.")

## 1) Carregando imagens

In [None]:
# Class labels: the uppercase alphabet letters, leaving out H, J, K, X and Z.
classes = sorted(set(ascii_uppercase).difference("HJKXZ"))

num_classes = len(set(classes))

print(f'Classes: {classes}')
print(f'Número de classes: {num_classes}')

In [None]:
def load_images_from_folder(folder):
    """Load every image stored under `folder`, using sub-folder names as labels.

    The expected layout is one sub-directory per class::

        folder/<label>/<image file>

    Only files ending in ``.jpg`` or ``.png`` (case-insensitive) are read;
    files that OpenCV cannot decode are reported and skipped.

    Parameters
    ----------
    folder : str
        Root directory that contains one sub-directory per label.

    Returns
    -------
    tuple[list, list]
        ``(images, labels)``: RGB ``PIL.Image`` objects and, at the same
        position, the name of the sub-directory each image came from.
    """
    images = []
    labels = []
    # Sort the listings so the sample order is reproducible across runs and
    # platforms (`os.listdir` returns entries in arbitrary order).
    for label in sorted(os.listdir(folder)):
        label_path = os.path.join(folder, label)
        if not os.path.isdir(label_path):
            continue
        for filename in tqdm(sorted(os.listdir(label_path)), desc=f"Loading {label} images"):
            # Compare extensions case-insensitively so ".JPG"/".PNG" are kept too.
            if not filename.lower().endswith((".jpg", ".png")):
                continue
            img_path = os.path.join(label_path, filename)
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread returns None (it does not raise) when a file
                # cannot be read; report it and move on instead of crashing.
                tqdm.write(f"Skipping unreadable image: {img_path}")
                continue
            # OpenCV decodes to BGR channel order while PIL interprets the
            # array as RGB, so swap the channels before building the image.
            images.append(Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)))
            labels.append(label)

    return images, labels

In [None]:
# Load the training and test splits from their respective dataset folders.
X_train_images, y_train_labels = load_images_from_folder('../libras_dataset/train')
X_test_images, y_test_labels = load_images_from_folder('../libras_dataset/test')

In [None]:
# Map each folder name to its index in `classes`. The ids are stored under new
# names so that re-running this cell still works: overwriting `y_*_labels` in
# place would make a second run fail, because the ints are not in `classes`.
class_to_id = {name: idx for idx, name in enumerate(classes)}
y_train_ids = [class_to_id[name] for name in y_train_labels]
y_test_ids = [class_to_id[name] for name in y_test_labels]

train_data = {"image": X_train_images, "label": y_train_ids}
test_data = {"image": X_test_images, "label": y_test_ids}

# Declare the label column as a ClassLabel so the datasets carry the class
# names (`features['label'].names`) alongside the integer ids.
label_feature = ClassLabel(names=classes)
train_dataset = Dataset.from_dict(train_data).cast_column("label", label_feature)
test_dataset = Dataset.from_dict(test_data).cast_column("label", label_feature)

train_dataset, test_dataset

In [None]:
# Recompute the class count from the labels actually present in the training
# set, and inspect the feature type of the label column.
num_classes = len(set(train_dataset["label"]))
labels = train_dataset.features['label']
num_classes, labels

In [None]:
# Inspect the first training row (image and label).
train_dataset[0]

In [None]:
# Display the image of the first training row.
train_dataset[0]['image']

In [None]:
# Integer class id of the first training row.
train_dataset[0]['label'] 

# Name lookup, left disabled (presumably needs a feature exposing `.names` — TODO confirm):
# labels.names[train_dataset[0]['label']]

## 2) Rede com ViT

### 2.1) Modelo vit-base-patch16-224 (https://huggingface.co/google/vit-base-patch16-224) 

In [None]:
# Checkpoint identifier; the same name is used later to load the model.
model_name_or_path = "google/vit-base-patch16-224"

# Image processor that belongs to the checkpoint above.
feature_extractor = ViTImageProcessor.from_pretrained(model_name_or_path)

In [None]:
# Display the processor object.
feature_extractor

In [None]:
# Run the processor on the first training image to inspect its output.
# The row is indexed first (`train_dataset[0]`): `train_dataset['image']`
# reads the entire image column just to pick one element from it.
example = feature_extractor(
    train_dataset[0]['image'],
    return_tensors="pt"
)

example

In [None]:
# Compare the size of the raw image with the shape of the processed tensor.
# The sample is fetched by row index so only that one image is read, instead
# of pulling the whole image column.
print("Shape da Imagem original: ", train_dataset[0]['image'].size, "\n")

print("Shape da Imagem com resize do pytorch: ", example['pixel_values'].shape)

In [None]:
# Print whether CUDA is available before the device is chosen.
print(torch.cuda.is_available())

In [None]:
# Use the GPU when PyTorch can reach one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

## 3) Fine-tuning do modelo

In [None]:
def preprocess(batch):
    """Turn a batch of dataset rows into model inputs.

    Runs `feature_extractor` over `batch['image']` (returning PyTorch tensors)
    and carries `batch['label']` through unchanged under the key `'label'`.
    """
    encoded = feature_extractor(batch['image'], return_tensors="pt")
    encoded['label'] = batch['label']
    return encoded

### 3.1) Adquirindo batches de dados

In [None]:
# Attach `preprocess` as the transform of both splits.
prepared_train = train_dataset.with_transform(preprocess)
prepared_test = test_dataset.with_transform(preprocess)

In [None]:
def collate_fn(batch):
    """Collate a list of preprocessed samples into one batch for the model.

    Stacks every sample's `'pixel_values'` tensor along a new first dimension
    and gathers the `'label'` entries into a tensor stored under `'labels'`.
    """
    pixel_values = []
    label_ids = []
    for sample in batch:
        pixel_values.append(sample['pixel_values'])
        label_ids.append(sample['label'])

    return {
        'pixel_values': torch.stack(pixel_values),
        'labels': torch.tensor(label_ids)
    }

In [None]:
def compute_metrics(p):
    """Compute classification accuracy for the Trainer's evaluation loop.

    Parameters
    ----------
    p : object
        Must expose `predictions` (per-class scores, one row per sample) and
        `label_ids` (the true class ids).

    Returns
    -------
    dict
        ``{"accuracy": value}`` where ``value`` is the fraction of samples
        whose arg-max class equals the true label.
    """
    # Computed directly with NumPy: `datasets.load_metric` is deprecated and
    # has to fetch the metric script over the network.
    predicted_ids = np.asarray(p.predictions).argmax(axis=1)
    true_ids = np.asarray(p.label_ids)
    return {"accuracy": float((predicted_ids == true_ids).mean())}

### 3.2) Argumentos do treinamento

In [None]:
# Fine-tuning hyper-parameters. Evaluation and checkpointing share the same
# 100-step cadence.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    evaluation_strategy="steps",
    num_train_epochs=4,
    save_steps=100,
    eval_steps=100,
    logging_steps=10,
    # Fine-tuning pretrained weights needs a small step size; the previous
    # value of 0.01 is orders of magnitude above what is normally used for
    # this and typically destabilises training.
    learning_rate=2e-4,
    save_total_limit=2,
    # Keep the "image" column: the on-the-fly `preprocess` transform reads it.
    remove_unused_columns=False,
    push_to_hub=False,
    load_best_model_at_end=True,
    # Optional settings, currently disabled:
    # logging_dir="./logs",
    # do_train=True,
    # do_eval=True,
    # metric_for_best_model="accuracy"
)

### 3.3) Adquirindo modelo pré-treinado

In [None]:
labels = classes

# Load the pretrained ViT with a classification head sized for our classes.
# `ignore_mismatched_sizes=True` lets the checkpoint load even though its
# original head has a different number of outputs. The id<->label maps are
# stored in the model config so the saved model reports letter names rather
# than generic label ids.
model = ViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(labels),
    id2label={idx: name for idx, name in enumerate(labels)},
    label2id={name: idx for idx, name in enumerate(labels)},
    ignore_mismatched_sizes=True
)

In [None]:
# Move the model to the selected device (the cell output is the model's repr).
model.to(device)

### 3.4) Preparando o modelo pré-treinado para um novo treinamento

In [None]:
# Bundle the model, training arguments, datasets, batching function and
# metric function into a single Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=prepared_train,
    eval_dataset=prepared_test,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    tokenizer=feature_extractor,
)

### 3.5) Novo Treinamento do modelo pré-treinado

In [None]:
train_results = trainer.train()

# Persist the fine-tuned weights together with the training metrics and state.
trainer.save_model()
# The first argument is the split name, not a file name: `save_metrics`
# derives the output file from it ("train" -> train_results.json).
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)

trainer.save_state()

### 3.6) Testando modelo 

In [None]:
# Evaluate on the test split, then log and save the metrics under the "eval" split name.
metrics = trainer.evaluate(prepared_test)

trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

### 3.7) Visualizando predição com uma imagem específica do conjunto de teste

In [None]:
# Take the first test sample by row index (this avoids reading the whole
# image column for one element) and resize it to 224x224.
image = test_dataset[0]['image'].resize((224, 224))
image

In [None]:
# Ground-truth class id of the first test sample and the letter it stands for.
# The name is looked up in `classes` (the list the ids were derived from),
# which works whatever feature type the label column has.
actual_label = test_dataset[0]['label']

actual_label, classes[actual_label]

In [None]:
# `Trainer.predict` expects a dataset, not a single PIL image, so the model is
# run directly on the processed image instead.
inputs = feature_extractor(image, return_tensors="pt").to(model.device)

model.eval()
with torch.no_grad():
    logits = model(**inputs).logits

# Index of the highest-scoring class and the letter it corresponds to.
predicted_label = logits.argmax(-1).item()
predicted_label, classes[predicted_label]