# Configuração Inicial e Importações

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import torchvision
from torchsummary import summary
import os
import zipfile
import requests
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Make runs reproducible: seed every random number generator in use.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Seed the CUDA generators as well when a GPU is available.
if torch.cuda.is_available():
    for seed_cuda in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_cuda(SEED)

# Disable cuDNN benchmarking and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
# Show which PyTorch build this notebook is running against.
print("PyTorch version:", torch.__version__)

# Download do dataset

In [None]:
# Local archive name and the release asset it is downloaded from.
data_file = "data.zip"
data_url = "https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip"

In [None]:
# Download the dataset archive once; later runs reuse the local copy.
if not os.path.exists(data_file):
    print(f"Downloading {data_file}...")
    # Stream into a temporary name first: an interrupted download must never
    # leave a truncated archive that the existence check above would accept.
    partial_file = data_file + ".part"
    with requests.get(data_url, stream=True, timeout=60) as response:
        # Fail loudly on HTTP errors instead of saving an error page as a zip.
        response.raise_for_status()
        with open(partial_file, "wb") as f:
            # 64 KiB chunks keep memory use flat without a per-kilobyte loop.
            for chunk in tqdm(response.iter_content(chunk_size=64 * 1024)):
                if chunk:
                    f.write(chunk)
    # Publish the file under its final name only after a complete download.
    os.replace(partial_file, data_file)
    print("Download complete.")
else:
    print(f"{data_file} already exists.")

In [None]:
# Unpack the archive once; if the target path already exists the
# extraction step is skipped.
extract_dir = "data"
if os.path.exists(extract_dir):
    print(f"{extract_dir} directory already exists.")
else:
    print(f"Extracting {data_file}...")
    with zipfile.ZipFile(data_file) as archive:
        # Members are extracted relative to the current working directory.
        archive.extractall(".")
    print("Extraction complete.")

In [None]:
# Locate the train/test splits and infer the kind of classification problem
# from the number of class folders found in the training split.
train_dir = os.path.join(extract_dir, 'train')
test_dir = os.path.join(extract_dir, 'test')

if os.path.isdir(train_dir):
    # Assumes one sub-directory per class (ImageFolder-style layout) -- confirm
    # against the archive. Only directories are counted so stray files such as
    # .DS_Store do not inflate the class count, and the names are sorted
    # because os.listdir returns entries in arbitrary order.
    classes = sorted(
        name
        for name in os.listdir(train_dir)
        if os.path.isdir(os.path.join(train_dir, name))
    )
    print(f"Classes encontradas no conjunto de treino: {classes}")
    print(f"Número de classes: {len(classes)}")

    if len(classes) == 2:
        print("\nConclusão: Como existem 2 classes, este é um problema de Classificação Binária.")
        print("Recomendação: Usar nn.BCEWithLogitsLoss()")
    else:
        print(f"\nConclusão: Como existem {len(classes)} classes, este é um problema de Classificação Multiclasse.")
        print("Recomendação: Usar nn.CrossEntropyLoss()")
else:
    print(f"Diretório {train_dir} não encontrado. Verifique a extração.")

In [None]:
!pip install pillow

In [None]:
from io import BytesIO
from urllib import request
from PIL import Image

def download_image(url):
    """Fetch the resource at *url* and open it as a PIL image.

    The whole response body is read into memory and wrapped in a
    ``BytesIO`` buffer, which is handed to ``Image.open``.

    Args:
        url: Address of the image to download.

    Returns:
        The object returned by ``Image.open`` for the downloaded bytes.
    """
    with request.urlopen(url) as resp:
        payload = BytesIO(resp.read())
    return Image.open(payload)

def prepare_image(img, target_size):
    """Return *img* converted to RGB (if needed) and resized to *target_size*.

    Args:
        img: Image object exposing ``mode``, ``convert`` and ``resize``.
        target_size: Size passed straight to ``img.resize``.

    Returns:
        The resized image, resampled with nearest-neighbour interpolation.
    """
    rgb = img if img.mode == 'RGB' else img.convert('RGB')
    return rgb.resize(target_size, Image.Resampling.NEAREST)

# Download e Inspeção do Modelo ONNX

In [None]:
# Release assets that make up the exported ONNX classifier.
base_url = "https://github.com/alexeygrigorev/large-datasets/releases/download/hairstyle/"
files = ["hair_classifier_v1.onnx", "hair_classifier_v1.onnx.data"]

for file_name in files:
    if os.path.exists(file_name):
        print(f"{file_name} already exists.")
        continue

    print(f"Downloading {file_name}...")
    # Stream into a temporary name so that neither a failed request nor an
    # interrupted transfer can leave a broken file under the final name.
    partial_name = file_name + ".part"
    with requests.get(base_url + file_name, stream=True, timeout=60) as response:
        # Fail loudly on HTTP errors instead of saving an error page.
        response.raise_for_status()
        with open(partial_name, "wb") as f:
            # Chunked writes avoid holding the whole file in memory.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:
                    f.write(chunk)
    os.replace(partial_name, file_name)
    print(f"{file_name} downloaded.")

In [None]:
!pip install onnxruntime

In [None]:
import onnxruntime as ort

# Load the exported classifier into an ONNX Runtime inference session.
session = ort.InferenceSession("hair_classifier_v1.onnx")

# Only the first declared input and the first declared output are used.
model_input = session.get_inputs()[0]
input_name = model_input.name
output_name = session.get_outputs()[0].name
input_shape = model_input.shape

print(f"Input name: {input_name}")
print(f"Output name: {output_name}")
print(f"Input shape: {input_shape}")

### Preparação da Imagem

In [None]:
image_url = "https://habrastorage.org/webt/yf/_d/ok/yf_dokzqy3vcritme8ggnzqlvwa.jpeg"

# Indices 2 and 3 of the model's input shape are read as height and width
# (NCHW layout assumed -- confirm against the exported model).
input_shape = session.get_inputs()[0].shape
target_height, target_width = input_shape[2], input_shape[3]
# The resize target is ordered (width, height).
target_size = (target_width, target_height)

print(f"Input shape do modelo: {input_shape}")
print(f"Target size para redimensionamento: {target_size}")

# Download the sample picture and bring it to the model's input size.
img = download_image(image_url)
img_prepared = prepare_image(img, target_size)

# Visual sanity check of the resized image.
plt.imshow(img_prepared)
plt.title(f"Imagem redimensionada para {target_size}")
plt.axis('off')
plt.show()

In [None]:
import numpy as np

def preprocess_input(x):
    """Scale pixel values by 1/255 without modifying the caller's array.

    Args:
        x: Array-like of pixel values. Floating-point input keeps its
            dtype; any other dtype (e.g. ``uint8``) is converted to
            ``float32`` first, because integer arrays cannot be divided
            in place.

    Returns:
        A new array with every value divided by 255.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float32)
    # Out-of-place division: the input array is left untouched.
    return x / 255.0

# Turn the prepared image into a float32 array and wrap it in a batch of one.
x = np.array(img_prepared, dtype='float32')
X = np.expand_dims(x, axis=0).copy()

# Scale the pixels, then move the last axis (channels) ahead of the two
# spatial axes.
X = np.transpose(preprocess_input(X), (0, 3, 1, 2))

print(f"Shape final: {X.shape}")

# First batch item, first channel, first pixel.
r_value = X[0][0][0][0]
print(f"Valor do primeiro pixel (R channel): {r_value}")

### Verificação do Pré-processamento

Cálculo para o canal R:
$$ \text{Normalized} = \frac{\text{Value}/255.0 - \text{Mean}}{\text{Std}} $$
$$ \text{Normalized} = \frac{0.2392 - 0.485}{0.229} \approx -1.073 $$

In [None]:
def preprocess_input_imagenet(x):
    """Scale pixels by 1/255, then standardise each channel.

    The per-channel mean and standard deviation are broadcast against the
    last axis of *x*, so the channels must be on the last axis. The input
    array is not modified.

    Args:
        x: Array-like of pixel values with the channels on the last axis.
            Floating-point input keeps its dtype; any other dtype (e.g.
            ``uint8``) is converted to ``float32`` first.

    Returns:
        A new array holding ``(x / 255 - mean) / std`` in the same
        floating-point dtype as the (possibly converted) input.
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float32)

    normalized = (x / 255.0 - mean) / std
    # The statistics are float64; cast back so float32 input yields float32
    # output, as the original in-place version did.
    return normalized.astype(x.dtype, copy=False)

# Start again from a fresh download so the earlier preprocessing cannot
# influence this result.
img = download_image(image_url)
img_prepared = prepare_image(img, target_size)

# float32 array wrapped in a batch of one.
x = np.array(img_prepared, dtype='float32')
X = np.expand_dims(x, axis=0).copy()

# Normalise, then move the last axis (channels) ahead of the spatial axes.
X = np.transpose(preprocess_input_imagenet(X), (0, 3, 1, 2))

# First batch item, first channel, first pixel.
r_value_norm = X[0][0][0][0]
print(f"Valor do primeiro pixel (R channel) com normalização ImageNet: {r_value_norm}")

### Executar Inferência

In [None]:
# Feed the preprocessed batch to the model and request its single output.
feeds = {input_name: X}
outputs = session.run([output_name], feeds)

# First requested output, first item of the batch.
output_value = outputs[0][0]

print(f"Raw output do modelo: {output_value}")

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of *x*.

    The value is computed in a numerically stable way: the exponential is
    only evaluated on non-positive numbers, so large negative inputs no
    longer overflow.

    Args:
        x: Scalar or array-like of real values.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as *x*.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1] and therefore never overflows.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result into a scalar; arrays are unchanged.
    return result[()]

# Pass the raw model output through the sigmoid defined in this cell and
# report the resulting value.
probability = sigmoid(output_value)
print(f"Probabilidade (após Sigmoid): {probability}")