# Introduction

J'ai commencé par uploader le dataset sur [Hugging Face](https://huggingface.co/datasets/Alanox/stanford-dogs), à la fois pour apprendre à utiliser l'upload de datasets et pour partager facilement ce dataset avec la communauté.

Testons que cela fonctionne bien.

In [None]:
import datasets

# Fetch the Stanford Dogs dataset from the Hugging Face Hub, asking for the
# split named "full".
dataset = datasets.load_dataset(path="Alanox/stanford-dogs", split="full")
# Left as the last expression so the notebook prints the dataset summary.
dataset

In [None]:
# Evaluate the "image" field of the first record; as the cell's only
# expression, its value is what the notebook renders.
dataset[0]["image"]

L'avantage est que le dataset entier n'est pas chargé ! On charge uniquement ce dont on a besoin

# Données

Regardons un peu les données.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Start from the built-in "ggplot" style and layer a custom style sheet,
# referenced by URL, on top of it.
plt.style.use(['ggplot', 'https://raw.githubusercontent.com/AlanBlanchet/matplotlib_styles/master/vscode_blue.mplstyle'])

# Restrict the dataset to the three metadata columns (the "image" column is
# left out) before converting it to a DataFrame.
metadata = dataset.select_columns(["name", "target", "annotations"])
df = pd.DataFrame(metadata.to_dict())
df.head()

In [None]:
# Number of rows in the metadata table.
len(df)

In [None]:
# Number of rows per target label (value_counts sorts by descending count).
counts = df["target"].value_counts()

plt.figure(figsize=(24, 9))
plt.bar(counts.index, counts)
plt.title("Target distribution")
# Tilt the tick labels so they do not overlap; the trailing ";" keeps the
# return value of xticks out of the notebook output.
plt.xticks(ha="right", rotation=45);

In [None]:
# One row per bounding box: flatten the per-row lists of boxes, discard the
# original row labels, then spread each box over four named columns.
boxes = df["annotations"].explode().reset_index(drop=True)
df_annots = pd.DataFrame(boxes.tolist(), columns=["xmin", "ymin", "xmax", "ymax"])
df_annots.head()

In [None]:
# Summary statistics for the four box-coordinate columns.
df_annots.describe()

In [None]:
# One subplot per coordinate column; axes are shared so the four curves can be
# compared directly.
fig, axs = plt.subplots(2, 2, figsize=(16, 9), sharex=True, sharey=True)
# For a 2x2 grid plt.subplots already returns an ndarray of Axes, so it can be
# flattened directly to iterate over the subplots one by one.
axs = axs.flatten()

fig.suptitle("Coordinate distributions")
# DataFrame.items() yields (column name, column Series) pairs, which avoids
# transposing the whole frame just to walk over its columns.
for ax, (name, coords) in zip(axs, df_annots.items()):
    ax.set_title(name)
    # Plot the values in ascending order against their rank.
    ax.plot(coords.sort_values().to_numpy())

In [None]:
def pixel_area(box: pd.DataFrame) -> pd.Series:
    """Return the pixel area of every bounding box listed in ``box``.

    Args:
        box: DataFrame with an ``"annotations"`` column whose cells are lists
            of boxes, each box being indexable as
            ``(xmin, ymin, xmax, ymax)``.

    Returns:
        A Series with one entry per box holding
        ``(xmax - xmin) * (ymax - ymin)``. Each entry keeps the index label of
        the row its box came from, so a row with several boxes yields several
        entries sharing one label. Rows without any box contribute no entry.
    """
    # explode() emits NaN for rows whose list is empty (or already missing);
    # drop those so the coordinate lookup below never indexes into a float.
    box_annots = box["annotations"].explode().dropna()
    return box_annots.apply(lambda r: (r[2] - r[0]) * (r[3] - r[1]))

In [None]:
# Area of every box, grouped by target. Selecting [["annotations"]] hands
# pixel_area only the column it reads and keeps the grouping column out of the
# applied frame (pandas >= 2.2 warns when apply operates on grouping columns).
target_areas = df.groupby("target")[["annotations"]].apply(pixel_area)
# The result is indexed by (target, original row label); average within each
# target with the built-in groupby mean instead of a per-group Python call.
target_mean_areas = target_areas.groupby(level="target").mean()
target_mean_areas.head()

In [None]:
# Order the targets from the smallest to the largest mean box area.
target_mean_areas = target_mean_areas.sort_values()

plt.figure(figsize=(24, 9))
plt.bar(target_mean_areas.index, target_mean_areas)
plt.title("Mean annotation box area per target")
plt.ylabel("pixel**2")
# The trailing ";" keeps the return value of xticks out of the notebook output.
plt.xticks(ha="right", rotation=45);

On remarque qu'en moyenne, les boîtes englobantes d'un "Irish Water Spaniel" couvrent plus de pixels que celles d'un "English Foxhound".

In [None]:
# Keep the first row seen for each target, store its original row label in an
# "index" column, and key the resulting table by target name.
first_per_target = df.drop_duplicates(subset=["target"], keep="first")
first_targets = first_per_target.reset_index().set_index("target")
first_targets.head()

In [None]:
# Original row label of the first sample for each of the two targets, read
# with a single (row, column) lookup and converted to a plain int.
idx_english_foxhound = int(first_targets.loc["English Foxhound", "index"])
idx_irish_water_spaniel = int(first_targets.loc["Irish Water Spaniel", "index"])
idx_english_foxhound, idx_irish_water_spaniel

In [None]:
# First "English Foxhound" sample: print the image's `size` attribute, then
# leave the image as the last expression so the notebook renders it.
img = dataset[idx_english_foxhound]["image"]
print(img.size)
img

In [None]:
# First "Irish Water Spaniel" sample: print the image's `size` attribute, then
# leave the image as the last expression so the notebook renders it.
img = dataset[idx_irish_water_spaniel]["image"]
print(img.size)
img

# Data augmentation

In [None]:
import torchvision
import torchvision.transforms.functional as F
import torchvision.transforms.v2 as T

# Silence the warning torchvision attaches to the beta v2 transforms API.
torchvision.disable_beta_transforms_warning()

# Read the first sample's image through the dataset's "pytorch" format.
torch_view = dataset.with_format("pytorch")
img = torch_view[0]["image"]

# Preprocessing reused by the augmentation gallery: move the last axis to the
# front (presumably HWC -> CHW; confirm against the dataset's tensor layout),
# then resize with size=400 and antialiasing.
transforms = T.Compose(
    [
        lambda x: x.permute(2, 0, 1),
        T.Resize(400, antialias=True),
    ]
)
F.to_pil_image(transforms(img))

In [None]:
# Augmentations to preview. Transforms that take a probability use p=1 so
# their effect is always applied in the gallery.
applies = [
    T.AugMix(),
    T.AutoAugment(),
    T.CenterCrop(200),
    T.ColorJitter(),
    T.ElasticTransform(50.0, 1.0),
    T.Grayscale(),
    T.GaussianBlur(5),
    T.Pad(30),
    T.RandomAdjustSharpness(2, p=1),
    T.RandomAutocontrast(p=1),
    # RandomCrop's second positional parameter is `padding`, not a height:
    # RandomCrop(200, 200) padded every side by 200 px before cropping.
    # Pass the crop size as a single (height, width) tuple instead.
    T.RandomCrop((200, 200)),
    T.RandomHorizontalFlip(p=1),
    T.RandomVerticalFlip(p=1),
    T.RandomInvert(p=1),
    T.RandomPerspective(p=1),
    T.RandomPhotometricDistort(p=1),
    T.RandomPosterize(4, p=1),
    T.RandomZoomOut(p=1),
    # The transform is applied to an 8-bit PIL image, so the threshold lives
    # on the 0-255 scale; 0.5 inverted nearly every pixel, which made this
    # panel look the same as RandomInvert.
    T.RandomSolarize(128, p=1),
]

n = len(applies)

cols = 4
rows = -(-n // cols)  # ceiling division, tied to `cols` rather than a literal

# The preprocessing does not depend on the augmentation, so run it once
# instead of once per subplot.
base_img = F.to_pil_image(transforms(img))

fig, axs = plt.subplots(rows, cols, sharex=True, sharey=True, figsize=(24, 26))
for ax, transform in zip(axs.flatten(), applies):
    # Title each panel with the transform's class name.
    ax.set_title(type(transform).__name__)
    ax.grid(False)
    ax.imshow(transform(base_img))