In [None]:
!pip install --user transformers==4.26.1 datasets==2.10.1 evaluate==0.4.0 -q 
!pip install --user --upgrade datasets transformers pyarrow
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -q


In [41]:
import evaluate
import json
import numpy as np
import os
import pandas as pd
import pyarrow as pa
import requests
import torch

from datasets import load_dataset, load_from_disk, Dataset, Features, Array3D
from io import BytesIO
from transformers import AutoProcessor, ViTFeatureExtractor, ViTForImageClassification, Trainer, TrainingArguments, default_data_collator
from typing import Tuple
from PIL import Image

In [42]:
# The directory where our images are saved in folders by category
# Charger les données
data_path = "C:/Users/h.chettaoui/Documents/AVR23_CDS_Rakuten/images/images_train"
test_data_path = "C:/Users/h.chettaoui/Documents/AVR23_CDS_Rakuten/images/images_test"

# The output directory of the processed datasets
train_save_path = "./processed-datasets/train"
val_save_path = "./processed-datasets/val"
test_save_path = "./processed-datasets/test"

# Sizes of dataset splits
val_size = 0.2
test_size = 0.1

# Name of model as named in the HuggingFace Hub
model_name = "google/vit-base-patch16-224"

In [44]:
dataset = load_dataset("imagefolder", data_dir=data_path, split='train')

# Remove from dataset images which are non-RGB (single-channel, grayscale)
condition = lambda data: data['image'].mode == 'RGB'
dataset = dataset.filter(condition)

Resolving data files:   0%|          | 0/67931 [00:00<?, ?it/s]

Downloading data files:   0%|          | 0/67931 [00:00<?, ?it/s]

Downloading data files: 0it [00:00, ?it/s]

Extracting data files: 0it [00:00, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Filter:   0%|          | 0/67931 [00:00<?, ? examples/s]

In [45]:
def split_dataset(dataset, val_size=0.2, test_size=0.1):
    """
    Returns a tuple with three random train, validation, and test subsets by splitting the passed dataset.
    Size of the validation and test sets defined as a fraction of 1 with the `val_size` and `test_size` arguments.
    """
    print("Splitting dataset into train, validation, and test sets...")

    # Calculate the total size of the validation and test sets
    split_size = round(val_size + test_size, 3)

    # Split the dataset into train and (val + test) sets
    dataset = dataset.train_test_split(shuffle=True, test_size=split_size)

    # Calculate the ratio of the test set size to the total of (val + test) set size
    split_ratio = round(test_size / (test_size + val_size), 3)

    # Split the (val + test) set into validation and test sets
    val_test_sets = dataset['test'].train_test_split(shuffle=True, test_size=split_ratio)

    # Assign the train, validation, and test sets to variables
    train_dataset = dataset["train"]
    val_dataset = val_test_sets["train"]
    test_dataset = val_test_sets["test"]

    # Return the train, validation, and test sets as a tuple
    return train_dataset, val_dataset, test_dataset

# Split the dataset into train, validation, and test sets using the specified sizes
train_dataset, val_dataset, test_dataset = split_dataset(dataset, val_size, test_size)

Splitting dataset into train, validation, and test sets...


In [46]:
train_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 47551
})

In [48]:
def process_examples(examples, image_processor):
    """Processor helper function. Used to process batches of images using the
    passed image_processor.

    Parameters
    ----------
    examples
        A batch of image examples.

    image_processor
        A HuggingFace image processor for the selected model.

    Returns
    -------
    examples 
        A batch of processed image examples.
    """
    
    # Récupérer le lot d'images
    images = examples['image']

    # Prétraiter les images
    inputs = image_processor(images=images)
    
    # Ajouter les valeurs de pixels prétraitées aux exemples
    examples['pixel_values'] = inputs['pixel_values']

    # Libérer la mémoire si les images ne sont plus nécessaires
    del images

    return examples

In [49]:
def apply_processing(model_name, train_dataset, val_dataset, test_dataset):
 
    """
    Apply model's image AutoProcessor to transform train, validation, and test subsets.
    AutoProcessor effectue le Redimensionnement,Normalisation,onversion en tableau de pixels, Positionnement des patchs...
    Returns train, validation, and test datasets with `pixel_values` in torch tensor type.
    """

    # Extend the features
    features = train_dataset.features.copy()
    features.update({
        'pixel_values': Array3D(dtype="float32", shape=(3, 224, 224)),
    })

    ''' features=
    {'image': Image(decode=True, id=None),
     'label': ClassLabel(names=['10', '1140', '1160', '1180', '1280', '1281', '1300', '1301', '1302', '1320', '1560', '1920', '1940', '2060', '2220', '2280', '2403', '2462', '2522', '2582', '2583', '2585', '2705', '2905', '40', '50', '60'], id=None),
     'pixel_values': Array3D(shape=(3, 224, 224), dtype='float32', id=None)
     }
    '''
    
    # Instantiate image_processor
    image_processor = AutoProcessor.from_pretrained(model_name)
    
    datasets = {"train": train_dataset, "val": val_dataset, "test": test_dataset}

    for dataset_name in ["train", "val", "test"]:
        
        datasets[dataset_name] = datasets[dataset_name].map(process_examples, batched=True, features=features, fn_kwargs={"image_processor": image_processor})
       
        datasets[dataset_name].set_format('torch', columns=['pixel_values', 'label'])
        
        datasets[dataset_name] = datasets[dataset_name].remove_columns("image")
    
    
    return datasets["train"], datasets["val"], datasets["test"]


# Apply AutoProcessor
train_dataset, val_dataset, test_dataset = apply_processing(model_name,train_dataset, val_dataset, test_dataset)

Map:   0%|          | 0/47551 [00:00<?, ? examples/s]

Map:   0%|          | 0/13593 [00:00<?, ? examples/s]

Map:   0%|          | 0/6787 [00:00<?, ? examples/s]

In [50]:
train_dataset

Dataset({
    features: ['label', 'pixel_values'],
    num_rows: 47551
})

In [51]:
# Save train, validation and test preprocessed datasets
train_dataset.save_to_disk(train_save_path, num_shards=1)

val_dataset.save_to_disk(val_save_path, num_shards=1)

test_dataset.save_to_disk(test_save_path, num_shards=1)

Saving the dataset (0/1 shards):   0%|          | 0/47551 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/13593 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/6787 [00:00<?, ? examples/s]

In [56]:
train_dataset = load_from_disk(train_save_path)

val_dataset = load_from_disk(val_save_path)

test_dataset=load_from_disk(test_save_path)

In [53]:
train_dataset

Dataset({
    features: ['label', 'pixel_values'],
    num_rows: 47551
})

In [57]:
#Travailler avec 1% des données
# Specify the percentage of data to keep (1% in this case)

subset_percentage = 0.001

# Split the datasets into a small subset (1%)

train_dataset= train_dataset.train_test_split(shuffle=True, test_size=0.99)["train"]
val_dataset= val_dataset.train_test_split(shuffle=True, test_size=0.99)["train"]
test_dataset= test_dataset.train_test_split(shuffle=True, test_size=0.99)["train"]
test_dataset


Dataset({
    features: ['label', 'pixel_values'],
    num_rows: 67
})

In [58]:
num_classes = train_dataset.features["label"].num_classes

# Download model from model hub : Décomposition de l'image en patchs,Tokenisation des patchs,Position Embeddings,Encodage par les couches Transformer,Sortie de Classification
model = ViTForImageClassification.from_pretrained(model_name, num_labels=num_classes, ignore_mismatched_sizes=True)

# Download feature extractor from hub
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([27]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([27, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [59]:

# Compute metrics function for binary classification
acc_metric = evaluate.load("accuracy", module_type="metric")

def compute_metrics(eval_pred):
    predicted_probs, labels = eval_pred
    # Accuracy
    predicted_labels = np.argmax(predicted_probs, axis=1)
    acc = acc_metric.compute(predictions=predicted_labels, references=labels)
    return acc

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [60]:
#define the correspondance 'id2label' et 'label2id' to have  class names
# Change labels
id2label = {key:train_dataset.features["label"].names[index] for index,key in enumerate(model.config.id2label.keys())}
label2id = {train_dataset.features["label"].names[index]:value for index,value in enumerate(model.config.label2id.values())}
model.config.id2label = id2label
model.config.label2id = label2id


In [61]:
model_dir = "./model"
output_data_dir = "./outputs"

# Total number of training epochs to perform
num_train_epochs = 10
# The batch size per GPU/TPU core/CPU for training
per_device_train_batch_size = 32
# The batch size per GPU/TPU core/CPU for evaluation
per_device_eval_batch_size = 64
# The initial learning rate for AdamW optimizer
learning_rate = 2e-5
# Number of steps used for a linear warmup from 0 to learning_rate
warmup_steps = 500
# The weight decay to apply to all layers except all bias and LayerNorm weights in AdamW optimizer
weight_decay = 0.01

main_metric_for_evaluation = "accuracy"

In [62]:
# Define training args
training_args = TrainingArguments(
    output_dir = model_dir,
    num_train_epochs = num_train_epochs,
    per_device_train_batch_size = per_device_train_batch_size,
    per_device_eval_batch_size = per_device_eval_batch_size,
    warmup_steps = warmup_steps,
    weight_decay = weight_decay,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    logging_strategy = "epoch",
    logging_dir = f"{output_data_dir}/logs",
    learning_rate = float(learning_rate),
    load_best_model_at_end = True,
    metric_for_best_model = main_metric_for_evaluation,
)

# Create Trainer instance
trainer = Trainer(
    model = model,
    args = training_args,
    compute_metrics = compute_metrics,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    data_collator = default_data_collator,
    tokenizer = feature_extractor
)

In [None]:
trainer.train() 

Epoch,Training Loss,Validation Loss,Accuracy
1,3.3319,3.324025,0.044444
2,3.3158,3.314242,0.044444
3,3.2812,3.298808,0.044444


In [None]:
log_history = pd.DataFrame(trainer.state.log_history)
log_history = log_history.fillna(0)
log_history = log_history.groupby(['epoch']).sum()
log_history

In [None]:
log_history[["loss", "eval_loss", "eval_accuracy"]].plot(subplots=True)

In [None]:
trainer.save_model(model_dir)

In [None]:
# Load dataset
test_dataset = load_from_disk(test_save_path)

# Load trained model
model = ViTForImageClassification.from_pretrained('./model')

# Load feature extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('./model')
    
# Create Trainer instance
trainer = Trainer(
    model=model,
    compute_metrics=compute_metrics,
    data_collator=default_data_collator,
    tokenizer=feature_extractor
)

# Evaluate model
eval_results = trainer.evaluate(eval_dataset=test_dataset)

# Writes eval_result to file which can be accessed later
with open(os.path.join(output_data_dir, "eval_results.json"), "w") as writer:
    print(f"Logging evaluation results at {output_data_dir}/eval_results.json")
    writer.write(json.dumps(eval_results))

print(json.dumps(eval_results, indent=4))