# Building model

In [10]:
import os
import torch
from datasets import Dataset
from transformers import AutoImageProcessor, ViTForImageClassification, TrainingArguments, Trainer
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from torchvision.datasets import ImageFolder
from PIL import Image
import numpy as np
import pandas as pd
from transformers import logging as hf_logging

hf_logging.set_verbosity_info()  # Vezi loguri »ôi √Æn consolƒÉ

# === CONFIGURARE ===
model_name = "google/vit-base-patch16-224-in21k"
data_dir = "data"  # aici ai "train", "val"
image_size = 224
batch_size = 16
num_classes = 3
epochs = 5

# === LOAD MODEL & PREPROCESSOR ===
processor = AutoImageProcessor.from_pretrained(model_name)

# === TRANSFORMARE ===
transform = Compose([
    Resize((image_size, image_size)),
    CenterCrop(image_size),
    ToTensor(),
    Normalize(mean=processor.image_mean, std=processor.image_std)
])

# === LOAD IMAGES ===
def load_split(split):
    folder = os.path.join(data_dir, split)
    dataset = ImageFolder(folder, transform=transform)
    return dataset

train_ds = load_split("train")
val_ds = load_split("val")

# === WRAP TORCH DATASET IN HF DATASET ===
def convert_to_hf_dataset(torch_ds):
    images, labels = [], []
    for img, label in torch_ds:
        images.append(img)
        labels.append(label)
    return Dataset.from_dict({"pixel_values": images, "labels": labels})

train_hf = convert_to_hf_dataset(train_ds)
val_hf = convert_to_hf_dataset(val_ds)

# === DEFINE MODEL ===
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=num_classes
)

# === METRICƒÇ ===
def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    return {"accuracy": np.mean(preds == p.label_ids)}

# === ARGUMENTE TRAINING ===
args = TrainingArguments(
    output_dir="./vit-output",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    logging_dir="./logs",
    logging_steps=1,  # Logare la fiecare pas
    report_to="none",  # evitƒÉ integrarea cu wandb sau tensorboard implicit
    disable_tqdm=False,  # permite progres vizibil
    save_strategy="no",  # nu salva checkpointuri inutile
)

# === TRAINER ===
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_hf,
    eval_dataset=val_hf,
    tokenizer=processor,
    compute_metrics=compute_metrics
)

# === TRAIN ===
trainer.train()

# === LOGURI ===
log_df = pd.DataFrame(trainer.state.log_history)
print("\nüìä Jurnal de antrenare:")
display(log_df)

# === OPTIONAL: Salvare loguri
log_df.to_csv("logs.csv", index=False)
print("‚úÖ Logurile au fost salvate √Æn logs.csv")

# === SAVE MODEL ===
model.save_pretrained("./vit-model")
processor.save_pretrained("./vit-model")
print("‚úÖ Model salvat √Æn './vit-model'")


loading configuration file preprocessor_config.json from cache at C:\Users\User\.cache\huggingface\hub\models--google--vit-base-patch16-224-in21k\snapshots\b4569560a39a0f1af58e3ddaf17facf20ab919b0\preprocessor_config.json
loading configuration file config.json from cache at C:\Users\User\.cache\huggingface\hub\models--google--vit-base-patch16-224-in21k\snapshots\b4569560a39a0f1af58e3ddaf17facf20ab919b0\config.json
Model config ViTConfig {
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "pooler_act": "tanh",
  "pooler_output_size": 768,
  "qkv_bias": true,
  "transformers_version": "4.51.3"
}

Fast image processor class <class 'trans

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)





üìä Jurnal de antrenare:


Unnamed: 0,loss,grad_norm,learning_rate,epoch,step,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss
0,1.0865,1.331453,5.000000e-05,0.052632,1,,,,,
1,1.1068,1.272911,4.947368e-05,0.105263,2,,,,,
2,1.1142,1.497016,4.894737e-05,0.157895,3,,,,,
3,1.0825,1.315557,4.842105e-05,0.210526,4,,,,,
4,1.1337,1.422301,4.789474e-05,0.263158,5,,,,,
...,...,...,...,...,...,...,...,...,...,...
91,0.9599,1.608701,2.105263e-06,4.842105,92,,,,,
92,0.8695,1.553878,1.578947e-06,4.894737,93,,,,,
93,0.9162,1.655388,1.052632e-06,4.947368,94,,,,,
94,1.0164,2.432333,5.263158e-07,5.000000,95,,,,,


Configuration saved in ./vit-model\config.json


‚úÖ Logurile au fost salvate √Æn logs.csv


Model weights saved in ./vit-model\model.safetensors
Image processor saved in ./vit-model\preprocessor_config.json


‚úÖ Model salvat √Æn './vit-model'


## Imbunatatire model

In [1]:
import os
import torch
import numpy as np
import pandas as pd
from PIL import Image
from datasets import Dataset
from transformers import AutoImageProcessor, ViTForImageClassification, TrainingArguments, Trainer
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize, RandomHorizontalFlip, RandomRotation
from torchvision.datasets import ImageFolder
from transformers import logging as hf_logging

hf_logging.set_verbosity_info()  # ActivƒÉm loguri informative


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

# configurari

In [2]:
# === CONFIGURARE ===
model_name = "google/vit-base-patch16-224-in21k"
data_dir = "data"  # trebuie sƒÉ con»õinƒÉ subfolderele "train" »ôi "val"
image_size = 224
batch_size = 16
num_classes = 3  # normal, benign, malignant
epochs = 5


# Incarcare procesor si definirea transformarilor

In [3]:
from transformers import AutoImageProcessor
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize

# === √éNCƒÇRCARE PROCESSOR ===
processor = AutoImageProcessor.from_pretrained(model_name)

# === TRANSFORMARE IMAGINI ===
from torchvision.transforms import RandomHorizontalFlip, RandomRotation, ColorJitter

train_transform = Compose([
    Resize((image_size, image_size)),
    RandomHorizontalFlip(p=0.5),
    RandomRotation(degrees=15),
    ColorJitter(brightness=0.2, contrast=0.2),
    ToTensor(),
    Normalize(mean=processor.image_mean, std=processor.image_std)
])

val_transform = Compose([
    Resize((image_size, image_size)),
    CenterCrop(image_size),
    ToTensor(),
    Normalize(mean=processor.image_mean, std=processor.image_std)
])




loading configuration file preprocessor_config.json from cache at C:\Users\User\.cache\huggingface\hub\models--google--vit-base-patch16-224-in21k\snapshots\b4569560a39a0f1af58e3ddaf17facf20ab919b0\preprocessor_config.json
loading configuration file config.json from cache at C:\Users\User\.cache\huggingface\hub\models--google--vit-base-patch16-224-in21k\snapshots\b4569560a39a0f1af58e3ddaf17facf20ab919b0\config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "transformers_version": "4.40.1"
}

size should be a dictionary on of the

# Incarcare Imagini

In [4]:
from torchvision.datasets import ImageFolder
import os

# === √éNCƒÇRCARE IMAGINI ===
def load_split(split):
    folder = os.path.join(data_dir, split)
    if split == "train":
        dataset = ImageFolder(folder, transform=train_transform)
    else:
        dataset = ImageFolder(folder, transform=val_transform)
    return dataset


train_ds = load_split("train")
val_ds = load_split("val")


# Conversie la HuggingFace DataSet  

In [5]:
from datasets import Dataset

# === CONVERSIE LA HUGGINGFACE DATASET ===
def convert_to_hf_dataset(torch_ds):
    images, labels = [], []
    for img, label in torch_ds:
        images.append(img)
        labels.append(label)
    return Dataset.from_dict({"pixel_values": images, "labels": labels})

train_hf = convert_to_hf_dataset(train_ds)
val_hf = convert_to_hf_dataset(val_ds)


# Definirea modelului VIT

In [6]:
from transformers import ViTForImageClassification

# === DEFINIREA MODELULUI ===
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=num_classes  # Clasificare √Æn 3 categorii: normal, benign, malign
)


loading configuration file config.json from cache at C:\Users\User\.cache\huggingface\hub\models--google--vit-base-patch16-224-in21k\snapshots\b4569560a39a0f1af58e3ddaf17facf20ab919b0\config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "transformers_version": "4.40.1"
}

loading weights file model.safetensors from cache at C:\Users\User\.cache\huggingface\hub\models--google-

# Definirea metricii de evaluare si setarea hyperparametrilor pentru antrenare

In [12]:
import numpy as np

# === METRICƒÇ CUSTOM ===
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids

    acc = accuracy_score(labels, preds)
    prec = precision_score(labels, preds, average="weighted", zero_division=0)
    rec = recall_score(labels, preds, average="weighted", zero_division=0)
    f1 = f1_score(labels, preds, average="weighted", zero_division=0)
    
    # Optional: matrice de confuzie
    cm = confusion_matrix(labels, preds)

    print("\nüîç Matrice de confuzie:\n", cm)
    print( {"accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1": f1})

    return {
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1": f1
    }




In [13]:
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./vit-output-improved",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    learning_rate=3e-5,
    logging_dir="./logs",
    logging_steps=1,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="none",
    disable_tqdm=False,
)

PyTorch: setting up devices


# Definirea obiectului Trainer

In [14]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_hf,
    eval_dataset=val_hf,
    tokenizer=processor,
    compute_metrics=compute_metrics
)


# Antrenarea modelului

In [15]:
trainer.train()
log_df = pd.DataFrame(trainer.state.log_history)
print("\nüìä Jurnal de antrenare:")
display(log_df)

# Salvare √Æn CSV
log_df.to_csv("logs.csv", index=False)
print("‚úÖ Logurile au fost salvate √Æn logs.csv")
model.save_pretrained("./vit-model-improved")
processor.save_pretrained("./vit-model-improved")
print("‚úÖ Model salvat √Æn './vit-model-improved'")


***** Running training *****
  Num examples = 299
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 190
  Number of trainable parameters = 85,800,963


Epoch,Training Loss,Validation Loss


***** Running Evaluation *****
  Num examples = 76
  Batch size = 16



üîç Matrice de confuzie:
 [[15  4  6]
 [ 9  1 15]
 [ 9  5 12]]
{'accuracy': 0.3684210526315789, 'precision': 0.3068181818181818, 'recall': 0.3684210526315789, 'f1': 0.32810364602329906}


Saving model checkpoint to ./vit-output-improved\checkpoint-19
Configuration saved in ./vit-output-improved\checkpoint-19\config.json
Model weights saved in ./vit-output-improved\checkpoint-19\model.safetensors
Image processor saved in ./vit-output-improved\checkpoint-19\preprocessor_config.json
***** Running Evaluation *****
  Num examples = 76
  Batch size = 16



üîç Matrice de confuzie:
 [[16  6  3]
 [10  2 13]
 [18  1  7]]
{'accuracy': 0.32894736842105265, 'precision': 0.2968356332200726, 'recall': 0.32894736842105265, 'f1': 0.2889993526014525}


Saving model checkpoint to ./vit-output-improved\checkpoint-38
Configuration saved in ./vit-output-improved\checkpoint-38\config.json
Model weights saved in ./vit-output-improved\checkpoint-38\model.safetensors
Image processor saved in ./vit-output-improved\checkpoint-38\preprocessor_config.json
***** Running Evaluation *****
  Num examples = 76
  Batch size = 16



üîç Matrice de confuzie:
 [[15  7  3]
 [11  2 12]
 [15  5  6]]
{'accuracy': 0.3026315789473684, 'precision': 0.2650834403080873, 'recall': 0.3026315789473684, 'f1': 0.2706057473694652}


Saving model checkpoint to ./vit-output-improved\checkpoint-57
Configuration saved in ./vit-output-improved\checkpoint-57\config.json
Model weights saved in ./vit-output-improved\checkpoint-57\model.safetensors
Image processor saved in ./vit-output-improved\checkpoint-57\preprocessor_config.json


KeyboardInterrupt: 

‚ûï  
    RandomVerticalFlip()

    RandomResizedCrop() √Æn loc de CenterCrop

    GaussianBlur() (aten»õie la claritate!)

üîß  lr_scheduler_type='cosine' sau warmup_steps

üß™ Cre»ôte num_train_epochs=10 sau folose»ôte early stopping + evaluare la step

MAI MULTE DATE

In [11]:
# === LOGURI ===
import pandas as pd
log_df = pd.DataFrame(trainer.state.log_history)
log_df.to_csv("logs.csv", index=False)
print("‚úÖ Logurile au fost salvate √Æn logs.csv")


‚úÖ Logurile au fost salvate √Æn logs.csv
