<a href="https://colab.research.google.com/github/HatemMoushir/Shark-identification-1/blob/main/shark-vit-base-patch16-224.ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Fetch the dataset archive from Google Drive by its file ID; according to the
# recorded output of this cell it is saved as /content/Shark_project_split.zip.
!gdown 165LwqivtdzeXwMaj2VeGzgspqdnOiyrq

Downloading...
From (original): https://drive.google.com/uc?id=165LwqivtdzeXwMaj2VeGzgspqdnOiyrq
From (redirected): https://drive.google.com/uc?id=165LwqivtdzeXwMaj2VeGzgspqdnOiyrq&confirm=t&uuid=30d20897-6cef-4d8a-a625-a84d6a25e28f
To: /content/Shark_project_split.zip
100% 139M/139M [00:03<00:00, 40.6MB/s]


In [None]:
!unzip "/content/Shark_project_split.zip" -d "/content/Shark_project_split"

In [None]:

#✅ تثبيت المكتبات المطلوبة

!pip install -q datasets transformers evaluate torchvision

#✅ Imports

import inspect
import os

import evaluate
import numpy as np
import torch
from datasets import Dataset, DatasetDict, Image as HFImage # Import HF Image feature
from PIL import Image as PILImage # Import PIL Image separately
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer

#✅ Load the image folders and convert them into a DatasetDict

def convert_imagefolder_to_datasetdict(data_dir):
    """Build a Hugging Face ``DatasetDict`` from an ImageFolder-style directory tree.

    Looks for ``train``, ``val`` and ``test`` sub-directories of ``data_dir``. Every
    split that exists is scanned with ``torchvision.datasets.ImageFolder`` and turned
    into a ``Dataset`` with an ``image`` column (file paths cast to the HF ``Image``
    feature, so images are loaded as PIL objects when accessed) and an integer
    ``label`` column. Missing splits are skipped with a printed warning.

    Args:
        data_dir: Directory that contains the split sub-directories.

    Returns:
        tuple: ``(dataset_dict, class_names)`` where ``class_names`` is the class list
        reported by ``ImageFolder``; a label id is an index into that list.

    Raises:
        FileNotFoundError: If none of the split directories exist under ``data_dir``.
        ValueError: If two splits report different class lists, because the integer
            labels of those splits would then refer to different classes.
    """
    dataset_splits = {}
    class_names = None  # stays None until the first existing split is scanned
    for split in ('train', 'val', 'test'):
        path = os.path.join(data_dir, split)
        if not os.path.exists(path):
            print(f"Warning: Split directory not found: {path}")
            continue

        # Use ImageFolder to discover image files and derive labels from folder names
        imagefolder_dataset = ImageFolder(path)

        # Label ids are positions in `classes`, so every split must agree on that list;
        # otherwise the same id would mean different classes in different splits.
        if class_names is None:
            class_names = imagefolder_dataset.classes
        elif imagefolder_dataset.classes != class_names:
            raise ValueError(
                f"Class folders in split '{split}' ({imagefolder_dataset.classes}) "
                f"do not match those of the earlier splits ({class_names})"
            )

        # Extract image paths and labels
        image_paths = [img_path for img_path, _ in imagefolder_dataset.imgs]
        labels = [label for _, label in imagefolder_dataset.imgs]

        # Create the Hugging Face Dataset from paths and labels, then cast the
        # 'image' column to the Image feature so paths are decoded on access.
        dataset_splits[split] = Dataset.from_dict({
            'image': image_paths,
            'label': labels
        }).cast_column("image", HFImage())

    if class_names is None:
        # Nothing was loaded: fail with an actionable message instead of an
        # UnboundLocalError on `class_names`.
        raise FileNotFoundError(
            f"No 'train', 'val' or 'test' directory found under {data_dir}"
        )

    return DatasetDict(dataset_splits), class_names

# Root folder of the extracted dataset; the converter looks for the
# train/val/test sub-folders inside it.
data_dir = "/content/Shark_project_split"
dataset_dict, class_names = convert_imagefolder_to_datasetdict(data_dir)

# Report what was loaded: class count, class names and the split layout.
print(
    "Dataset loaded successfully!",
    f"Number of classes: {len(class_names)}",
    f"Class names: {class_names}",
    dataset_dict,
    sep="\n",
)

#✅ Load the image processor

# Instantiate the processor from the same checkpoint name that the model is
# loaded from further down, so the preprocessing settings come from that checkpoint.
image_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

#✅ Image preprocessing function (applied with .map)

def transform(examples):
    """Turn a batch of examples into model inputs.

    Args:
        examples: Batch mapping with an 'image' entry (objects exposing ``.mode`` and
            ``.convert``, i.e. PIL images once the column is cast to the HF Image
            feature) and a 'label' entry.

    Returns:
        The output of ``image_processor`` for the batch (requested as PyTorch
        tensors), with the batch labels added under the 'labels' key.
    """
    rgb_images = []
    for picture in examples['image']:
        # Anything that is not already RGB (e.g. grayscale or RGBA) is converted,
        # so the processor always receives 3-channel input.
        if picture.mode != "RGB":
            picture = picture.convert("RGB")
        rgb_images.append(picture)

    # Preprocess the whole batch in one call.
    batch = image_processor(rgb_images, return_tensors='pt')
    batch['labels'] = examples['label']
    return batch

# Run `transform` over every split, one batch of examples at a time.
print("\nApplying transformations to the dataset...")
dataset_dict = dataset_dict.map(function=transform, batched=True)
print("Transformations applied!")

#✅ Drop the original column (optional, but good practice to save memory)

dataset_dict = dataset_dict.remove_columns(column_names='image')
print("Original 'image' column removed.")
print(dataset_dict) # Show the dataset structure after the transformation

#✅ Load the model

# Build the ViT classifier from the pretrained checkpoint with a head sized for
# this dataset, and register the id <-> class-name mappings in the model config.
model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=len(class_names),                               # one output per class
    id2label=dict(enumerate(class_names)),                     # label id -> class name
    label2id=dict(zip(class_names, range(len(class_names)))),  # class name -> label id
    ignore_mismatched_sizes=True,  # do not fail if checkpoint weight shapes differ from this model's
)
print("\nModel loaded successfully!")

#✅ Training settings

training_args = TrainingArguments(
    # Where checkpoints and logs are written; keep only the 2 most recent checkpoints.
    output_dir="./vit-shark",
    save_total_limit=2,
    report_to="none",  # no external experiment trackers

    # Schedule: 5 epochs, evaluating and checkpointing after each one.
    num_train_epochs=5,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,

    # Batching: 8 samples per device, gradients accumulated over 2 steps
    # (16 samples per optimizer update on each device).
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,

    # After training, reload the checkpoint with the best "accuracy" metric.
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",

    # Mixed precision only when a CUDA device is present.
    fp16=torch.cuda.is_available(),
)
print("Training arguments configured.")

#✅ Evaluation function

# Accuracy metric object from the `evaluate` library, used by compute_metrics below.
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score a set of predictions with the accuracy metric.

    Args:
        eval_pred: Pair that unpacks into ``(logits, labels)``.

    Returns:
        dict: Whatever ``accuracy.compute`` returns for the predicted class ids
        (argmax over the last axis of the logits) against the labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=-1)  # highest-scoring class per sample
    return accuracy.compute(predictions=predicted_ids, references=references)

print("Compute metrics function defined.")

#✅ Create the Trainer

# The keyword used to hand the image processor to Trainer depends on the installed
# transformers release: newer releases take `processing_class` and deprecate
# `tokenizer`, older ones only know `tokenizer`. The install above is unpinned, so
# pick whichever keyword this Trainer actually accepts.
_trainer_init_params = inspect.signature(Trainer.__init__).parameters
_processor_kwarg = "processing_class" if "processing_class" in _trainer_init_params else "tokenizer"

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_dict['train'],
    eval_dataset=dataset_dict['val'],
    compute_metrics=compute_metrics,
    **{_processor_kwarg: image_processor},  # saved alongside the model checkpoints
)
print("Trainer initialized.")

#✅ Train the model

# Run training with the model, datasets and arguments given to the Trainer above.
print("\nStarting model training...")
trainer.train()
print("Model training complete!")

#---

## ✅ Evaluate the model on the test set

print("\nEvaluating the model on the test set:")
# metric_key_prefix="test" reports the metrics as test_* instead of the default
# eval_*, so they cannot be confused with the per-epoch validation metrics.
test_results = trainer.evaluate(eval_dataset=dataset_dict['test'], metric_key_prefix="test")
print(f"Test Set Evaluation Results: {test_results}")

#---

## ✅ Save the model and the image processor (optional)

# Both artifacts go into the same folder.
save_path = "./vit-shark-model"
print(f"\nSaving model and image processor to {save_path}...")
trainer.save_model(output_dir=save_path)
image_processor.save_pretrained(save_directory=save_path)
print("Model and image processor saved successfully!")