In [None]:
# Install the required libraries
!pip install gdown
!pip install tqdm
!pip install huggingface_hub




In [None]:
import os
from huggingface_hub import hf_hub_download
import tarfile

# Download the dataset archive from the Hugging Face Hub into the current directory
hf_hub_download(repo_id='RayanAi/inat_train_modified',
               filename="inat_train_modified.tar.gz",
               repo_type="dataset",
               local_dir=".")

# Extract the tar.gz archive into the current directory
# NOTE(review): extractall() is called without a `filter` argument, so archive members are
# written exactly as stored; consider filter="data" if the installed Python supports it.
with tarfile.open("inat_train_modified.tar.gz", "r:gz") as tar:
    tar.extractall(path=".")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


inat_train_modified.tar.gz:   0%|          | 0.00/11.4G [00:00<?, ?B/s]

In [None]:
from typing import Iterator
from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms

class Node:
    """One node of a label hierarchy (a simple prefix tree).

    Attributes:
        name: Tuple holding the path from the top of the hierarchy down to this
            node (the root is created by the caller with its own tuple).
        children: Mapping from the next path component to the child ``Node``.
    """

    def __init__(self, name):
        self.name = name
        # Number of entities that were routed *through* this node to a descendant.
        self._count = 0
        self.children = {}
        # Entities stored directly on this node (non-empty only for leaves).
        self._entities = []

    def add_to_node(self, path, entity, level=0):
        """Insert ``entity`` under ``path``, creating intermediate nodes on demand.

        Args:
            path: Sequence of path components.
            entity: Arbitrary payload stored at the node where ``path`` ends.
            level: Index into ``path`` handled by this node (used by the recursion).
        """
        if level < len(path):
            key = path[level]
            child = self.children.get(key)
            if child is None:
                child = Node(path[:level + 1])
                self.children[key] = child
            child.add_to_node(path, entity, level=level + 1)
            self._count += 1
        else:
            # The whole path has been consumed: the entity belongs to this node.
            self._entities.append(entity)

    @property
    def is_leaf(self):
        """True when at least one entity is stored directly on this node."""
        return bool(self._entities)

    @property
    def count(self):
        """Number of entities on this node (leaf) or routed through it (inner node)."""
        return len(self._entities) if self.is_leaf else self._count

    @property
    def entities(self):
        """List of ``(entity, leaf_name)`` pairs for every entity at or below this node."""
        if self.is_leaf:
            return [(entity, self.name) for entity in self._entities]
        collected = []
        for child in self.children.values():
            collected += child.entities
        return collected

    def level_iterator(self, level=None):
        """Yield the nodes located ``level`` steps below this node.

        With ``level=None`` the leaves are yielded. An ``Exception`` is raised
        (lazily, during iteration) when a leaf is reached before ``level`` hits 0.
        """
        if level == 0 or (level is None and self.is_leaf):
            yield self
            return
        if self.is_leaf:
            raise Exception("Incorrect level is specified in tree.")
        remaining = None if level is None else level - 1
        for child in self.children.values():
            yield from child.level_iterator(remaining)

    def print_node(self, level=0, max_level=None):
        """Print this subtree, indenting four spaces per depth, down to ``max_level``."""
        indent = ' ' * (level * 4)
        print(indent + f"{self.name[-1]} ({self.count})")
        if max_level is None or level < max_level:
            for child in self.children.values():
                child.print_node(level + 1, max_level=max_level)
        return

class HierarchicalDataset(Dataset):
    """Image dataset whose class labels are prefixes of a hierarchy encoded in folder names.

    Every sub-directory of ``dataset_path`` is treated as one group. Its name is split
    on ``"_"``; the first token is discarded and the remaining tokens form the
    hierarchical path. The path is truncated to ``level`` components and that
    truncated tuple is the sample's class.

    Args:
        dataset_path: Directory containing one sub-directory per group.
        level: Number of path components used as the label (defaults to 7 when ``None``).
        transform: Optional callable applied to the RGB PIL image in ``__getitem__``.

    Attributes:
        tree: Root ``Node`` of the hierarchy built from all kept samples.
        data: List of ``{"image_path": str, "group": tuple}`` records.
        classes: Mapping from group tuple to integer class index (sorted order).
    """

    def __init__(self, dataset_path, level=None, transform=None):
        self.tree = Node(("Dataset",))  # Initialize with root
        self.level = level if level is not None else 7  # Default level 7
        self.classes = set()
        self.data = []
        self.transform = transform

        for group_name in sorted(os.listdir(dataset_path)):
            group_dir = os.path.join(dataset_path, group_name)
            if not os.path.isdir(group_dir):
                continue

            # The label path depends only on the folder name, so parse it once per
            # folder rather than once per image.
            group = tuple(group_name.split("_")[1:])
            if len(group) < self.level:
                continue  # Skip folders whose path is shorter than the requested level
            group = group[:self.level]

            image_names = sorted(os.listdir(group_dir))
            if not image_names:
                continue  # An empty folder contributes neither samples nor a class
            self.classes.add(group)

            for image_name in image_names:
                # The sample's position in self.data doubles as its entity id in the tree.
                index = len(self.data)
                self.data.append({
                    "image_path": os.path.join(group_dir, image_name),
                    "group": group,
                })
                self.tree.add_to_node(group, index)

        # Freeze the class set into a deterministic group -> index mapping.
        self.classes = {group: idx for idx, group in enumerate(sorted(self.classes))}
        print(f"Dataset Length: {len(self.data)}")
        print("Hierarchical Structure (up to level 2):")
        self.tree.print_node(max_level=2)
        print(f"Number of classes: {len(self.classes)}")

    def __len__(self):
        """Number of samples kept in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, target)`` where ``target`` is the integer class index."""
        sample = self.data[idx]
        # Use a context manager so the underlying file handle is released promptly;
        # convert() returns a new image that stays valid after the file is closed.
        with Image.open(sample["image_path"]) as raw_image:
            image = raw_image.convert('RGB')
        group = sample["group"][:self.level]
        target = self.classes[group]
        if self.transform:
            image = self.transform(image)
        return image, target

    def get_group_iterator(self, level=None) -> Iterator[Node]:
        """Yield the hierarchy nodes at ``level`` (the leaves when ``level`` is ``None``)."""
        for group in self.tree.level_iterator(level):
            yield group


In [None]:
from torchvision import transforms

# Per-channel normalisation statistics shared by the training and validation pipelines
_NORM_MEAN = (0.4556, 0.4714, 0.3700)
_NORM_STD = (0.2370, 0.2318, 0.2431)

# Data augmentation for the training set
_train_steps = [
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
train_transform = transforms.Compose(_train_steps)

# Deterministic preprocessing for the validation set
_val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
val_transform = transforms.Compose(_val_steps)


In [None]:
import torch
from torchvision import models
from torch import nn

def get_model(num_classes):
    """Build an ImageNet-pretrained ResNet-50 with a fresh classification head.

    Args:
        num_classes: Number of output units of the replacement ``fc`` layer.

    Returns:
        The ResNet-50 module whose final fully connected layer maps the backbone
        features to ``num_classes`` logits.
    """
    # `pretrained=True` is deprecated in favour of the `weights=` argument.
    # IMAGENET1K_V1 is the checkpoint that `pretrained=True` selects.
    weights_enum = getattr(models, "ResNet50_Weights", None)
    if weights_enum is not None:
        model = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
    else:
        # Older torchvision releases without the weights enum.
        model = models.resnet50(pretrained=True)

    # Replace the 1000-way ImageNet head with one sized for this task.
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)
    return model

# Select the compute device here: the model is moved to `device` below, and on a fresh
# kernel (Restart & Run All) no earlier cell defines it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the dataset and load the model
dataset_path = 'train'  # Dataset path
full_dataset = HierarchicalDataset(dataset_path=dataset_path, level=2, transform=train_transform)
model = get_model(num_classes=len(full_dataset.classes)).to(device)


Dataset Length: 99970
Hierarchical Structure (up to level 2):
Dataset (99970)
    Animalia (49112)
        Annelida (13)
        Arthropoda (29675)
        Chordata (18518)
        Cnidaria (124)
        Echinodermata (83)
        Mollusca (699)
    Fungi (1812)
        Ascomycota (396)
        Basidiomycota (1416)
    Plantae (49046)
        Bryophyta (133)
        Chlorophyta (13)
        Marchantiophyta (22)
        Rhodophyta (22)
        Tracheophyta (48856)
Number of classes: 13


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 209MB/s]


In [None]:
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import numpy as np

# Training settings
learning_rate = 1e-4
batch_size = 256
num_epochs = 30
validation_split = 0.1
random_seed = 42

# Split into training and validation sets
dataset_size = len(full_dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

# Seed NumPy's global RNG so the shuffled split is reproducible
np.random.seed(random_seed)
np.random.shuffle(indices)

# The first `split` shuffled indices form the validation set; the rest are used for training
train_indices, val_indices = indices[split:], indices[:split]

train_subset = torch.utils.data.Subset(full_dataset, train_indices)
# A second dataset instance over the same path is used so validation samples get val_transform
val_subset = torch.utils.data.Subset(HierarchicalDataset(dataset_path=dataset_path, level=2, transform=val_transform), val_indices)

# Compute class weights
def _labels_from_metadata(dataset):
    """Return the integer labels of ``dataset`` without loading images, or ``None``.

    Supports a dataset exposing ``data`` (records with a ``"group"`` key), ``classes``
    (group -> index) and ``level``, either directly or wrapped in an object that has
    ``dataset`` and ``indices`` attributes (e.g. ``torch.utils.data.Subset``).
    """
    base, indices = dataset, None
    if hasattr(dataset, "dataset") and hasattr(dataset, "indices"):
        base, indices = dataset.dataset, dataset.indices
    if not all(hasattr(base, attr) for attr in ("data", "classes", "level")):
        return None
    try:
        if indices is None:
            indices = range(len(base.data))
        # Same lookup that the dataset performs when it returns a sample's target.
        return [base.classes[base.data[i]["group"][:base.level]] for i in indices]
    except (KeyError, IndexError, TypeError):
        # Unexpected record layout: let the caller fall back to plain iteration.
        return None


def compute_class_weights(dataset, num_classes=None, device=None):
    """Compute balanced class weights ``total / (num_classes * count_c)``.

    Args:
        dataset: Dataset yielding ``(sample, label)`` pairs. When label metadata is
            available the labels are read from it, so no image is decoded.
        num_classes: Number of classes; defaults to ``len(full_dataset.classes)``.
        device: Device for the returned tensor; defaults to CUDA when available,
            otherwise CPU.

    Returns:
        A float tensor of shape ``(num_classes,)`` on ``device``.

    Raises:
        KeyError: If some class index in ``range(num_classes)`` has no sample.
    """
    if num_classes is None:
        num_classes = len(full_dataset.classes)
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    labels = _labels_from_metadata(dataset)
    if labels is None:
        # Generic fallback: iterate the dataset (this loads every sample).
        labels = (label for _, label in dataset)

    class_counts = {}
    for label in labels:
        label = int(label)
        class_counts[label] = class_counts.get(label, 0) + 1

    missing = [i for i in range(num_classes) if i not in class_counts]
    if missing:
        raise KeyError(f"no samples for class indices {missing}; cannot compute class weights")

    total_samples = len(dataset)
    class_weights = [total_samples / (num_classes * class_counts[i]) for i in range(num_classes)]
    return torch.tensor(class_weights, dtype=torch.float).to(device)

# Class weights are derived from the training subset only
class_weights = compute_class_weights(train_subset)

# Data loaders (only the training loader shuffles)
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# Loss function (class-weighted cross entropy) and optimizer
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

# Learning-rate scheduler: halves the LR when the monitored value stops decreasing for 3 epochs
# NOTE(review): `verbose` may be deprecated in newer PyTorch releases — confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)


Dataset Length: 99970
Hierarchical Structure (up to level 2):
Dataset (99970)
    Animalia (49112)
        Annelida (13)
        Arthropoda (29675)
        Chordata (18518)
        Cnidaria (124)
        Echinodermata (83)
        Mollusca (699)
    Fungi (1812)
        Ascomycota (396)
        Basidiomycota (1416)
    Plantae (49046)
        Bryophyta (133)
        Chlorophyta (13)
        Marchantiophyta (22)
        Rhodophyta (22)
        Tracheophyta (48856)
Number of classes: 13




In [None]:
from tqdm import tqdm

def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; switched to training mode.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sum of ``batch_loss * batch_size`` divided by
        the number of samples seen, and the fraction of correct predictions.
    """
    model.train()
    loss_sum, num_correct, num_seen = 0.0, 0, 0
    batches = tqdm(dataloader, desc="Training", leave=False)

    for inputs, labels in batches:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Running statistics
        batch_loss_value = batch_loss.item()
        loss_sum += batch_loss_value * inputs.size(0)
        predicted = torch.max(logits, 1)[1]
        num_seen += labels.size(0)
        num_correct += (predicted == labels).sum().item()

        batches.set_postfix(loss=batch_loss_value, accuracy=100. * num_correct / num_seen)

    return loss_sum / num_seen, num_correct / num_seen

def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without computing gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sum of ``batch_loss * batch_size`` divided by
        the number of samples seen, and the fraction of correct predictions.
    """
    model.eval()
    loss_sum, num_correct, num_seen = 0.0, 0, 0
    batches = tqdm(dataloader, desc="Validation", leave=False)

    with torch.no_grad():
        for inputs, labels in batches:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            batch_loss_value = criterion(logits, labels).item()

            loss_sum += batch_loss_value * inputs.size(0)
            predicted = torch.max(logits, 1)[1]
            num_seen += labels.size(0)
            num_correct += (predicted == labels).sum().item()

            batches.set_postfix(loss=batch_loss_value, accuracy=100. * num_correct / num_seen)

    return loss_sum / num_seen, num_correct / num_seen


In [None]:
import torch
import os
import zipfile

# Select the compute device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create the directory for checkpoints
checkpoint_dir = './checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
# Single location of the best checkpoint, used both for saving and for reloading
checkpoint_path = os.path.join(checkpoint_dir, 'best_checkpoint.pth')

best_val_acc = 0.0

for epoch in range(1, num_epochs + 1):
    print(f"Epoch {epoch}/{num_epochs}")

    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

    # Update the learning-rate scheduler with the monitored validation loss
    scheduler.step(val_loss)

    # Save a checkpoint whenever validation accuracy improves
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_loss,
            'val_acc': val_acc,
        }, checkpoint_path)
        print(f"New best model saved with Val Acc: {best_val_acc:.4f}")

    print("-" * 30)

# Reload the best model
# The checkpoint holds only tensors, numbers and plain containers, so it can be read with
# weights_only=True. This avoids arbitrary unpickling and the FutureWarning torch.load emits.
best_checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(best_checkpoint['model_state_dict'])

# Save the final model weights
torch.save(model.state_dict(), 'resnet.pth')

# Create the ZIP file for submission
with zipfile.ZipFile('submission.zip', 'w') as zipf:
    zipf.write('resnet.pth')

print("وزن‌های مدل به نام 'resnet.pth' ذخیره شده و در 'submission.zip' بسته‌بندی شدند.")


Epoch 1/30




Train Loss: 1.9650 | Train Acc: 0.5107
Val Loss: 1.6733 | Val Acc: 0.5991
New best model saved with Val Acc: 0.5991
------------------------------
Epoch 2/30




Train Loss: 1.7190 | Train Acc: 0.5759
Val Loss: 1.4045 | Val Acc: 0.7166
New best model saved with Val Acc: 0.7166
------------------------------
Epoch 3/30




Train Loss: 1.6205 | Train Acc: 0.5957
Val Loss: 1.2008 | Val Acc: 0.7167
New best model saved with Val Acc: 0.7167
------------------------------
Epoch 4/30




Train Loss: 1.5355 | Train Acc: 0.5888
Val Loss: 1.2414 | Val Acc: 0.7911
New best model saved with Val Acc: 0.7911
------------------------------
Epoch 5/30




Train Loss: 1.4841 | Train Acc: 0.6071
Val Loss: 1.4102 | Val Acc: 0.7729
------------------------------
Epoch 6/30




Train Loss: 1.3435 | Train Acc: 0.6268
Val Loss: 1.1482 | Val Acc: 0.7052
------------------------------
Epoch 7/30




Train Loss: 1.3733 | Train Acc: 0.6286
Val Loss: 1.2110 | Val Acc: 0.7704
------------------------------
Epoch 8/30




Train Loss: 1.2879 | Train Acc: 0.6312
Val Loss: 1.2077 | Val Acc: 0.7251
------------------------------
Epoch 9/30




Train Loss: 1.3014 | Train Acc: 0.6234
Val Loss: 1.0417 | Val Acc: 0.7291
------------------------------
Epoch 10/30




Train Loss: 1.1829 | Train Acc: 0.6559
Val Loss: 1.2805 | Val Acc: 0.7605
------------------------------
Epoch 11/30




Train Loss: 1.1511 | Train Acc: 0.6388
Val Loss: 1.2966 | Val Acc: 0.6895
------------------------------
Epoch 12/30




Train Loss: 1.1555 | Train Acc: 0.6511
Val Loss: 1.2145 | Val Acc: 0.7693
------------------------------
Epoch 13/30




Train Loss: 1.1184 | Train Acc: 0.6490
Val Loss: 1.1967 | Val Acc: 0.7039
------------------------------
Epoch 14/30




Train Loss: 0.9449 | Train Acc: 0.6834
Val Loss: 0.9940 | Val Acc: 0.7826
------------------------------
Epoch 15/30




Train Loss: 0.8233 | Train Acc: 0.7051
Val Loss: 0.9539 | Val Acc: 0.8097
New best model saved with Val Acc: 0.8097
------------------------------
Epoch 16/30




Train Loss: 0.7949 | Train Acc: 0.7197
Val Loss: 1.0213 | Val Acc: 0.8148
New best model saved with Val Acc: 0.8148
------------------------------
Epoch 17/30




Train Loss: 0.7565 | Train Acc: 0.7183
Val Loss: 1.0384 | Val Acc: 0.8004
------------------------------
Epoch 18/30




Train Loss: 0.7529 | Train Acc: 0.7325
Val Loss: 1.0595 | Val Acc: 0.8184
New best model saved with Val Acc: 0.8184
------------------------------
Epoch 19/30




Train Loss: 0.7418 | Train Acc: 0.7238
Val Loss: 0.8969 | Val Acc: 0.7879
------------------------------
Epoch 20/30




Train Loss: 0.7329 | Train Acc: 0.7253
Val Loss: 1.2793 | Val Acc: 0.7387
------------------------------
Epoch 21/30




Train Loss: 0.7016 | Train Acc: 0.7141
Val Loss: 1.2167 | Val Acc: 0.8206
New best model saved with Val Acc: 0.8206
------------------------------
Epoch 22/30




Train Loss: 0.7383 | Train Acc: 0.7335
Val Loss: 1.0336 | Val Acc: 0.8077
------------------------------
Epoch 23/30




Train Loss: 0.6837 | Train Acc: 0.7360
Val Loss: 1.0744 | Val Acc: 0.8350
New best model saved with Val Acc: 0.8350
------------------------------
Epoch 24/30




Train Loss: 0.6394 | Train Acc: 0.7503
Val Loss: 1.1827 | Val Acc: 0.8407
New best model saved with Val Acc: 0.8407
------------------------------
Epoch 25/30




Train Loss: 0.5625 | Train Acc: 0.7618
Val Loss: 1.2471 | Val Acc: 0.8506
New best model saved with Val Acc: 0.8506
------------------------------
Epoch 26/30




Train Loss: 0.5399 | Train Acc: 0.7672
Val Loss: 1.2548 | Val Acc: 0.8438
------------------------------
Epoch 27/30




Train Loss: 0.5015 | Train Acc: 0.7690
Val Loss: 1.2210 | Val Acc: 0.8498
------------------------------
Epoch 28/30




Train Loss: 0.4909 | Train Acc: 0.7760
Val Loss: 1.2485 | Val Acc: 0.8569
New best model saved with Val Acc: 0.8569
------------------------------
Epoch 29/30




Train Loss: 0.4697 | Train Acc: 0.7747
Val Loss: 1.2112 | Val Acc: 0.8550
------------------------------
Epoch 30/30


  best_checkpoint = torch.load(os.path.join(checkpoint_dir, 'best_checkpoint.pth'), map_location=device)


Train Loss: 0.4552 | Train Acc: 0.7809
Val Loss: 1.1817 | Val Acc: 0.8545
------------------------------
وزن‌های مدل به نام 'resnet.pth' ذخیره شده و در 'submission.zip' بسته‌بندی شدند.


In [None]:
# Mount Google Drive at /content/drive (later cells copy files under /content/drive/MyDrive)
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import zipfile
import os

def zip_multiple_files(input_paths, output_zip_path, compression_level=5):
    """Bundle files and directories into one DEFLATE-compressed zip archive.

    Args:
        input_paths: Iterable of file or directory paths. Paths that do not exist are
            reported on stdout and skipped.
        output_zip_path: Destination path of the archive (overwritten if present).
        compression_level: DEFLATE level; values outside 1-9 are clamped to that range.

    Single files are stored under their base name. Directories are added recursively,
    with archive names relative to the directory's parent, so the top-level folder name
    is kept inside the archive.
    """
    # Clamp to the 1-9 range. The previous upper bound of 1 forced every call to
    # level 1, and the level was never handed to ZipFile at all.
    compression_level = max(1, min(compression_level, 9))

    with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED,
                         compresslevel=compression_level) as zipf:
        for input_path in input_paths:
            # Check if the file or directory exists
            if not os.path.exists(input_path):
                print(f"{input_path} does not exist.")
                continue

            # If it's a directory, recursively add files
            if os.path.isdir(input_path):
                parent_dir = os.path.join(input_path, '..')
                for root, _dirs, files in os.walk(input_path):
                    for file in files:
                        file_full_path = os.path.join(root, file)
                        zipf.write(file_full_path,
                                   os.path.relpath(file_full_path, parent_dir))
            # If it's a single file, add it to the zip file
            else:
                zipf.write(input_path, os.path.basename(input_path))

    print(f"Successfully zipped files to {output_zip_path} with compression level {compression_level}")

# Example usage:
# NOTE(review): the training cell writes 'resnet.pth' and 'submission.zip', whereas this
# list references '/content/model.py' and '/content/model.pth' — confirm these files exist.
input_paths = ['/content/model.py', '/content/model.pth']  # List of files or directories to zip
output_zip_path = '/content/submission2.zip'  # Path to save the output zip file
compression_level = 5  # Compression level from 1 (fastest) to 9 (most compressed)
zip_multiple_files(input_paths, output_zip_path, compression_level)


In [None]:
# Copy the submission archive to Google Drive (Drive must already be mounted at /content/drive).
# NOTE(review): the recorded output shows this cp failing with "No such file or directory"
# for the destination; also, no visible cell creates 'submission3.zip' — confirm the file name.
!cp /content/submission3.zip /content/drive/MyDrive/ML/Rayan/Q3/submission

cp: cannot create regular file '/content/drive/MyDrive/ML/Rayan/Q3/submission': No such file or directory


In [None]:
# Copy the submission archive into the `submission/` directory on Google Drive
# (destination written with a trailing slash, i.e. as a directory path).
!cp /content/submission3.zip /content/drive/MyDrive/ML/Rayan/Q3/submission/


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): this cell sits after the cells that copy into /content/drive — on a
# top-to-bottom run the mount needs to happen before those copies.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive
