In [None]:
# نصب کتابخانه‌های مورد نیاز
!pip install gdown
!pip install tqdm
!pip install huggingface_hub
!pip install timm
!pip install torchmetrics


Collecting torchmetrics
  Downloading torchmetrics-1.5.1-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.8-py3-none-any.whl.metadata (5.2 kB)
Downloading torchmetrics-1.5.1-py3-none-any.whl (890 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m890.6/890.6 kB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.8-py3-none-any.whl (26 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.8 torchmetrics-1.5.1


In [None]:
import os
from huggingface_hub import hf_hub_download
import tarfile

# Download the dataset archive from the Hugging Face Hub into the working directory.
ARCHIVE_NAME = "inat_train_modified.tar.gz"
hf_hub_download(repo_id='RayanAi/inat_train_modified',
                filename=ARCHIVE_NAME,
                repo_type="dataset",
                local_dir=".")

# Extract the tar.gz archive into the working directory.
# The archive comes from the network, so use the "data" extraction filter when
# this Python build provides it: it rejects members that would be written
# outside the destination (absolute paths, "..", unsafe links).
with tarfile.open(ARCHIVE_NAME, "r:gz") as tar:
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=".", filter="data")
    else:
        # Older interpreter without extraction filters: fall back to the
        # unfiltered behaviour of the original cell.
        tar.extractall(path=".")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


inat_train_modified.tar.gz:   0%|          | 0.00/11.4G [00:00<?, ?B/s]

In [None]:
# Import necessary libraries
import os
from typing import Iterator
import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import models, transforms
from torchvision.transforms import RandAugment
from PIL import Image
import numpy as np
import time
from tqdm import tqdm
import zipfile

# Pick the compute device: use CUDA when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Define the hierarchical dataset class
class Node:
    """A node of the class-hierarchy tree built from dataset folder names.

    The root is created with a plain string name; every descendant is created
    by ``add_to_node`` and is named with the tuple prefix of the path that
    leads to it (e.g. ``("a",)`` then ``("a", "b")``).
    """

    def __init__(self, name):
        self.name = name
        # Number of entities inserted through this node (into its subtree).
        self._count = 0
        # Maps the next path component to the child Node.
        self.children = {}
        # Entities stored directly on this node (non-empty only for leaves).
        self._entities = []

    def add_to_node(self, path, entity, level=0):
        """Insert ``entity`` under ``path``, creating intermediate nodes.

        Args:
            path: Sequence of hierarchy components from the root downwards.
            entity: Value to store at the node reached by the full path.
            level: Index into ``path`` handled by this node (internal to the
                recursion; callers normally leave it at 0).
        """
        if level >= len(path):
            # The whole path has been consumed: this node holds the entity.
            self._entities.append(entity)
            return
        part = path[level]
        if part not in self.children:
            self.children[part] = Node(path[:level + 1])
        self.children[part].add_to_node(path, entity, level=level + 1)
        self._count += 1

    @property
    def is_leaf(self):
        """True when at least one entity is stored directly on this node."""
        return len(self._entities) > 0

    @property
    def count(self):
        """Number of entities on this node (leaf) or inserted through it."""
        if self.is_leaf:
            return len(self._entities)
        return self._count

    @property
    def entities(self):
        """List of ``(entity, leaf_name)`` pairs for the subtree of this node."""
        if self.is_leaf:
            return [(entity, self.name) for entity in self._entities]
        child_entities = []
        for child in self.children.values():
            child_entities.extend(child.entities)
        return child_entities

    def level_iterator(self, level=None):
        """
        Iterates a certain depth in a tree and returns the nodes.

        Args:
            level: Depth below this node to yield. ``0`` yields this node;
                ``None`` yields the leaves of the subtree.

        Raises:
            Exception: If a leaf is reached before the requested depth.
        """
        if level == 0:
            yield self
        elif level is None and self.is_leaf:
            yield self
        elif self.is_leaf and level != 0:
            raise Exception("Incorrect level is specified in tree.")
        else:
            if level is not None:
                level -= 1
            for child in self.children.values():
                yield from child.level_iterator(level)

    def print_node(self, level=0, max_level=None):
        """Print this node and its descendants as an indented outline.

        Args:
            level: Current depth, used for indentation (4 spaces per level).
            max_level: Deepest level to print; ``None`` prints everything.
        """
        # The root carries a plain string name while descendants carry tuple
        # paths; indexing a string with [-1] would print only its last
        # character, so show string names in full.
        label = self.name if isinstance(self.name, str) else self.name[-1]
        print(' ' * (level * 4) + f"{label} ({self.count})")
        for node in self.children.values():
            if max_level is None or level < max_level:
                node.print_node(level + 1, max_level=max_level)

class HierarchicalDataset(Dataset):
    """Image dataset whose labels come from underscore-separated folder names.

    Every sub-directory of ``dataset_path`` is one group. The folder name is
    split on "_" and everything after the first component forms the group
    tuple; the first ``level`` components of that tuple are the class label.
    """

    def __init__(self, dataset_path, level=2, transform=None):
        """Scan ``dataset_path`` and index every file found in its sub-folders.

        Args:
            dataset_path: Directory containing one sub-directory per group.
            level: Number of leading group components used as the class.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.tree = Node("Dataset")
        self.level = level
        self.transform = transform
        self.classes = set()

        records = []
        for group_name in sorted(os.listdir(dataset_path)):
            group_dir = os.path.join(dataset_path, group_name)
            if not os.path.isdir(group_dir):
                continue
            # Drop the first underscore-separated token; the rest is the hierarchy.
            group = tuple(group_name.split("_")[1:])
            for image_name in sorted(os.listdir(group_dir)):
                # The record's position in the list doubles as its tree entity id.
                self.tree.add_to_node(group, len(records))
                records.append({
                    "image_path": os.path.join(group_dir, image_name),
                    "group": group,
                })
                self.classes.add(group[:self.level])

        self.data = records
        self.class_to_idx = {}
        for position, class_key in enumerate(sorted(self.classes)):
            self.class_to_idx[class_key] = position
        self.targets = [
            self.class_to_idx[record["group"][:self.level]] for record in self.data
        ]

    def __len__(self):
        """Number of indexed images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(image, class_index)``."""
        record = self.data[idx]
        image = Image.open(record["image_path"]).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, self.class_to_idx[record["group"][:self.level]]

    def get_class_weights(self):
        """Return one weight per sample equal to 1 / (size of its class)."""
        per_class_count = np.bincount(self.targets)
        per_class_weight = 1.0 / per_class_count
        return np.array([per_class_weight[label] for label in self.targets])

# Training-time augmentation pipeline: resize to 256x256, apply RandAugment,
# take a random 224x224 crop, randomly flip horizontally, then convert to a
# tensor and normalise each channel.
# NOTE(review): the normalisation statistics are hard-coded; presumably they
# were measured on this dataset -- confirm.
NORMALIZE_MEAN = (0.4556, 0.4714, 0.3700)
NORMALIZE_STD = (0.2370, 0.2318, 0.2431)

_augmentation_steps = [
    transforms.Resize((256, 256)),
    RandAugment(),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
]
train_transforms = transforms.Compose(_augmentation_steps)

# Build the training dataset from the extracted `train` directory, using the
# first two hierarchy components of each folder name as the class label.
train_dataset = HierarchicalDataset(
    dataset_path='train',
    transform=train_transforms,
    level=2,
)
print("Number of classes:", len(train_dataset.classes))

# Counter class imbalance: every sample is drawn with probability proportional
# to the inverse size of its class, and one epoch draws as many samples as the
# dataset contains.
samples_weight = torch.from_numpy(train_dataset.get_class_weights())
sampler = WeightedRandomSampler(weights=samples_weight, num_samples=len(samples_weight))

batch_size = 64
train_loader = DataLoader(train_dataset, sampler=sampler, batch_size=batch_size)

# Define the model: a ResNet-50 with randomly initialised weights whose final
# fully-connected layer is replaced to emit one logit per dataset class.
# `weights=None` is the current spelling of the deprecated `pretrained=False`.
model = models.resnet50(weights=None)
model.fc = nn.Linear(model.fc.in_features, len(train_dataset.classes))

# Load pretrained weights if available (commented out as per requirement)
# model.load_state_dict(torch.load('resnet50.pth'))

model = model.to(device)

# Cross-entropy loss with label smoothing of 0.1.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# SGD with momentum and weight decay; the learning rate follows a cosine
# schedule that restarts every 10 scheduler steps (T_mult=1 keeps the period fixed).
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)

# Progressive layer unfreezing
def set_parameter_requires_grad(model, feature_extracting, layers_to_freeze):
    """Freeze the first ``layers_to_freeze`` child modules and unfreeze the rest.

    Children are taken in the order returned by ``model.children()``. Setting
    the flag on every child (not only the frozen ones) means that calling this
    again with a smaller ``layers_to_freeze`` actually re-enables training for
    layers frozen by an earlier call, which progressive unfreezing requires.

    Args:
        model: Module whose direct children are frozen or unfrozen.
        feature_extracting: When falsy, the model is left untouched.
        layers_to_freeze: Number of leading child modules to freeze.
    """
    if not feature_extracting:
        return
    for position, child in enumerate(model.children(), start=1):
        trainable = position > layers_to_freeze
        for param in child.parameters():
            param.requires_grad = trainable

# Multi-stage training function
def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=25, feature_extract=True):
    """Train ``model`` and return it loaded with its best-epoch weights.

    Each epoch runs one pass over ``dataloaders``, steps ``scheduler`` once,
    and reports the loss and accuracy averaged over the samples actually seen.
    The weights of the epoch with the highest training accuracy are kept and
    restored before returning.

    Relies on the module-level ``device``, ``tqdm`` and
    ``set_parameter_requires_grad``.

    Args:
        model: Network to train (already moved to ``device``).
        dataloaders: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the parameters of ``model``.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of epochs to run.
        feature_extract: When true, every 5th epoch (except epoch 0) calls
            ``set_parameter_requires_grad`` with ``5 - epoch // 5`` leading
            layers to freeze (never below 0).

    Returns:
        ``model`` with the best recorded weights loaded.
    """
    import copy  # local import: only needed for the best-weights snapshot

    since = time.time()
    best_acc = 0.0
    # state_dict() hands out references to the live tensors, so it must be
    # deep-copied or later optimizer steps would overwrite the "best" weights.
    # Starting from the initial weights also guarantees the name is bound when
    # no epoch improves on best_acc (or when num_epochs is 0).
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch consists of a training phase
        model.train()  # Set model to training mode

        running_loss = 0.0
        running_corrects = 0
        samples_seen = 0

        # Iterate over data
        for inputs, labels in tqdm(dataloaders):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Statistics (kept as plain Python numbers)
            preds = outputs.argmax(dim=1)
            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            running_corrects += (preds == labels).sum().item()
            samples_seen += batch_size

        # Step the scheduler
        scheduler.step()

        # Normalise by the samples this loader actually produced rather than
        # by a global dataset; guard against an empty loader.
        denominator = max(samples_seen, 1)
        epoch_loss = running_loss / denominator
        epoch_acc = running_corrects / denominator

        print(f'Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        # Deep copy the model
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        # Scheduled freezing: every 5 epochs the number of leading layers
        # passed as `layers_to_freeze` shrinks by one.
        if feature_extract and epoch % 5 == 0 and epoch != 0:
            layers_to_freeze = max(5 - (epoch // 5), 0)
            set_parameter_requires_grad(model, feature_extract, layers_to_freeze)
            # The call freezes the first `layers_to_freeze` layers, so report
            # that instead of claiming layers were unfrozen.
            print(f'First {layers_to_freeze} top-level layers frozen after epoch {epoch}')

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best Training Acc: {best_acc:.4f}')

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model

# Run the full training schedule with the scheduled layer freezing enabled.
num_epochs = 25
model = train_model(
    model,
    train_loader,
    criterion,
    optimizer,
    scheduler,
    num_epochs=num_epochs,
    feature_extract=True,
)

# Persist the trained weights and wrap them in the submission archive.
torch.save(model.state_dict(), 'resnet.pth')
with zipfile.ZipFile('submission.zip', mode='w') as submission_zip:
    submission_zip.write('resnet.pth')

print("Model training complete and submission.zip created.")


Number of classes: 13




Epoch 0/24
----------


100%|██████████| 1563/1563 [18:59<00:00,  1.37it/s]


Loss: 2.1725 Acc: 0.3186
Epoch 1/24
----------


100%|██████████| 1563/1563 [19:00<00:00,  1.37it/s]


Loss: 1.5799 Acc: 0.5606
Epoch 2/24
----------


100%|██████████| 1563/1563 [19:02<00:00,  1.37it/s]


Loss: 1.3494 Acc: 0.6640
Epoch 3/24
----------


100%|██████████| 1563/1563 [19:01<00:00,  1.37it/s]


Loss: 1.1922 Acc: 0.7331
Epoch 4/24
----------


100%|██████████| 1563/1563 [19:01<00:00,  1.37it/s]


Loss: 1.0783 Acc: 0.7787
Epoch 5/24
----------


100%|██████████| 1563/1563 [18:59<00:00,  1.37it/s]


Loss: 0.9752 Acc: 0.8245
Unfroze layers after epoch 5
Epoch 6/24
----------


100%|██████████| 1563/1563 [18:37<00:00,  1.40it/s]


Loss: 0.9110 Acc: 0.8509
Epoch 7/24
----------


100%|██████████| 1563/1563 [18:38<00:00,  1.40it/s]


Loss: 0.8755 Acc: 0.8663
Epoch 8/24
----------


100%|██████████| 1563/1563 [18:39<00:00,  1.40it/s]


Loss: 0.8343 Acc: 0.8845
Epoch 9/24
----------


100%|██████████| 1563/1563 [18:38<00:00,  1.40it/s]


Loss: 0.8147 Acc: 0.8939
Epoch 10/24
----------


100%|██████████| 1563/1563 [18:34<00:00,  1.40it/s]


Loss: 0.9768 Acc: 0.8250
Unfroze layers after epoch 10
Epoch 11/24
----------


100%|██████████| 1563/1563 [18:27<00:00,  1.41it/s]


Loss: 0.9180 Acc: 0.8480
Epoch 12/24
----------


100%|██████████| 1563/1563 [18:27<00:00,  1.41it/s]


Loss: 0.8789 Acc: 0.8655
Epoch 13/24
----------


100%|██████████| 1563/1563 [18:26<00:00,  1.41it/s]


Loss: 0.8326 Acc: 0.8829
Epoch 14/24
----------


100%|██████████| 1563/1563 [18:26<00:00,  1.41it/s]


Loss: 0.7964 Acc: 0.8984
Epoch 15/24
----------


100%|██████████| 1563/1563 [18:27<00:00,  1.41it/s]


Loss: 0.7703 Acc: 0.9081
Unfroze layers after epoch 15
Epoch 16/24
----------


100%|██████████| 1563/1563 [18:27<00:00,  1.41it/s]


Loss: 0.7463 Acc: 0.9174
Epoch 17/24
----------


100%|██████████| 1563/1563 [18:27<00:00,  1.41it/s]


Loss: 0.7253 Acc: 0.9254
Epoch 18/24
----------


100%|██████████| 1563/1563 [18:30<00:00,  1.41it/s]


Loss: 0.7111 Acc: 0.9314
Epoch 19/24
----------


100%|██████████| 1563/1563 [18:27<00:00,  1.41it/s]


Loss: 0.7059 Acc: 0.9327
Epoch 20/24
----------


100%|██████████| 1563/1563 [18:24<00:00,  1.42it/s]


Loss: 0.8144 Acc: 0.8888
Unfroze layers after epoch 20
Epoch 21/24
----------


100%|██████████| 1563/1563 [18:26<00:00,  1.41it/s]


Loss: 0.8162 Acc: 0.8884
Epoch 22/24
----------


100%|██████████| 1563/1563 [18:27<00:00,  1.41it/s]


Loss: 0.7760 Acc: 0.9039
Epoch 23/24
----------


100%|██████████| 1563/1563 [18:26<00:00,  1.41it/s]


Loss: 0.7497 Acc: 0.9138
Epoch 24/24
----------


100%|██████████| 1563/1563 [18:28<00:00,  1.41it/s]


Loss: 0.7327 Acc: 0.9212
Training complete in 465m 35s
Best Training Acc: 0.9327
Model training complete and submission.zip created.


In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab VM so later cells can copy files to it.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import zipfile
import os

def zip_multiple_files(input_paths, output_zip_path, compression_level=5):
    """Bundle files and directories into one DEFLATE-compressed zip archive.

    Single files are stored under their base name. Directories are walked
    recursively and their files are stored under paths that start with the
    directory's own name. Paths that do not exist are reported and skipped.

    Args:
        input_paths: Iterable of file or directory paths to add.
        output_zip_path: Destination path of the zip file (overwritten).
        compression_level: DEFLATE level, clamped to 1 (fastest) .. 9 (smallest).
    """
    # Ensure compression level is within 1-9
    compression_level = max(1, min(compression_level, 9))

    # Pass the level to ZipFile so it is actually applied to the written members.
    with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED,
                         compresslevel=compression_level) as zipf:
        for input_path in input_paths:
            # Check if the file or directory exists
            if not os.path.exists(input_path):
                print(f"{input_path} does not exist.")
                continue

            # If it's a directory, recursively add files
            if os.path.isdir(input_path):
                # Archive names are made relative to the directory's parent so
                # that the directory name itself is kept inside the zip.
                parent_dir = os.path.join(input_path, '..')
                for root, _dirs, files in os.walk(input_path):
                    for file in files:
                        file_full_path = os.path.join(root, file)
                        zipf.write(file_full_path,
                                   os.path.relpath(file_full_path, parent_dir))
            # If it's a single file, add it to the zip file
            else:
                zipf.write(input_path, os.path.basename(input_path))

    print(f"Successfully zipped files to {output_zip_path} with compression level {compression_level}")

# Example usage: bundle the model definition and its weights into one archive.
input_paths = [
    '/content/model.py',
    '/content/model.pth',
]  # Files or directories to include
output_zip_path = '/content/submission2.zip'  # Where the archive is written
compression_level = 5  # From 1 (fastest) to 9 (most compressed)
zip_multiple_files(
    input_paths,
    output_zip_path,
    compression_level,
)


In [None]:
# Copy the zipped submission from the Colab VM to a path under /content/drive
# (the mount point used for Google Drive in this notebook).
# NOTE(review): the target `.../Q2/submission` is presumably an existing
# directory on Drive -- confirm, otherwise cp creates a file with that name.
!cp /content/submission2.zip /content/drive/MyDrive/ML/Rayan/Q2/submission

In [None]:
import random

def fill_random_array(size, min_value=0, max_value=100):
    """Return a list of ``size`` random integers in ``[min_value, max_value]``.

    Both bounds are inclusive (``random.randint`` semantics).
    """
    values = []
    for _ in range(size):
        values.append(random.randint(min_value, max_value))
    return values


# Demo: draw ten random integers between 1 and 50 (inclusive) and show them.
array = fill_random_array(size=10, min_value=1, max_value=50)
print(array)


[17, 10, 40, 47, 26, 31, 45, 21, 38, 12]
