<h1 style="color:rgb(17, 116, 155);text-align:left;font-size:250%;font-family:verdana;text-decoration:underline;"> 
    Advanced topics - Final Project - Part 3: End-to-End CNN</h1>

## <u>פונקציית הפסד: שימוש ב-Focal Loss, שינוי אופטימזציה ואוגמנטציה משופרת</u>

## <u>Imports</u>

In [3]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

from PIL import Image, ImageFilter
import tensorflow as tf
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import time
import torch.nn.functional as F
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
import gc

In [4]:
# קביעת התקן (GPU אם קיים)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## <u>Load and Preprocess Dataset</u>

In [6]:
# טעינת הנתונים (TFRecord) והכנה לפורמט PyTorch
print("Loading Cars196 dataset from TFRecord files...")

data_dir = r"C:\Users\yifat\Data Science\נושאים מתקדמים\Final_Project_Shay\cars196"

# יצירת רשימת קבצי ה-TFRecord
train_files = [os.path.join(data_dir, f"cars196-train.tfrecord-0000{i}-of-00008") for i in range(8)]
test_files = [os.path.join(data_dir, f"cars196-test.tfrecord-0000{i}-of-00008") for i in range(8)]

# פונקציה לקריאת TFRecord
def parse_tfrecord(example):
    feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64)
    }
    parsed_example = tf.io.parse_single_example(example, feature_description)
    image = tf.image.decode_jpeg(parsed_example['image'], channels=3)
    image = tf.image.resize(image, (224, 224))
    label = parsed_example['label']
    return image, label

def load_tfrecord_dataset(filenames):
    raw_dataset = tf.data.TFRecordDataset(filenames)
    dataset = raw_dataset.map(parse_tfrecord)
    return list(dataset)  # ממירים לרשימה לשימוש ב-PyTorch

# טעינת ה-Train/Test מ-TFRecord
train_data = [(image.numpy().astype('uint8'), label.numpy()) for image, label in load_tfrecord_dataset(train_files)]
test_data = [(image.numpy().astype('uint8'), label.numpy()) for image, label in load_tfrecord_dataset(test_files)]

print(f"Loaded {len(train_data)} training images and {len(test_data)} test images.")


Loading Cars196 dataset from TFRecord files...
Loaded 8144 training images and 8041 test images.


In [7]:
# מחלקת Dataset לטעינת הנתונים
class Cars196Dataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_np, label = self.data[idx]
        image = Image.fromarray(image_np)
        if self.transform:
            image = self.transform(image)
        return image, label

In [8]:
# ניסיון עם אוגמנטציה משופרת
#########################

In [9]:
# **אוגמנטציה משופרת עבור סט האימון**
# **אוגמנטציה משופרת עבור סט האימון**
train_transform = transforms.Compose([
    transforms.Resize((224, 224)), 
    transforms.RandomHorizontalFlip(p=0.5),  # היפוך אופקי
    transforms.RandomRotation(15),  # סיבוב עד 15 מעלות
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),  # שינוי צבעים
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # תזוזה אקראית
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),  # שינוי פרספקטיבה
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),  # חיתוך עם שינוי קנה מידה
    
    transforms.ToTensor(),  # יש להמיר קודם ל-Tensor

    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # נורמליזציה
    
    transforms.RandomErasing(p=0.3, scale=(0.02, 0.2), ratio=(0.3, 3.3)),  # יש להפעיל אחרי ההמרה ל-Tensor
])


# סט הבדיקה ללא אוגמנטציה
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [10]:
batch_size = 32
train_dataset = Cars196Dataset(train_data, transform=train_transform)
test_dataset = Cars196Dataset(test_data, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

print(f"Train and Test Dataloaders created with batch size {batch_size}.")
print(f"Training samples: {len(train_dataset)}, Testing samples: {len(test_dataset)}")


Train and Test Dataloaders created with batch size 32.
Training samples: 8144, Testing samples: 8041


In [11]:
# חישוב משקלים למחלקות לטיפול בחוסר איזון
labels_list = [label for _, label in train_data]
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(labels_list), y=labels_list)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

# פונקציית הפסד משופרת: שימוש ב-Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, alpha=class_weights, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ce_loss = nn.CrossEntropyLoss(weight=self.alpha)

    def forward(self, inputs, targets):
        ce_loss = self.ce_loss(inputs, targets)
        p_t = torch.exp(-ce_loss)
        focal_loss = self.alpha[targets] * (1 - p_t) ** self.gamma * ce_loss
        return focal_loss.mean()


## <u>Define and Train a CNN Model</u>

In [13]:
# Define a CNN model for image classification
class CNNClassifier(nn.Module):
    def __init__(self, num_classes=196):
        super(CNNClassifier, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(0.5)

        self.fc1 = nn.Linear(512 * 14 * 14, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))

        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x


In [14]:
# יצירת המודל
model = CNNClassifier(num_classes=196).to(device)

# הגדרת פונקציית הפסד ואופטימיזציה
#criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

###############################
# ניסיון 2 
###############################
# הגדרת פונקציית הפסד ואופטימיזציה
criterion = FocalLoss()  # שימוש ב-Focal Loss
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-4)  # שינוי מ-SGD ל-AdamW
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # הפחתת הלמידה כל 5 אפוקים


In [15]:
torch.cuda.empty_cache()  # אם יש GPU 
gc.collect()  # ניקוי זיכרון בפייתון


503

In [16]:
# פונקציות אימון ובדיקה
def train_epoch(model, train_loader, criterion, optimizer):
    model.train()
    model.to(device)
    running_loss = 0
    start_time = time.time()
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    scheduler.step()  # עדכון הלרנינג רייט בסוף כל אפוק
    end_time = time.time()
    running_loss /= len(train_loader)
    return running_loss, end_time - start_time
    

# פונקציה לבדיקה של המודל
def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()
        model.to(device)
        running_loss = 0
        total_predictions = 0
        correct_predictions = 0
        
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
        
        running_loss /= len(test_loader)
        accuracy = (correct_predictions / total_predictions)*100.0
        return running_loss, accuracy


In [17]:
# אימון הרשת
num_epochs = 20
for epoch in range(num_epochs):
    train_loss, train_time = train_epoch(model, train_loader, criterion, optimizer)
    test_loss, test_acc = test_model(model, test_loader, criterion)

    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss = {train_loss:.6f}, Time = {train_time:.2f}s")
    print(f"Test Loss = {test_loss:.6f}, Accuracy = {test_acc:.2f}%")

# שמירת המודל
torch.save(model.state_dict(), "cars196_cnn_trained.pth")
print("End-to-End CNN model training completed and saved!")


Epoch 1/20: Train Loss = 5.221171, Time = 223.10s
Test Loss = 5.160427, Accuracy = 0.67%
Epoch 2/20: Train Loss = 5.166117, Time = 221.67s
Test Loss = 5.109072, Accuracy = 1.08%
Epoch 3/20: Train Loss = 5.132992, Time = 221.99s
Test Loss = 5.084668, Accuracy = 0.96%
Epoch 4/20: Train Loss = 5.103366, Time = 220.03s
Test Loss = 5.053532, Accuracy = 1.55%
Epoch 5/20: Train Loss = 5.078332, Time = 220.57s
Test Loss = 5.030348, Accuracy = 2.16%
Epoch 6/20: Train Loss = 5.034039, Time = 217.20s
Test Loss = 5.004449, Accuracy = 2.46%
Epoch 7/20: Train Loss = 5.014855, Time = 220.54s
Test Loss = 4.975571, Accuracy = 2.74%
Epoch 8/20: Train Loss = 4.987268, Time = 220.58s
Test Loss = 4.967661, Accuracy = 2.89%
Epoch 9/20: Train Loss = 4.970644, Time = 220.48s
Test Loss = 4.926007, Accuracy = 3.23%


KeyboardInterrupt: 