<h1 style="color:rgb(17, 116, 155);text-align:left;font-size:250%;font-family:verdana;text-decoration:underline;"> 
    Advanced topics - Final Project - Part 3: End-to-End CNN</h1>

## <u>אופטימיזציה: SGD, אוגמנטציה משופרת</u>

## <u>Imports</u>

In [3]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

from PIL import Image, ImageFilter
import tensorflow as tf
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import time
import torch.nn.functional as F


In [4]:
# Select the compute device: CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


## <u>Load and Preprocess Dataset</u>

In [6]:
# Step 1: locate the Cars196 TFRecord shards that are later converted for PyTorch.
print("Loading Cars196 dataset from TFRecord files...")

# Dataset root. The original absolute path stays the default so existing runs are
# unaffected; set the CARS196_DATA_DIR environment variable to run on another machine.
data_dir = os.environ.get(
    "CARS196_DATA_DIR",
    r"C:\Users\yifat\Data Science\נושאים מתקדמים\Final_Project_Shay\cars196",
)

NUM_SHARDS = 8  # number of shard files listed per split


def shard_paths(split, num_shards=NUM_SHARDS):
    """Return the TFRecord shard paths of one split inside ``data_dir``.

    Args:
        split: Split name embedded in the file name (``"train"`` or ``"test"`` here).
        num_shards: Total number of shards of the split.

    Returns:
        A list of ``num_shards`` paths. Both the shard index and the total are
        zero-padded to five digits, so indices of 10 and above are still
        formatted correctly.
    """
    return [
        os.path.join(data_dir, f"cars196-{split}.tfrecord-{index:05d}-of-{num_shards:05d}")
        for index in range(num_shards)
    ]


# Lists of TFRecord shard files for each split.
train_files = shard_paths("train")
test_files = shard_paths("test")

# Parser for a single serialized TFRecord example.
def parse_tfrecord(example):
    """Decode one serialized example into an ``(image, label)`` pair.

    Args:
        example: A serialized example holding a JPEG-encoded ``'image'`` string
            and an int64 ``'label'``.

    Returns:
        The image decoded with 3 channels and resized to 224x224, and the label
        exactly as stored in the record.
    """
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }
    record = tf.io.parse_single_example(example, schema)
    decoded = tf.image.decode_jpeg(record['image'], channels=3)
    resized = tf.image.resize(decoded, (224, 224))
    return resized, record['label']

def load_tfrecord_dataset(filenames):
    """Read the given TFRecord files and return every parsed example as a list.

    Each element is the ``(image, label)`` pair produced by ``parse_tfrecord``.
    The whole dataset is materialised into a Python list so it can be indexed
    from the PyTorch side.
    """
    parsed = tf.data.TFRecordDataset(filenames).map(parse_tfrecord)
    return list(parsed)

# Load the train/test splits from the TFRecord shards.
def _materialize(filenames):
    """Return the examples in ``filenames`` as ``(uint8 image array, label)`` pairs."""
    pairs = []
    for image, label in load_tfrecord_dataset(filenames):
        pairs.append((image.numpy().astype('uint8'), label.numpy()))
    return pairs


train_data = _materialize(train_files)
test_data = _materialize(test_files)

print(f"Loaded {len(train_data)} training images and {len(test_data)} test images.")


Loading Cars196 dataset from TFRecord files...
Loaded 8144 training images and 8041 test images.


In [7]:
# Dataset class that serves the in-memory samples to a DataLoader.
class Cars196Dataset(Dataset):
    """Map-style dataset over a sequence of ``(image array, label)`` pairs.

    Args:
        data: Indexable collection of ``(image_array, label)`` pairs.
        transform: Optional callable applied to the PIL image built from the
            array; when falsy, the PIL image is returned unchanged.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        """Number of samples held by the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``."""
        pixels, label = self.data[idx]
        picture = Image.fromarray(pixels)
        if not self.transform:
            return picture, label
        return self.transform(picture), label

In [8]:
# Per-channel statistics used by the Normalize step of both pipelines.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]


def _tensor_steps():
    """Final steps of every pipeline: PIL image -> tensor -> per-channel normalisation."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ]


# **Enhanced augmentation for the training set**
_train_augmentations = [
    transforms.RandomHorizontalFlip(p=0.5),  # horizontal flip
    transforms.RandomRotation(15),  # rotation of up to 15 degrees
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),  # colour changes
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # random translation
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),  # perspective change
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # crop with rescaling
    # Gaussian blur with a 5x5 kernel; it sits directly in the pipeline, so it is
    # applied to every training sample.
    transforms.GaussianBlur(kernel_size=(5, 5)),
]
train_transform = transforms.Compose(
    [transforms.Resize((224, 224)), *_train_augmentations, *_tensor_steps()]
)

# The test set gets no augmentation: resize, tensor conversion and normalisation only.
test_transform = transforms.Compose(
    [transforms.Resize((224, 224)), *_tensor_steps()]
)


In [9]:
batch_size = 32

# Wrap the in-memory samples; each split gets its own transform pipeline.
train_dataset = Cars196Dataset(train_data, transform=train_transform)
test_dataset = Cars196Dataset(test_data, transform=test_transform)

# Both loaders share batch size and worker count; only the training loader shuffles.
_loader_options = dict(batch_size=batch_size, num_workers=0)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

print(f"Train and Test Dataloaders created with batch size {batch_size}.")
print(f"Training samples: {len(train_dataset)}, Testing samples: {len(test_dataset)}")


Train and Test Dataloaders created with batch size 32.
Training samples: 8144, Testing samples: 8041


## <u>Define and Train a CNN Model</u>

In [11]:
# Define a CNN model for image classification
class CNNClassifier(nn.Module):
    """Plain CNN classifier: four conv/ReLU/max-pool stages followed by three linear layers.

    Args:
        num_classes: Size of the output layer (196 by default).

    The first linear layer expects ``512 * 14 * 14`` features, which matches a
    224x224 input: the 3x3 / stride 1 / padding 1 convolutions keep the spatial
    size and each of the four 2x2 poolings halves it (224 -> 14).
    """

    def __init__(self, num_classes=196):
        super().__init__()

        # Convolution stack; channel widths 3 -> 64 -> 128 -> 256 -> 512.
        same_size_conv = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(3, 64, **same_size_conv)
        self.conv2 = nn.Conv2d(64, 128, **same_size_conv)
        self.conv3 = nn.Conv2d(128, 256, **same_size_conv)
        self.conv4 = nn.Conv2d(256, 512, **same_size_conv)

        # One pooling module and one dropout module, reused inside forward().
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(0.5)

        # Classifier head.
        flat_features = 512 * 14 * 14
        self.fc1 = nn.Linear(flat_features, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.pool(F.relu(conv(x)))

        features = torch.flatten(x, 1)
        # Dropout is applied only after the first linear layer.
        hidden = self.dropout(F.relu(self.fc1(features)))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


In [12]:
# Build the model and move it to the selected device.
model = CNNClassifier(num_classes=196).to(device)

# Define the loss function and the optimizer.
# NOTE(review): in the run recorded further down, the train/test loss stays at
# about 5.27-5.28, which is ln(196) ≈ 5.278, i.e. the loss of a uniform guess over
# 196 classes, so the network is effectively not learning with this setup.
# Learning rate and the absence of normalisation layers are plausible suspects —
# TODO investigate.
criterion = nn.CrossEntropyLoss()
# Alternative optimizer configuration, kept for reference (disabled):
#optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-4)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

In [13]:
import gc

# Collect Python garbage first: objects kept alive only by reference cycles are
# destroyed here, so any GPU memory they held goes back to PyTorch's caching allocator.
collected_objects = gc.collect()
# Only then ask the allocator to release its unused cached blocks (relevant when a GPU
# is in use). Doing this before the collection would leave the just-freed blocks cached.
torch.cuda.empty_cache()
# Keep showing the number of collected objects as the cell output.
collected_objects


533

In [14]:
# Training and evaluation functions.
def train_epoch(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network to optimise; it is put in train mode and moved to the
            notebook-level ``device``.
        train_loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. The reported
            average assumes it returns the batch mean, as ``nn.CrossEntropyLoss``
            does with its default reduction.
        optimizer: Optimizer stepping the parameters of ``model``.

    Returns:
        ``(mean_loss, elapsed_seconds)``. ``mean_loss`` is averaged over samples,
        so a smaller final batch is not over-weighted; it is ``nan`` when the
        loader yields no samples.
    """
    model.train()
    model.to(device)
    loss_sum = 0.0
    samples_seen = 0
    # perf_counter is a monotonic clock, unlike the wall clock, so the measured
    # duration cannot be distorted by system time adjustments.
    start_time = time.perf_counter()

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # Weight each batch mean by its size to obtain a per-sample average.
        batch_len = labels.size(0)
        loss_sum += loss.item() * batch_len
        samples_seen += batch_len

    elapsed = time.perf_counter() - start_time
    mean_loss = loss_sum / samples_seen if samples_seen else float("nan")
    return mean_loss, elapsed

# פונקציה לבדיקה של המודל
def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()
        model.to(device)
        running_loss = 0
        total_predictions = 0
        correct_predictions = 0
        
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
        
        running_loss /= len(test_loader)
        accuracy = (correct_predictions / total_predictions) * 100.0
        return running_loss, accuracy


In [15]:
import collections

# Count how many training samples carry each class label.
labels_count = collections.Counter(label for _image, label in train_data)
print(labels_count)


Counter({118: 68, 78: 49, 160: 48, 166: 48, 55: 47, 143: 47, 190: 46, 161: 46, 193: 46, 181: 46, 94: 46, 97: 46, 42: 46, 19: 46, 170: 46, 111: 46, 106: 45, 146: 45, 5: 45, 33: 45, 0: 45, 120: 45, 23: 45, 43: 45, 52: 45, 80: 45, 122: 45, 7: 45, 152: 45, 114: 45, 113: 45, 72: 45, 178: 45, 58: 45, 81: 45, 71: 45, 108: 45, 64: 45, 164: 45, 147: 45, 124: 44, 171: 44, 163: 44, 144: 44, 86: 44, 93: 44, 73: 44, 131: 44, 150: 44, 107: 44, 62: 44, 15: 44, 136: 44, 169: 44, 176: 44, 75: 44, 187: 44, 172: 44, 88: 44, 30: 44, 84: 44, 74: 44, 45: 44, 186: 44, 159: 44, 104: 44, 57: 44, 194: 43, 2: 43, 50: 43, 145: 43, 69: 43, 13: 43, 119: 43, 153: 43, 17: 43, 112: 43, 179: 43, 14: 43, 154: 43, 60: 43, 85: 43, 79: 43, 191: 43, 103: 43, 47: 43, 138: 43, 116: 43, 189: 43, 148: 43, 28: 43, 109: 43, 21: 43, 182: 42, 130: 42, 83: 42, 20: 42, 3: 42, 100: 42, 105: 42, 31: 42, 117: 42, 96: 42, 32: 42, 125: 42, 49: 42, 177: 42, 139: 42, 167: 42, 65: 42, 192: 42, 134: 42, 132: 42, 29: 42, 110: 42, 18: 41, 89: 4

In [None]:
# Train the network.
num_epochs = 20
for epoch in range(num_epochs):
    # One optimisation pass over the training loader, then a full evaluation on the
    # test loader, so both losses are reported after every epoch.
    train_loss, train_time = train_epoch(model, train_loader, criterion, optimizer)
    test_loss, test_acc = test_model(model, test_loader, criterion)

    # `epoch` is zero-based; the log line shows it one-based.
    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss = {train_loss:.6f}, Time = {train_time:.2f}s")
    print(f"Test Loss = {test_loss:.6f}, Accuracy = {test_acc:.2f}%")

# Save the model: only the weights (state_dict) as they are after the final epoch;
# no intermediate or best-epoch checkpoint is written.
torch.save(model.state_dict(), "cars196_cnn_trained.pth")
print("End-to-End CNN model training completed and saved!")


Epoch 1/20: Train Loss = 5.279534, Time = 153.27s
Test Loss = 5.277839, Accuracy = 0.53%
Epoch 2/20: Train Loss = 5.278195, Time = 148.84s
Test Loss = 5.277124, Accuracy = 0.53%
Epoch 3/20: Train Loss = 5.278186, Time = 146.58s
Test Loss = 5.276682, Accuracy = 0.71%
Epoch 4/20: Train Loss = 5.277505, Time = 146.84s
Test Loss = 5.276024, Accuracy = 0.85%
Epoch 5/20: Train Loss = 5.277150, Time = 145.14s
Test Loss = 5.275410, Accuracy = 0.85%
Epoch 6/20: Train Loss = 5.275642, Time = 144.90s
Test Loss = 5.274494, Accuracy = 0.85%
Epoch 7/20: Train Loss = 5.275830, Time = 153.13s
Test Loss = 5.273646, Accuracy = 0.85%
Epoch 8/20: Train Loss = 5.275751, Time = 152.63s
Test Loss = 5.272994, Accuracy = 0.85%
Epoch 9/20: Train Loss = 5.274849, Time = 153.74s
Test Loss = 5.272412, Accuracy = 0.85%
Epoch 10/20: Train Loss = 5.274338, Time = 151.70s
Test Loss = 5.271716, Accuracy = 0.85%
Epoch 11/20: Train Loss = 5.273350, Time = 152.70s
Test Loss = 5.270616, Accuracy = 0.85%
Epoch 12/20: Train 