In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [2]:
# טעינת הנתונים מ-TFRecord והכנתם ל-PyTorch

In [3]:
import tensorflow as tf
import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os
import faiss
from PIL import Image
import matplotlib.pyplot as plt

#  נתיב לתיקיית Cars196
data_dir = r"C:\Users\revit\Documents\Data Learning\cars196"

# 🔹 רשימת קבצי ה-TFRecord
train_files = [os.path.join(data_dir, f"cars196-train.tfrecord-0000{i}-of-00008") for i in range(8)]
test_files = [os.path.join(data_dir, f"cars196-test.tfrecord-0000{i}-of-00008") for i in range(8)]

#  פונקציה לקריאת TFRecord
def parse_tfrecord(example):
    feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64)
    }
    parsed_example = tf.io.parse_single_example(example, feature_description)
    image = tf.image.decode_jpeg(parsed_example['image'], channels=3)
    image = tf.image.resize(image, (224, 224))
    label = parsed_example['label']
    return image, label

def load_tfrecord_dataset(filenames):
    raw_dataset = tf.data.TFRecordDataset(filenames)
    dataset = raw_dataset.map(parse_tfrecord)
    return list(dataset)  # ממירים לרשימה לשימוש ב-PyTorch

#  טעינת ה-Train/Test מ-TFRecord
train_data = [(image.numpy().astype('uint8'), label.numpy()) for image, label in load_tfrecord_dataset(train_files)]
test_data = [(image.numpy().astype('uint8'), label.numpy()) for image, label in load_tfrecord_dataset(test_files)]

print(f" Loaded {len(train_data)} training images and {len(test_data)} test images.")


✅ Loaded 8144 training images and 8041 test images.


In [4]:
# יצירת PyTorch Dataset ו-DataLoader

In [6]:
#  מחלקת Dataset מותאמת ל-PyTorch
class Cars196Dataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_np, label = self.data[idx]
        image = Image.fromarray(image_np)  # ממירים ל-PIL
        if self.transform:
            image = self.transform(image)
        return image, label

# 🔹 טרנספורמציות ל-ResNet50
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 🔹 יצירת ה-Datasets וה-DataLoaders
batch_size = 32
train_dataset = Cars196Dataset(train_data, transform=transform)
test_dataset = Cars196Dataset(test_data, transform=transform)

train_loader = DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=0  # הסרנו persistent_workers
)
test_loader = DataLoader(
    test_dataset, batch_size=64, shuffle=False, num_workers=0  # הסרנו persistent_workers
)


In [7]:
# Fine-Tuning של ResNet50 על Cars196

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#  טעינת ResNet50 עם התאמה ל-Cars196
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, len(set(label for _, label in train_data)))  # מספר המחלקות
model = model.to(device)

#  פונקציית הפסד (Loss) ואופטימיזציה
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

#  אימון הרשת
print(" Starting training...")
num_epochs = 5  
for epoch in range(num_epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    print(f" Epoch {epoch+1}/{num_epochs} started.")  # 🛠 האם בכלל התחיל אימון?

    for batch_idx, (images, labels) in enumerate(train_loader):
        print(f"Processing Batch {batch_idx}/{len(train_loader)}...")  # 🛠 בדיקה לפני העברת הנתונים למודל

        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        #  עדכון כל 10 באצ'ים (סטטוס ביניים)
        if batch_idx % 10 == 0:
            print(f" Epoch {epoch+1}/{num_epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}")

    train_accuracy = correct / total
    print(f" Epoch {epoch+1}/{num_epochs} Completed! Loss: {running_loss/len(train_loader):.4f}, Accuracy: {train_accuracy:.4f}")

    #  הוספת Checkpoint שמירה בכל סוף Epoch
    torch.save(model.state_dict(), f"checkpoint_epoch_{epoch+1}.pth")
    print(f" Model checkpoint saved at epoch {epoch+1}")

print(" Training Completed!")


✅ Starting training...
✅ Epoch 1/5 started.
Processing Batch 0/128...
✅ Epoch 1/5, Batch 0/128, Loss: 5.2937
Processing Batch 1/128...
Processing Batch 2/128...
Processing Batch 3/128...
Processing Batch 4/128...
Processing Batch 5/128...
Processing Batch 6/128...
Processing Batch 7/128...
Processing Batch 8/128...
Processing Batch 9/128...
Processing Batch 10/128...
✅ Epoch 1/5, Batch 10/128, Loss: 5.2638
Processing Batch 11/128...
Processing Batch 12/128...
Processing Batch 13/128...
Processing Batch 14/128...
Processing Batch 15/128...
Processing Batch 16/128...
Processing Batch 17/128...
Processing Batch 18/128...
Processing Batch 19/128...
Processing Batch 20/128...
✅ Epoch 1/5, Batch 20/128, Loss: 5.2624
Processing Batch 21/128...
Processing Batch 22/128...
Processing Batch 23/128...
Processing Batch 24/128...
Processing Batch 25/128...
Processing Batch 26/128...
Processing Batch 27/128...
Processing Batch 28/128...
Processing Batch 29/128...
Processing Batch 30/128...
✅ Epoch 1/

In [9]:
# טעינת המודל המאומן והכנת FAISS

In [12]:
last_checkpoint = "checkpoint_epoch_5.pth"
model.load_state_dict(torch.load(os.path.join("C:\\Users\\revit\\Documents\\Data Learning\\Project2", last_checkpoint), map_location=device))
print(" Model loaded from last checkpoint (Epoch 5).")


  model.load_state_dict(torch.load(os.path.join("C:\\Users\\revit\\Documents\\Data Learning\\Project2", last_checkpoint), map_location=device))


✅ Model loaded from last checkpoint (Epoch 5).


In [None]:
#  חיפוש תמונות דומות והערכת איכות

In [13]:
# הסרת שכבת סיווג וחילות מאפפייניפ
#  הסרת שכבת הסיווג
model = torch.nn.Sequential(*list(model.children())[:-1])  # משאירים רק את ה-Feature Extractor
model.eval()
print(" Classification layer removed. Model ready for feature extraction.")


✅ Classification layer removed. Model ready for feature extraction.


In [14]:
# פונקציה לחילוץ מאפיינים
def extract_features(image):
    image = transform(image).unsqueeze(0).to(device)  # המרה לטנסור והוספת Batch Dimension
    with torch.no_grad():
        features = model(image)  # הרצת המודל
    return features.squeeze().cpu().numpy()  # המרה ל-NumPy


In [16]:
#  שלב בדיקת הביצועים על ה-Test Set
model.eval()  # מעביר את המודל למצב הערכה (Evaluation)
correct, total = 0, 0

with torch.no_grad():  # אין צורך לחשב גרדיאנטים בבדיקה
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # בוחרים את המחלקה עם הסיכוי הגבוה ביותר
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

test_accuracy = correct / total  # חישוב הדיוק הסופי
print(f" Test Accuracy: {test_accuracy:.4f}")


✅ Test Accuracy: 0.0313


In [15]:
# יצירת אינדקס FAISS לכל התמונות ב-Train
import faiss
from PIL import Image
import numpy as np

index = faiss.IndexFlatL2(2048)  # FAISS Index - חיפוש לפי L2 Distance
all_features, image_data_list = [], []

for image_np, label in train_data:
    pil_image = Image.fromarray(image_np)  # המרת NumPy ל-PIL
    feature_vector = extract_features(pil_image)  # חילוץ מאפיינים
    all_features.append(feature_vector)
    image_data_list.append((pil_image, label))  # שמירת התמונה והתגית שלה

#  נורמליזציה ושמירת וקטורים באינדקס FAISS
all_features = np.array(all_features).astype('float32')
faiss.normalize_L2(all_features)
index.add(all_features)

print(f" FAISS index built with {len(all_features)} images using fine-tuned model.")



KeyboardInterrupt



In [None]:
# חיפוש תמונות דומות (Image Retrieval)
#  בחירת תמונה לבדיקה
query_image_np, query_label = test_data[0]
query_pil_image = Image.fromarray(query_image_np)

#  חיפוש תמונות דומות ב-FAISS
def search_similar(image, index, k=5):
    query_vector = extract_features(image).reshape(1, -1).astype('float32')
    faiss.normalize_L2(query_vector)
    distances, indices = index.search(query_vector, k)
    return indices[0], distances

similar_images, distances = search_similar(query_pil_image, index, k=5)

print(" Similar images found at indices:", similar_images)


In [None]:
# הצגת התוצאות
import matplotlib.pyplot as plt

def show_results(query_image, similar_indices, distances):
    fig, ax = plt.subplots(1, 6, figsize=(15, 5))
    ax[0].imshow(query_image)
    ax[0].set_title("Query Image")
    ax[0].axis("off")

    for i, (idx, dist) in enumerate(zip(similar_indices, distances[0])):
        similar_pil_image, _ = image_data_list[idx]
        ax[i+1].imshow(similar_pil_image)
        ax[i+1].set_title(f"Match {i+1} (Score: {dist:.2f})")
        ax[i+1].axis("off")

    plt.show()

show_results(query_pil_image, similar_images, distances)
