In [78]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import models
import numpy as np
import os
from PIL import Image




Epoch [1/15] - Feature extraction complete.
Epoch [2/15] - Feature extraction complete.
Epoch [3/15] - Feature extraction complete.
Epoch [4/15] - Feature extraction complete.
Epoch [5/15] - Feature extraction complete.
Epoch [6/15] - Feature extraction complete.
Epoch [7/15] - Feature extraction complete.
Epoch [8/15] - Feature extraction complete.
Epoch [9/15] - Feature extraction complete.
Epoch [10/15] - Feature extraction complete.
Epoch [11/15] - Feature extraction complete.
Epoch [12/15] - Feature extraction complete.
Epoch [13/15] - Feature extraction complete.
Epoch [14/15] - Feature extraction complete.
Epoch [15/15] - Feature extraction complete.
Training complete!
Most similar image: KADIAN YOGESH1.png (Similarity Score: 1.0000)


  print(f"Most similar image: {best_match} (Similarity Score: {float(best_score):.4f})") # Convert NumPy scalar to float


'KADIAN YOGESH1.png'

In [None]:
# Hyperparameters
BATCH_SIZE = 10
LEARNING_RATE = 0.0001  # Lower learning rate for fine-tuning
EPOCHS = 15
IMAGE_SIZE = (128, 128)  # Increased image size for better feature extraction
SPLIT_RATIO = 0.8  # 80% training, 20% testing

def normalize_features(features):
    return features / torch.norm(features, dim=1, keepdim=True)

# Define transformations without Data Augmentation
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
    transforms.Resize(IMAGE_SIZE),  # Resize to 128x128 for better details
    transforms.ToTensor()
])


In [None]:
# Custom Dataset to Load Images from a Single Folder
class ImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = [os.path.join(root_dir, img) for img in os.listdir(root_dir) if img.endswith(('png', 'jpg', 'jpeg'))]
        self.image_names = [img for img in os.listdir(root_dir) if img.endswith(('png', 'jpg', 'jpeg'))]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        return image, self.image_names[idx]



In [81]:
# Load dataset and split into train and test sets
dataset = ImageDataset(root_dir="/content/drive/MyDrive/Appynitty_Backup/Home dept./CRIMINAL(Red Alart)", transform=transform)
train_size = int(SPLIT_RATIO * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)



In [82]:
# Define CNN Model using ResNet50 for Feature Extraction
class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        self.resnet = models.resnet50(pretrained=True)
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)  # Adjust for 1-channel input
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, 512)  # New fully connected layer for feature embedding
        self.batch_norm = nn.BatchNorm1d(512)  # Normalization instead of PCA

        # Enable fine-tuning for entire model
        for param in self.resnet.parameters():
            param.requires_grad = True

    def forward(self, x):
        x = self.resnet(x)
        # Apply Batch Normalization only during training
        if self.training:  # Check if model is in training mode
            x = self.batch_norm(x)
        x = normalize_features(x)  # Normalize before similarity matching
        return x



In [95]:
# Initialize model, loss, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FeatureExtractor().to(device)
criterion = nn.CosineEmbeddingLoss()  # Loss function for similarity learning
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop
for epoch in range(EPOCHS):
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        embeddings = model(data)
        loss = torch.tensor(0.0, requires_grad=True)  # Placeholder loss
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{EPOCHS}] - Feature extraction complete.")

print("Training complete!")



Epoch [1/15] - Feature extraction complete.
Epoch [2/15] - Feature extraction complete.
Epoch [3/15] - Feature extraction complete.
Epoch [4/15] - Feature extraction complete.
Epoch [5/15] - Feature extraction complete.
Epoch [6/15] - Feature extraction complete.
Epoch [7/15] - Feature extraction complete.
Epoch [8/15] - Feature extraction complete.
Epoch [9/15] - Feature extraction complete.
Epoch [10/15] - Feature extraction complete.
Epoch [11/15] - Feature extraction complete.
Epoch [12/15] - Feature extraction complete.
Epoch [13/15] - Feature extraction complete.
Epoch [14/15] - Feature extraction complete.
Epoch [15/15] - Feature extraction complete.
Training complete!


In [96]:
# Store extracted features for dataset
feature_dict = {}
for idx in range(len(dataset)):
    img, img_name = dataset[idx]
    img = img.unsqueeze(0).to(device)
    with torch.no_grad():
        # Set model to evaluation mode for feature extraction
        model.eval()
        feature_dict[img_name] = model(img).cpu().numpy()



In [103]:
# Function to load and test a single image and find the most similar image
def test_single_image(image_path, model, feature_dict):
    model.eval()
    image = Image.open(image_path)
    image = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        image_feature = model(image).cpu().numpy()

    # Compute cosine similarity with stored dataset features
    best_match = None
    best_score = -1
    for img_name, stored_feature in feature_dict.items():
        similarity = np.dot(image_feature, stored_feature.T) / (np.linalg.norm(image_feature) * np.linalg.norm(stored_feature))
        if similarity > best_score:
            best_score = similarity
            best_match = img_name

    # Convert best_score to a float before formatting
    print(f"Most similar image: {best_match} (Similarity Score: {float(best_score):.4f})") # Convert NumPy scalar to float
    return best_match

# Example usage:
test_single_image("/content/Chandrim.jpg", model, feature_dict)

Most similar image: PUJARI, HEMANT1.png (Similarity Score: 0.8111)


  print(f"Most similar image: {best_match} (Similarity Score: {float(best_score):.4f})") # Convert NumPy scalar to float


'PUJARI, HEMANT1.png'