In [8]:
!pip install torch torchvision transformers requests

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from transformers import BeitForImageClassification, BeitConfig, AdamW
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torch.cuda.amp import GradScaler, autocast

# Path to your training data
train_data_path = '/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train'

# Setup data transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the dataset
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)

# Setup the data loader with a reduced batch size
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize BEiT model
num_labels = len(train_dataset.classes)
config = BeitConfig.from_pretrained('microsoft/beit-base-patch16-224', num_labels=num_labels)
model = BeitForImageClassification.from_pretrained(
    'microsoft/beit-base-patch16-224',
    config=config,
    ignore_mismatched_sizes=True
)

# Prepare the model for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.train()

# Setup optimizer and gradient scaler for mixed precision training
optimizer = AdamW(model.parameters(), lr=5e-5)
scaler = GradScaler()

# Define the number of steps for gradient accumulation
accumulation_steps = 4

# Calculate total number of batches
total_batches = len(train_loader)

# Training loop with mixed precision and gradient accumulation
for epoch in range(1):  # Adjust the number of epochs based on your needs
    model.train()
    optimizer.zero_grad()

    for step, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        with autocast():  # Mixed precision
            outputs = model(images, labels=labels)
            loss = outputs.loss / accumulation_steps  # Scale the loss for gradient accumulation

        scaler.scale(loss).backward()  # Scale the loss and backprop

        # Perform optimization step every `accumulation_steps`
        if (step + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        if step % accumulation_steps == 0:
            print(f"Epoch {epoch}, Step {step}/{total_batches}, Loss: {loss.item()}")

        # Calculate percentage of completion
        percent_complete = (step + 1) * batch_size / len(train_dataset) * 100
        print(f"Epoch {epoch}, Step {step}/{total_batches}, Percentage Complete: {percent_complete:.2f}%")

        # Clear unused memory
        torch.cuda.empty_cache()

# Save the fine-tuned model
model.save_pretrained('/content/drive/MyDrive/Project_IR/fine_tuned_model')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([24, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([24]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 0, Step 0/230, Loss: 0.8230054378509521
Epoch 0, Step 0/230, Percentage Complete: 0.44%
Epoch 0, Step 1/230, Percentage Complete: 0.87%
Epoch 0, Step 2/230, Percentage Complete: 1.31%
Epoch 0, Step 3/230, Percentage Complete: 1.74%
Epoch 0, Step 4/230, Loss: 0.8240101933479309
Epoch 0, Step 4/230, Percentage Complete: 2.18%
Epoch 0, Step 5/230, Percentage Complete: 2.62%
Epoch 0, Step 6/230, Percentage Complete: 3.05%
Epoch 0, Step 7/230, Percentage Complete: 3.49%
Epoch 0, Step 8/230, Loss: 0.8207409977912903
Epoch 0, Step 8/230, Percentage Complete: 3.92%
Epoch 0, Step 9/230, Percentage Complete: 4.36%
Epoch 0, Step 10/230, Percentage Complete: 4.80%
Epoch 0, Step 11/230, Percentage Complete: 5.23%
Epoch 0, Step 12/230, Loss: 0.7419025897979736
Epoch 0, Step 12/230, Percentage Complete: 5.67%
Epoch 0, Step 13/230, Percentage Complete: 6.11%
Epoch 0, Step 14/230, Percentage Complete: 6.54%
Epoch 0, Step 15/230, Percentage Complete: 6.98%
Epoch 0, Step 16/230, Loss: 0.73409986495

In [2]:
from transformers import BeitForImageClassification, BeitFeatureExtractor
from PIL import Image
import torch
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
import requests
from io import BytesIO

# Class names mapping
class_names = [
    "Ajanta Caves", "Charar-E- Sharif", "Chhota Imambara", "Ellora Caves", "Fatehpur Sikiri",
    "Hawa Mahal", "Gateway of India", "Khajuraho", "Sun Temple Konark", "Alai Darwaza",
    "Alai Minar", "Basilica of Bom Jesus", "Charminar", "Golden Temple", "Iron Pillar",
    "Jamali Kamali Tomb", "Lotus Temple", "Mysore Palace", "Qutub Minar", "Taj Mahal",
    "Tanjavur Temple", "Victoria Memorial"
]

# Function to predict the class of an image from a URL
def predict_image_class_from_url(image_url):
    # Load the fine-tuned model and the feature extractor
    model_path = '/content/drive/MyDrive/Project_IR/fine_tuned_model'
    model = BeitForImageClassification.from_pretrained(model_path)
    feature_extractor = BeitFeatureExtractor.from_pretrained('microsoft/beit-base-patch16-224')

    # Ensure model is in evaluation mode
    model.eval()

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Use requests to download the image
    response = requests.get(image_url)
    image = Image.open(BytesIO(response.content)).convert("RGB")

    # Preprocess the image
    transform = Compose([
        Resize((224, 224)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    image = transform(image).unsqueeze(0)  # Add batch dimension

    # Move image to the same device as the model
    image = image.to(device)

    # Perform inference
    with torch.no_grad():
        outputs = model(image)
        preds = outputs.logits.softmax(dim=-1)
        predicted_index = preds.argmax(1).item()

    # Translate the predicted index to class name using the mapping
    predicted_class = class_names[predicted_index]
    return f"Predicted class for the image: {predicted_class}"

# Example usage
# Replace the URL with the actual URL of the image you want to classify
image_url = input("ENTER HTTPS LINK: ")
predicted_class = predict_image_class_from_url(image_url)
print(predicted_class)


ENTER HTTPS LINK: https://cdn.britannica.com/70/153470-050-F4594C27/Ajanta-Caves-Maharashtra-India.jpg
Predicted class for the image: Ajanta Caves


In [34]:
from transformers import BeitForImageClassification, BeitFeatureExtractor
from PIL import Image
import torch
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
import requests
from io import BytesIO

# Class names mapping
class_names = [
    "Ajanta Caves", "Charar-E- Sharif", "Chhota Imambara", "Ellora Caves", "Fatehpur Sikiri",
    "Hawa Mahal", "Gateway of India", "Khajuraho", "Sun Temple Konark", "Alai Darwaza",
    "Alai Minar", "Basilica of Bom Jesus", "Charminar", "Golden Temple", "Iron Pillar",
    "Jamali Kamali Tomb", "Lotus Temple", "Mysore Palace", "Qutub Minar", "Taj Mahal",
    "Tanjavur Temple", "Victoria Memorial"
]

# Function to predict the class of an image given a path or URL
def predict_image_class(image_input):
    # Load the fine-tuned model and the feature extractor
    model_path = '/content/drive/MyDrive/Project_IR/fine_tuned_model'
    model = BeitForImageClassification.from_pretrained(model_path)
    feature_extractor = BeitFeatureExtractor.from_pretrained('microsoft/beit-base-patch16-224')

    # Ensure model is in evaluation mode
    model.eval()

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Check if the input is a URL
    if image_input.startswith('http'):
        response = requests.get(image_input)
        image = Image.open(BytesIO(response.content)).convert("RGB")
    else:
        # Assume it's a file path
        image = Image.open(image_input).convert("RGB")

    # Preprocess the image
    transform = Compose([
        Resize((224, 224)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    image = transform(image).unsqueeze(0)  # Add batch dimension

    # Move image to the same device as the model
    image = image.to(device)

    # Perform inference
    with torch.no_grad():
        outputs = model(image)
        preds = outputs.logits.softmax(dim=-1)
        predicted_index = preds.argmax(1).item()

    # Ensure the predicted index is within the range of class names
    if predicted_index < len(class_names):
        predicted_class = class_names[predicted_index]
    else:
        predicted_class = "Unknown Class"

    return f"Predicted class for the image: {predicted_class}"

# Example usage with a path or URL
image_input = '/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/test/Ellora Caves/12.jpg'
# For URL, uncomment and use something like:
# image_input = 'https://example.com/path/to/image.jpg'

predicted_class = predict_image_class(image_input)
print(predicted_class)


Predicted class for the image: Ellora Caves


In [35]:
from torchvision import datasets
import pickle

# Load your dataset
train_data_path = '/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train'
dataset = datasets.ImageFolder(root=train_data_path)

# Save the image paths
dataset_image_paths = [item[0] for item in dataset.imgs]
with open('/content/drive/MyDrive/Project_IR/Monuments_dataset_image_paths.pkl', 'wb') as f:
    pickle.dump(dataset_image_paths, f)

print("Image paths saved.")


Image paths saved.


In [12]:
import os
from PIL import Image
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.models import Model
import pickle

# Initialize the InceptionV3 model
base_model = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
model = Model(inputs=base_model.input, outputs=base_model.output)

def preprocess_image(img_path):
    """Load and preprocess an image."""
    img = keras_image.load_img(img_path, target_size=(299, 299))
    img_array = keras_image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array)

def extract_features(image_path, model):
    """Extract features for a single image."""
    img_array = preprocess_image(image_path)
    features = model.predict(img_array)
    return features.flatten()

def process_images_and_save(dataset_path, output_file):
    """Process all images in the dataset and save their features and paths."""
    image_features = []
    image_paths = []

    # Calculate total number of images for progress updates
    total_images = sum([len(files) for r, _, files in os.walk(dataset_path)])
    processed_images = 0

    # Walk through the directory, preprocess images, and extract features
    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg','.jfif')):
                image_path = os.path.join(root, file)
                try:
                    features = extract_features(image_path, model)
                    image_features.append(features)
                    image_paths.append(image_path)
                except Exception as e:
                    print(f"Error processing {image_path}: {e}")

                processed_images += 1
                print(f"Progress: {processed_images}/{total_images} images processed ({(processed_images/total_images)*100:.2f}%)")

    # Save the features and paths to a .pkl file
    with open(output_file, 'wb') as f:
        pickle.dump({'paths': image_paths, 'features': image_features}, f)
    print("All features extracted and saved.")

# Specify the dataset directory and the output file path
dataset_path = '/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train'  # Update this path
output_file = '/content/drive/MyDrive/Project_IR/dataset_features_Monuments.pkl'  # Update this path

# Execute the processing and saving of image features
process_images_and_save(dataset_path, output_file)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Progress: 1243/3746 images processed (33.18%)
Progress: 1244/3746 images processed (33.21%)
Progress: 1245/3746 images processed (33.24%)
Progress: 1246/3746 images processed (33.26%)
Progress: 1247/3746 images processed (33.29%)
Progress: 1248/3746 images processed (33.32%)
Progress: 1249/3746 images processed (33.34%)
Progress: 1250/3746 images processed (33.37%)
Progress: 1251/3746 images processed (33.40%)
Progress: 1252/3746 images processed (33.42%)
Progress: 1253/3746 images processed (33.45%)
Progress: 1254/3746 images processed (33.48%)
Progress: 1255/3746 images processed (33.50%)
Progress: 1256/3746 images processed (33.53%)
Progress: 1257/3746 images processed (33.56%)
Progress: 1258/3746 images processed (33.58%)
Progress: 1259/3746 images processed (33.61%)
Progress: 1260/3746 images processed (33.64%)
Progress: 1261/3746 images processed (33.66%)
Progress: 1262/3746 images processed (33.69%)
Progress: 1263/

In [9]:
import torch
from transformers import BeitForImageClassification, BeitFeatureExtractor
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.models import Model
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pickle
from PIL import Image

# Load the fine-tuned classification model
classification_model_path = '/content/drive/MyDrive/Project_IR/fine_tuned_model'
classification_model = BeitForImageClassification.from_pretrained(classification_model_path)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
classification_model.to(device).eval()

# Initialize InceptionV3 for feature extraction
inception_model = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
feature_model = Model(inputs=inception_model.input, outputs=inception_model.output)

# Class names for classification
class_names = [
    "Ajanta Caves", "Charar-E- Sharif", "Chhota Imambara", "Ellora Caves", "Fatehpur Sikiri",
    "Hawa Mahal", "Gateway of India", "Khajuraho", "Sun Temple Konark", "Alai Darwaza",
    "Alai Minar", "Basilica of Bom Jesus", "Charminar", "Golden Temple", "Iron Pillar",
    "Jamali Kamali Tomb", "Lotus Temple", "Mysore Palace", "Qutub Minar", "Taj Mahal",
    "Tanjavur Temple", "Victoria Memorial"
]


def preprocess_image_inception(image_path):
    img = keras_image.load_img(image_path, target_size=(299, 299))
    img_array = keras_image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array)

def extract_features_inception(image_path, model):
    img_array = preprocess_image_inception(image_path)
    features = model.predict(img_array)
    return features.flatten()

def classify_image(image_path, model, device):
    transform = Compose([
        Resize((224, 224)),  # Resize image for Beit model
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = Image.open(image_path).convert("RGB")
    image_tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(image_tensor)
        preds = outputs.logits.softmax(dim=-1)
        predicted_index = preds.argmax(1).item()
    return class_names[predicted_index]


def load_data(file_name):
    with open(file_name, 'rb') as file:
        data = pickle.load(file)
    return data['features'], data['paths']

def find_similar_images(image_input, dataset_features):
    input_features = extract_features_inception(image_input, feature_model)
    similarities = cosine_similarity([input_features], dataset_features)
    top_10_indices = np.argsort(similarities[0])[-10:][::-1]
    return top_10_indices

# Use the functions
image_input = input("Enter image path: ")
features_file_path = '/content/drive/MyDrive/Project_IR/dataset_features_Monuments.pkl'
dataset_features, dataset_image_paths = load_data(features_file_path)

predicted_class = classify_image(image_input, classification_model, device)
top_10_indices = find_similar_images(image_input, dataset_features)
top_10_similar_images = [dataset_image_paths[i] for i in top_10_indices]

print(f"Predicted class: {predicted_class}")
print("Top 10 similar images:")
for path in top_10_similar_images:
    print(path)


Enter image path: /content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/test/Chhota_Imambara/1.jpg
Predicted class: Chhota Imambara
Top 10 similar images:
/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train/Chhota_Imambara/img1 - Copy.jpg
/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train/Chhota_Imambara/img1.jpg
/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train/Chhota_Imambara/img11 - Copy.jpg
/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train/Chhota_Imambara/img11.jpg
/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train/Chhota_Imambara/img26 - Copy.jpg
/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train/Chhota_Imambara/img26.jpg
/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train/Chhota_Imambara/img32 - Copy.jpg
/content/drive/MyDrive/Project_IR/Dataset_Indian-monuments/images/train/Chhota_Imambara/img32.jpg
/content/drive/MyDrive/