In [3]:
import os
from PIL import Image
import torch
from torchvision.transforms.functional import to_tensor
import json
import numpy as np
from nltk.tokenize import word_tokenize
# Define the directories for your data.
# Point the CARDD_COCO_DIR environment variable at your own copy of the
# CarDD_COCO folder; the literal path below is only the fallback default, so the
# notebook no longer has to be edited to run on another machine.
data_root = os.environ.get(
    'CARDD_COCO_DIR',
    r'C:\Users\sruja\Downloads\CarDD_release\CarDD_release\CarDD_COCO',
)
# One sub-folder per split, all hanging off the same root.
train_dir = os.path.join(data_root, 'train2017')
val_dir = os.path.join(data_root, 'val2017')
test_dir = os.path.join(data_root, 'test2017')

# Function to load images from a directory
def load_images_from_directory(dir_path, extensions=('.jpg', '.png')):
    """Load every image file found directly inside `dir_path` as an RGB PIL image.

    Args:
        dir_path: Directory to scan (not recursive).
        extensions: Tuple of file suffixes to accept. Matching ignores case,
            so e.g. ``photo.JPG`` is picked up as well.

    Returns:
        A list of RGB ``PIL.Image.Image`` objects, ordered by file name.
    """
    images = []
    # os.listdir returns names in arbitrary order; sorting makes the result
    # reproducible between runs and machines.
    for filename in sorted(os.listdir(dir_path)):
        if not filename.lower().endswith(tuple(extensions)):
            continue
        with Image.open(os.path.join(dir_path, filename)) as img:
            # convert() returns a new image object, which is what gets kept
            # after the file handle is closed by the `with` block.
            images.append(img.convert('RGB'))
    return images

# Read the three splits (train, validation, test) from their directories,
# in that order.
train_images, val_images, test_images = [
    load_images_from_directory(split_dir)
    for split_dir in (train_dir, val_dir, test_dir)
]


In [6]:
# Default target size (in pixels) applied to every image by preprocess_images.
desired_width = 224
desired_height = 224

def preprocess_images(images, size=None):
    """Resize images to a common size and scale pixel values by 1/255.

    Args:
        images: An iterable of PIL images and/or array-likes accepted by
            ``Image.fromarray``. A single PIL image is also accepted and is
            treated as a batch of one.
        size: Optional ``(width, height)`` target. Defaults to
            ``(desired_width, desired_height)``.

    Returns:
        A NumPy array holding one resized, 0-1 scaled image per input, stacked
        along the first axis.
    """
    if size is None:
        size = (desired_width, desired_height)

    # A lone PIL image is not iterable; wrap it so callers can pass one image
    # directly instead of hitting a TypeError in the loop below.
    if isinstance(images, Image.Image):
        images = [images]

    processed_images = []
    for img in images:
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        img = img.resize(size)
        processed_images.append(np.array(img) / 255.0)

    return np.array(processed_images)


In [7]:

# Read the training annotation file and keep the id, size and file name of
# every entry under its 'images' key.
with open(r'C:\Users\sruja\Downloads\CarDD_release\CarDD_release\CarDD_COCO\annotations\instances_train2017.json') as f:
    data = json.load(f)

train_images_info = [
    {field: image[field] for field in ('id', 'width', 'height', 'file_name')}
    for image in data['images']
]

In [8]:

# Build one text label per training image from the COCO annotation file.
with open(r'C:\Users\sruja\Downloads\CarDD_release\CarDD_release\CarDD_COCO\annotations\instances_train2017.json') as f:
    data = json.load(f)

# Category id -> category name.
id_to_name = {item['id']: item['name'] for item in data['categories']}

# In the COCO layout an image is tied to its categories through the annotation
# records (each carries an `image_id` and a `category_id`). An image id is not a
# category id, so looking image ids up in `id_to_name` directly only matched
# images whose id happened to coincide with a category id.
train_image_id_to_names = {}
for annotation in data['annotations']:
    names = train_image_id_to_names.setdefault(annotation['image_id'], [])
    name = id_to_name[annotation['category_id']]
    if name not in names:  # keep each category once, in first-seen order
        names.append(name)

# One entry per image, in `train_images_info` order; an image without any
# annotation gets an empty string so positions stay aligned with the image list.
train_texts = [
    ', '.join(train_image_id_to_names.get(item['id'], []))
    for item in train_images_info
]

In [9]:
# Read the test annotation file and keep the id, size and file name of every
# entry under its 'images' key.
with open(r'C:\Users\sruja\Downloads\CarDD_release\CarDD_release\CarDD_COCO\annotations\instances_test2017.json') as f:
    data = json.load(f)

test_images_info = [
    {field: image[field] for field in ('id', 'width', 'height', 'file_name')}
    for image in data['images']
]

In [10]:

# Build one text label per test image from the COCO annotation file.
with open(r"C:\Users\sruja\Downloads\CarDD_release\CarDD_release\CarDD_COCO\annotations\instances_test2017.json") as f:
    data = json.load(f)


# Category id -> category name.
id_to_name = {item['id']: item['name'] for item in data['categories']}

# Images are linked to categories through the annotation records
# (`image_id` / `category_id`); image ids must not be looked up in
# `id_to_name` directly.
test_image_id_to_names = {}
for annotation in data['annotations']:
    names = test_image_id_to_names.setdefault(annotation['image_id'], [])
    name = id_to_name[annotation['category_id']]
    if name not in names:  # keep each category once, in first-seen order
        names.append(name)

# Iterate the TEST image list (this cell previously walked `train_images_info`,
# so the test labels were derived from training images). An image without any
# annotation gets an empty string so positions stay aligned with the image list.
test_texts = [
    ', '.join(test_image_id_to_names.get(item['id'], []))
    for item in test_images_info
]

In [11]:
# Read the validation annotation file and keep the id, size and file name of
# every entry under its 'images' key.
with open(r"C:\Users\sruja\Downloads\CarDD_release\CarDD_release\CarDD_COCO\annotations\instances_val2017.json") as f:
    data = json.load(f)

val_images_info = [
    {field: image[field] for field in ('id', 'width', 'height', 'file_name')}
    for image in data['images']
]

In [12]:

# Build one text label per validation image from the COCO annotation file.
with open(r"C:\Users\sruja\Downloads\CarDD_release\CarDD_release\CarDD_COCO\annotations\instances_val2017.json") as f:
    data = json.load(f)

# Category id -> category name.
id_to_name = {item['id']: item['name'] for item in data['categories']}

# Images are linked to categories through the annotation records
# (`image_id` / `category_id`); image ids must not be looked up in
# `id_to_name` directly.
val_image_id_to_names = {}
for annotation in data['annotations']:
    names = val_image_id_to_names.setdefault(annotation['image_id'], [])
    name = id_to_name[annotation['category_id']]
    if name not in names:  # keep each category once, in first-seen order
        names.append(name)

# Iterate the VALIDATION image list (this cell previously walked
# `train_images_info`, so the validation labels came from training images).
# An image without any annotation gets an empty string so positions stay
# aligned with the image list.
val_texts = [
    ', '.join(val_image_id_to_names.get(item['id'], []))
    for item in val_images_info
]

In [13]:
import nltk
# Fetch the 'punkt' resource through NLTK's downloader (the captured output
# below shows it was already up to date on the author's machine).
# NOTE(review): presumably this is the tokenizer data needed by the
# `word_tokenize` import at the top of the notebook — confirm the resource name
# against the installed NLTK version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sruja\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
def preprocess_text(texts):
    """Tokenize each string in `texts` with `word_tokenize`.

    Returns a list with one token list per input string, in input order.
    """
    token_lists = []
    for sentence in texts:
        token_lists.append(word_tokenize(sentence))
    return token_lists

# Tokenize the label strings of all three splits; each name is rebound from a
# list of strings to a list of token lists.
# NOTE(review): because the names are overwritten, running this cell a second
# time feeds token lists (not strings) back into preprocess_text.
train_texts, val_texts, test_texts = [
    preprocess_text(split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
]

In [None]:


def _images_to_tensor(images):
    """Resize each PIL image to the shared target size and stack them into one tensor."""
    target_size = (desired_width, desired_height)
    # torch.stack requires equally-shaped tensors, and the images are loaded
    # from disk at their native sizes, so resize before converting.
    return torch.stack([to_tensor(img.resize(target_size)) for img in images])


def _label_key(tokens):
    """Turn a token list produced by preprocess_text back into a single label string."""
    return ' '.join(tokens)


# torch.tensor cannot hold strings, so every distinct label is mapped to an
# integer class index. This matches how the labels are consumed later in the
# notebook: as CrossEntropyLoss targets and in `predicted == texts`.
label_names = sorted({_label_key(tokens) for tokens in train_texts + val_texts})
label_to_index = {name: index for index, name in enumerate(label_names)}


def _labels_to_tensor(token_lists):
    """Encode each tokenized label as its integer class index (dtype long)."""
    return torch.tensor(
        [label_to_index[_label_key(tokens)] for tokens in token_lists],
        dtype=torch.long,
    )


train_images_tensor = _images_to_tensor(train_images)
train_texts_tensor = _labels_to_tensor(train_texts)
val_images_tensor = _images_to_tensor(val_images)
val_texts_tensor = _labels_to_tensor(val_texts)


In [None]:
import torch.optim as optim
from tqdm import tqdm
import clip
# Load the pre-trained CLIP model
# `clip.load` is unpacked into the model and a `preprocess` object; `preprocess`
# is not used in any of the visible cells.
# NOTE(review): the device is hard-coded to "cuda", so this cell needs a GPU.
model, preprocess = clip.load("ViT-B/32", device="cuda")


# Loss and optimizer for the loop below. `criterion` is also read as a global
# inside `evaluate` further down.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10  

for epoch in range(num_epochs):

    # NOTE(review): `train_loader` is not defined in any visible cell; the
    # tensors built in the previous cell are never wrapped in a loader here —
    # confirm where it is supposed to come from.
    for images, texts in tqdm(train_loader):
    
        # Move the batch to the same device the model was loaded on.
        images = images.to("cuda")
        texts = texts.to("cuda")


        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()


        # NOTE(review): the model is called with (images, texts) here but with
        # images only inside `evaluate` — presumably one of the two calls is
        # wrong; TODO confirm against the CLIP model's forward signature.
        outputs = model(images, texts)
        # NOTE(review): `texts` is used both as a model input above and as the
        # loss target here — confirm that this is intended and that `outputs`
        # is a single logits tensor.
        loss = criterion(outputs, texts)

   
        # Backpropagate and apply one optimizer update.
        loss.backward()
        optimizer.step()




In [None]:

def evaluate(model, test_loader, device="cuda", loss_fn=None):
    """Compute the average loss and accuracy of `model` over `test_loader`.

    Args:
        model: Callable module; invoked as ``model(images)`` and expected to
            return per-class scores with classes along dimension 1.
            NOTE(review): the training loop in this notebook calls the model as
            ``model(images, texts)`` — confirm which signature is right.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.
        loss_fn: Loss callable. Defaults to the notebook-level ``criterion``.
            It is assumed to return the mean loss over the batch.

    Returns:
        ``(avg_loss, accuracy)`` averaged per sample. Both are ``nan`` when the
        loader yields no samples.
    """
    if loss_fn is None:
        loss_fn = criterion

    model.eval()
    total_loss = 0.0
    correct_predictions = 0
    num_samples = 0

    with torch.no_grad():
        for images, texts in test_loader:
            images = images.to(device)
            texts = texts.to(device)

            outputs = model(images)
            batch_size = texts.size(0)

            # The loss is a per-batch mean, so weight it by the batch size.
            # Summing the means and dividing by the dataset size (as before)
            # under-reports the loss by roughly a factor of the batch size.
            total_loss += loss_fn(outputs, texts).item() * batch_size

            predicted = outputs.argmax(dim=1)
            correct_predictions += (predicted == texts).sum().item()
            num_samples += batch_size

    # Guard against an empty loader instead of dividing by zero.
    if num_samples == 0:
        return float("nan"), float("nan")

    avg_loss = total_loss / num_samples
    accuracy = correct_predictions / num_samples

    return avg_loss, accuracy

# Score the model on the test loader and report both metrics.
# NOTE(review): `test_loader` is not defined in any visible cell — confirm
# where it is created.
test_metrics = evaluate(model, test_loader)
test_loss, test_accuracy = test_metrics
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


In [None]:
# Save the model
# Datasets mounted under /kaggle/input are read-only inside a Kaggle notebook,
# so the checkpoint is written to the writable /kaggle/working directory.
torch.save(model.state_dict(), '/kaggle/working/model.pth')


In [None]:
def convert_prediction_to_text(prediction, categories=None):
    """Map a model prediction to the name of its highest-scoring category.

    Args:
        prediction: Tensor of class scores for a single sample. The argmax is
            taken over the flattened tensor, so a ``(1, C)`` batch works too.
        categories: Mapping of category id -> name. Defaults to the
            notebook-level ``id_to_name``.

    Returns:
        The category name for the highest score.

    Raises:
        IndexError: If the prediction has more entries than there are
            categories.
    """
    if categories is None:
        categories = id_to_name

    scores = prediction.detach().cpu().numpy()
    class_index = int(np.argmax(scores))

    # argmax yields a 0-based position, while the mapping is keyed by category
    # id, so the position is translated through the sorted ids rather than
    # used as a key directly.
    # NOTE(review): assumes the model's output positions follow ascending
    # category id — confirm against how the training labels were encoded.
    ordered_ids = sorted(categories)
    return categories[ordered_ids[class_index]]
# Assume `new_image` is your new input image
# Open inside a `with` block so the file handle is released; convert() returns
# an independent RGB copy that stays usable afterwards.
with Image.open("/kaggle/input/car-damage/CarDD_release/CarDD_COCO/test2017/000000000139.jpg") as image_file:
    new_image = image_file.convert('RGB')

# Don't forget to preprocess your new data. preprocess_images iterates over its
# argument, so the single image is wrapped in a list (a batch of one).
new_image = preprocess_images([new_image])

# preprocess_images returns a float64 array laid out (batch, height, width,
# channels); reorder to (batch, channels, height, width) and cast to float32
# before moving the data to the GPU.
new_image_tensor = torch.from_numpy(new_image).permute(0, 3, 1, 2).float().to("cuda")

with torch.no_grad():  # No need to track gradients when making predictions
    # NOTE(review): the training loop calls the model as model(images, texts);
    # confirm that an image-only call is valid for this model.
    prediction = model(new_image_tensor)

# Convert the prediction to text
report = convert_prediction_to_text(prediction)
print(report)
