In [11]:
from pymongo import MongoClient
import requests
import torch
import os 
from dotenv import load_dotenv

In [12]:
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image

In [13]:
# Read key/value pairs from a .env file into the process environment so that
# os.getenv("DB_URL") further down can find the connection string.
# The cell output below is this call's return value.
load_dotenv()

True

In [14]:
def set_all_the_dependencies(db_name_1,collection_name,path_to_the_model):
    """Create every shared object the notebook needs and return them together.

    Opens the MongoDB collection, prepares a ``requests`` session with
    browser-like headers, rebuilds the ResNet-50 classifier with its custom
    head, loads the trained weights, and builds the image preprocessing
    pipeline.

    Args:
        db_name_1: Name of the MongoDB database to open.
        collection_name: Name of the collection inside that database.
        path_to_the_model: Path to the saved ``state_dict`` of the trained model.

    Returns:
        An 11-tuple, in this exact order:
        ``(db_url, db_name, collection, headers, session, total_classes,
        model, model_path, device, transform, classes)``.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise when the checkpoint
        cannot be read or does not match the architecture built here.
    """
    # Connection string comes from the environment (DB_URL).
    # NOTE(review): when DB_URL is unset, None is handed to MongoClient, which
    # presumably falls back to its default host -- confirm that is intended.
    db_url = os.getenv("DB_URL")
    client = MongoClient(db_url)
    db_name = client[db_name_1]
    collection = db_name[collection_name]

    # Browser-like headers so the image host is less likely to block the scraper.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://www.pakwheels.com/used-cars/honda/32"
    }
    session = requests.Session()
    # Make the headers session defaults as well, so a request issued without an
    # explicit headers= argument still carries them. Per-request headers keep
    # taking precedence, so existing callers are unaffected.
    session.headers.update(headers)

    # Index -> label map for the classifier output. The size of the final
    # layer is derived from this map so the two can never drift apart.
    classes = {
        0: "Normal",
        1: "crack",
        2: "dent",
        3: "glass_shatter",
        4: "lamp_broken",
        5: "scratch",
        6: "tire_flat"
    }
    total_classes = len(classes)

    # weights=None: no pretrained weights are requested because our own
    # checkpoint is loaded below. The head must mirror the one used in training.
    model = models.resnet50(weights=None)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, 256),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(256, total_classes)
    )
    model_path = path_to_the_model
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # SECURITY: torch.load unpickles the file, which can execute arbitrary code.
    # Only load checkpoints you created or trust; consider weights_only=True
    # where the installed torch version supports it.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()  # inference mode: disables the Dropout layers in the head
    # This line must get printed if loading succeeded.
    print("model loaded successfully")

    # Preprocessing applied to every image before it is fed to the model.
    # The mean/std values are the usual ImageNet channel statistics.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
    return db_url,db_name,collection,headers,session,total_classes,model,model_path,device,transform,classes

In [15]:
# Build all shared objects once; later cells use these module-level names.
(
    db_url,
    db_name,
    collection,
    headers,
    session,
    total_classes,
    model,
    model_path,
    device,
    transform,
    classes,
) = set_all_the_dependencies(
    db_name_1="Honda_cars",
    collection_name="listings",
    path_to_the_model="/home/subhan/All/Car_Project/ImagePreprocessing/model_training_kaggle_data/notebooks/saved_training_results.pth",
)

model loaded successfully


In [16]:
from io import BytesIO
from tqdm import tqdm

In [17]:
def model_prediction(url,headers,session,device,transform,model):
    """Download one image and classify it.

    Args:
        url: Address of the image to fetch.
        headers: HTTP headers sent with the request.
        session: Object whose ``get`` performs the download.
        device: Device the input tensor is moved to before inference.
        transform: Callable turning the decoded image into a tensor.
        model: Classifier; called on a batch of size one.

    Returns:
        ``(class_index, confidence)`` on success. If anything in the
        download / decode / inference chain raises, the error is printed and
        ``(None, 0.0)`` is returned instead.
    """
    try:
        reply = session.get(url, headers=headers, timeout=10)
        reply.raise_for_status()
        picture = Image.open(BytesIO(reply.content)).convert("RGB")
        # unsqueeze(0) adds the batch dimension the model expects.
        batch = transform(picture).unsqueeze(0).to(device)
        model.eval()
        with torch.no_grad():
            logits = model(batch)
            probabilities = torch.softmax(logits, dim=1)  # logits -> probabilities
            best_index = torch.argmax(probabilities, dim=1).item()  # most likely class
            best_score = probabilities[0][best_index].item()  # its probability
    except Exception as e:
        print(f"Error processing {url}: {e}")
        return None, 0.0
    else:
        return best_index, best_score

In [18]:
def set_data_base(collection, headers, session, device, classes, transform, model, skip_labeled=False):
    """Classify every listing's exterior images and store the labels on the document.

    For each document that has an ``exterior_images`` field, every image URL is
    run through ``model_prediction`` and the result is written back under
    ``damage_labels``: a dict mapping each class name to a list of 0/1 flags,
    one flag per successfully classified image (1 for the predicted class,
    0 for all others).

    Gotcha: images whose prediction fails are skipped entirely, so the flag
    lists can be shorter than ``exterior_images`` and positions do not
    necessarily line up with the URL list.

    Args:
        collection: MongoDB collection holding the listings.
        headers: HTTP headers forwarded to ``model_prediction``.
        session: HTTP session forwarded to ``model_prediction``.
        device: Torch device forwarded to ``model_prediction``.
        classes: Mapping from predicted class index to class name.
        transform: Image preprocessing forwarded to ``model_prediction``.
        model: Classifier forwarded to ``model_prediction``.
        skip_labeled: When True, documents that already have a
            ``damage_labels`` field are left alone, so an interrupted run can
            be resumed. Defaults to False (re-label everything).
    """
    print("Fetching document IDs...")
    id_filter = {"exterior_images": {"$exists": True}}
    if skip_labeled:
        id_filter["damage_labels"] = {"$exists": False}

    # IDs are collected up front and each document is fetched again inside the
    # loop. NOTE(review): presumably so no server-side cursor stays open
    # during the slow per-image work -- confirm.
    all_ids = [doc["_id"] for doc in collection.find(id_filter, {"_id": 1})]

    print(f"Found {len(all_ids)} documents to process")

    for doc_id in tqdm(all_ids, desc="Processing documents"):
        # Only the image list is needed, so do not pull the whole listing.
        doc = collection.find_one({"_id": doc_id}, {"exterior_images": 1})

        if not doc:
            continue

        labeled_arrays = {class_name: [] for class_name in classes.values()}
        # `$exists: True` also matches a field stored as null; treat that the
        # same as an empty list instead of crashing on iteration.
        exterior_images_url = doc.get("exterior_images") or []

        for url in exterior_images_url:
            result, _ = model_prediction(url, headers=headers, session=session, device=device, transform=transform, model=model)

            if result is None:
                continue

            predicted_class = classes[result]

            # One-hot entry for this image across all class lists.
            for class_name, flags in labeled_arrays.items():
                flags.append(1 if class_name == predicted_class else 0)

        collection.update_one(
            {"_id": doc_id},
            {"$set": {"damage_labels": labeled_arrays}}
        )

    print("Done evaluation")

In [19]:
# Run the labeling pass over the collection. This is long-running: each image
# costs one HTTP download plus one forward pass through the model.
set_data_base(
    collection=collection,
    headers=headers,
    session=session,
    device=device,
    classes=classes,
    transform=transform,
    model=model,
)

Fetching document IDs...
Found 2604 documents to process


Processing documents:   6%|▋         | 167/2604 [07:08<1:44:13,  2.57s/it]


KeyboardInterrupt: 