In [1]:
# Envs
from dataset import MyData
from torchvision import transforms
from torch.utils.data import random_split, DataLoader
from train import *
from path import ROOT_DIR
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import os
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler

In [2]:
# load Data
csv_file = os.path.join(ROOT_DIR, "data", "labels", "car_imgs_4000.csv")
data = pd.read_csv(csv_file)

image_paths = data['filename'].tolist()
labels = data[['perspective_score_hood', 'perspective_score_backdoor_left']].values.tolist() 

In [3]:
# config 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 10
batch_size = 32
learning_rate = 0.0001

# image preporcessing
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(degrees=5), 
    transforms.ColorJitter(hue=0.15, saturation=0.15, brightness=0.25), 
    transforms.GaussianBlur((7, 7), sigma=(0.1, 0.5)), 
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [4]:
dataset = MyData(image_paths, labels)

# load model
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
model.classifier = nn.Sequential(
    nn.Linear(model.classifier.in_features, 2), 
    nn.Sigmoid()  
)
model.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)

train_size = int(0.8 * len(dataset)) 
val_size = len(dataset) - train_size  
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_dataset.dataset.transform = val_transform
val_dataset.dataset.transform = val_transform 

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    val_loss = validate(model, val_loader, criterion, device)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

torch.save(model.state_dict(), os.path.join(ROOT_DIR, "model", 'densenet121_model_10_1e-4_32_224x224_decay.pth'))

Epoch 1/10, Train Loss: 0.0445, Validation Loss: 0.0149
Epoch 2/10, Train Loss: 0.0092, Validation Loss: 0.0115
Epoch 3/10, Train Loss: 0.0055, Validation Loss: 0.0108
Epoch 4/10, Train Loss: 0.0039, Validation Loss: 0.0092
Epoch 5/10, Train Loss: 0.0036, Validation Loss: 0.0092
Epoch 6/10, Train Loss: 0.0028, Validation Loss: 0.0098
Epoch 7/10, Train Loss: 0.0020, Validation Loss: 0.0088
Epoch 8/10, Train Loss: 0.0018, Validation Loss: 0.0082
Epoch 9/10, Train Loss: 0.0016, Validation Loss: 0.0078
Epoch 10/10, Train Loss: 0.0013, Validation Loss: 0.0077


In [5]:
# test

test_image_path = os.path.join(ROOT_DIR, "data", "imgs", "00b9e159-3ce0-4325-8e9b-4d829db719e8.jpg")

In [6]:
from PIL import Image

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

def load_image(image_path):
    image = Image.open(image_path).convert('RGB') 
    image = val_transform(image) 
    image = image.unsqueeze(0) 
    return image

model = models.densenet121() 
model.classifier = nn.Sequential(
    nn.Linear(model.classifier.in_features, 2),
    nn.Sigmoid() 
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.load_state_dict(torch.load(os.path.join(ROOT_DIR, "model", 'densenet121_model_10_1e-4_32_224x224_decay.pth')))
model.eval()

image = load_image(test_image_path).to(device)
with torch.no_grad(): 
    output = model(image)  

output

tensor([[0.0121, 0.8465]], device='cuda:0')