In [28]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import json
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [2]:
class ResNet(nn.Module):
    """ResNet-18 backbone whose final layer is replaced by a regression head.

    The stock fully connected layer is swapped for a linear layer emitting
    ``num_classes * 4`` values per input sample (four coordinates per class).
    """

    def __init__(self, num_classes=4):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # Replace the original head, keeping its input width unchanged.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes * 4)
        self.resnet = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and return its raw outputs."""
        return self.resnet(x)

In [3]:
# Build the network with its default head size (4 classes -> 16 outputs).
model = ResNet(num_classes=4)



In [4]:
# Smooth L1 loss for the coordinate regression; SGD with momentum updates
# every parameter of the model.
criterion = nn.SmoothL1Loss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [5]:
# Preprocessing pipeline applied to every image before it reaches the model:
# resize to 224x224, convert to a tensor, then normalize each channel.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [15]:
class CustomDataset(Dataset):
    """Dataset pairing image files with numeric regression targets.

    Args:
        image_paths: Sequence of paths, one per sample, opened lazily with PIL.
        targets: Nested numeric sequence convertible by ``torch.tensor``; it is
            stored as a single ``float32`` tensor and indexed per sample.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, image_paths, targets, transform=None):
        self.image_paths = image_paths
        self.targets = torch.tensor(targets, dtype=torch.float32)
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load the image at ``index`` and return ``(image, target)``."""
        image_path = self.image_paths[index]
        target = self.targets[index]

        # Open inside a context manager so the underlying file handle is
        # released right away instead of waiting for garbage collection;
        # ``convert`` returns an independent, fully loaded RGB copy.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        # Apply transformations if specified
        if self.transform is not None:
            image = self.transform(image)

        return image, target

In [16]:
# Read the annotation file. The encoding is pinned to UTF-8 so parsing does
# not depend on the platform's default text encoding.
with open("targets.json", encoding="utf-8") as f:
    data = json.load(f)

In [17]:
# Split each annotation record into its image path and its label values,
# keeping both lists in the same order as the records in ``data``.
image_paths = [record['image'] for record in data]
targets = [record['labels'] for record in data]

In [29]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
(
    train_image_paths,
    test_image_paths,
    train_targets,
    test_targets,
) = train_test_split(
    image_paths,
    targets,
    test_size=0.2,
    random_state=42,
)

In [34]:
# Wrap both splits in datasets that share the same preprocessing pipeline.
train_dataset = CustomDataset(
    image_paths=train_image_paths,
    targets=train_targets,
    transform=transform,
)
test_dataset = CustomDataset(
    image_paths=test_image_paths,
    targets=test_targets,
    transform=transform,
)

In [35]:
# Training batches are reshuffled on every pass. The test loader keeps the
# dataset order so evaluation is deterministic and predictions stay aligned
# with ``test_image_paths``.
dataloader_train = DataLoader(train_dataset, batch_size=32, shuffle=True)
dataloader_test = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [20]:
# Number of full passes over the training loader performed by the training loop.
num_epochs = 10

In [None]:
# Prefer the GPU when CUDA is available, otherwise stay on the CPU, then move
# the model's parameters to the chosen device.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

In [36]:
# List intended to accumulate the per-batch loss values appended by the
# training loop.
loss = []

In [None]:
# Train for ``num_epochs`` passes over the training loader, printing the mean
# batch loss after each epoch and recording every batch loss in ``loss``.
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in tqdm(dataloader_train):
        images = images.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Flatten predictions and targets into rows of four values so both
        # tensors have the same shape for the loss.
        outputs = outputs.reshape(-1, 4)
        labels = labels.reshape(-1, 4)

        # The loss tensor gets its own name: binding it to ``loss`` would
        # replace the history list and make ``loss.append`` fail.
        batch_loss = criterion(outputs, labels)

        # Backward pass and optimize
        batch_loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both accumulators.
        batch_loss_value = batch_loss.item()
        running_loss += batch_loss_value
        loss.append(batch_loss_value)

    epoch_loss = running_loss / len(dataloader_train)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss}")