In [1]:
import torch
import time
import os
from tempfile import TemporaryDirectory
from tqdm import tqdm
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn as nn
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split

device = "cuda"

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
tree = ET.parse("inference_label_studio\Annotations\9b1e7b8b-20230608_110037.xml")
root = tree.getroot()
bndbox_values = {}

for i, obj in enumerate(root.findall('object')):
    bndbox = obj.find('bndbox')
    name = obj.find('name').text

    xmin = float(bndbox.find('xmin').text)
    ymin = float(bndbox.find('ymin').text)
    xmax = float(bndbox.find('xmax').text)
    ymax = float(bndbox.find('ymax').text)
    bndbox_values[name + str(i)] = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}

bndbox_values

{'spot0': {'xmin': 131.0, 'ymin': 2367.0, 'xmax': 1176.0, 'ymax': 2955.0},
 'spot1': {'xmin': 1259.0, 'ymin': 2097.0, 'xmax': 2304.0, 'ymax': 2685.0},
 'spot2': {'xmin': 2339.0, 'ymin': 2035.0, 'xmax': 3093.0, 'ymax': 2464.0},
 'spot3': {'xmin': 2969.0, 'ymin': 1806.0, 'xmax': 3744.0, 'ymax': 2242.0},
 'spot4': {'xmin': 3620.0, 'ymin': 1668.0, 'xmax': 4395.0, 'ymax': 2104.0}}

In [2]:
image_folder = "inference_label_studio\images"
annotation_folder = "inference_label_studio\Annotations"

import cv2
from utils.image_utils import extract_bndbox_values

from PIL import Image
for image_filename in os.listdir(image_folder):
    image_path = os.path.join(image_folder, image_filename)

    image_to_draw = cv2.imread(image_path)
    print("Path: ", image_path)
    full_image = Image.open(image_path)
    if image_filename.endswith(".jpg") or image_filename.endswith(".png"):
        annotation_filename = os.path.join(
            annotation_folder,
            image_filename.replace(".jpg", ".xml").replace(".png", ".xml"),
        )

    # Check if the annotation file exists
    if os.path.isfile(annotation_filename):
        bndbox_values = extract_bndbox_values(annotation_filename)
    for key in bndbox_values:
            values = bndbox_values[key]
            # Extract coordinates from the bounding box
            xmin = int(values["xmin"])
            ymin = int(values["ymin"])
            xmax = int(values["xmax"])
            ymax = int(values["ymax"])
            # Crop patch for the image
            patch = full_image.crop((xmin, ymin, xmax, ymax))
            print(patch)
            #img = Image.open(patch)
            

Path:  inference_label_studio\images\692159c7-Screenshot_2023-12-14_211832.jpg
<PIL.Image.Image image mode=RGBA size=165x234 at 0x1DBC0A6CCD0>


AttributeError: 'Image' object has no attribute 'read'

In [3]:
pk_lot_dir  = "pk_lot_data"
cnr_parking_dir  = "cnr_parking_data"

data_transforms = {
        "train": transforms.Compose(
            [
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]
        ),
        "val": transforms.Compose(
            [
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]
        ),
    }

image_datasets_pk_lot = {
    x: datasets.ImageFolder(os.path.join(pk_lot_dir, y), data_transforms[y])
    for x,y  in zip(["train", "val"], ["train", "val"])
}
image_datasets_cnr_park = {
    x: datasets.ImageFolder(os.path.join(cnr_parking_dir, y), data_transforms[y])
    for x,y  in zip(["train", "val"], ["train", "val"])
}

In [4]:
dataset_size_cnr_val = len(image_datasets_cnr_park["val"])
split_point = dataset_size_cnr_val // 2
image_datasets_cnr_park["val"], dataset_part2 = random_split(image_datasets_cnr_park["val"], [split_point, dataset_size_cnr_val - split_point])


dataset_size_pk_lot_val = len(image_datasets_pk_lot["val"])
split_point = dataset_size_pk_lot_val // 2
image_datasets_pk_lot["val"], dataset_part2 = random_split(image_datasets_pk_lot["val"], [split_point, dataset_size_pk_lot_val - split_point])


In [5]:
train_dataset = torch.utils.data.ConcatDataset([image_datasets_pk_lot["train"], image_datasets_cnr_park["train"]])
val_dataset = torch.utils.data.ConcatDataset([image_datasets_pk_lot["val"], image_datasets_cnr_park["val"]])

In [6]:
dataloaders_cnr_park= {
    "train": torch.utils.data.DataLoader(
        image_datasets_cnr_park["train"],
        batch_size=32,
        shuffle=True,
        num_workers=0,
    ),
    "val": torch.utils.data.DataLoader(
        image_datasets_cnr_park["val"],
        batch_size=32,
        shuffle=True,
        num_workers=0,
    )
    
}



dataloaders= {
    "train": torch.utils.data.DataLoader(
        train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=0,
    ),
    "val": torch.utils.data.DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=0,
    )
    
}



In [7]:
dataloaders_cnr_park

{'train': <torch.utils.data.dataloader.DataLoader at 0x1dbc0c4c090>,
 'val': <torch.utils.data.dataloader.DataLoader at 0x1dbbfe94ad0>}

In [8]:
dataset_sizes = {"train": len(train_dataset) , "val": len(val_dataset)}
print("Size of train and test: ", dataset_sizes)

Size of train and test:  {'train': 618925, 'val': 60504}


In [11]:
model = models.alexnet(weights="IMAGENET1K_V1")
model.classifier = nn.Sequential(
    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(in_features=9216, out_features=256, bias=True),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(in_features=256, out_features=128, bias=True),
    nn.ReLU(inplace=True),
    nn.Linear(in_features=128, out_features=1, bias=True),
)
model.load_state_dict(torch.load("./models/alex_net_pk.pth"))
model.to(device)
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=256, bias=True)
  

In [None]:
from utils.image_utils import mAlexNet

model = mAlexNet()
model.to(device)
model.load_state_dict(torch.load("./models/malex_net_combined_bce.pth"))
model

In [26]:
inputs, labels  = next(iter(dataloaders["val"]))
criterion = nn.BCEWithLogitsLoss()
#criterion = nn.CrossEntropyLoss()

# Define optimizer for nn
optimizer = optim.AdamW(model.parameters(),lr=0.001, fused=True)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
inputs = inputs.to(device)
labels = labels.to(device)
running_corrects=0
running_loss =0
  # zero the parameter gradients
optimizer.zero_grad()

# forward
# track history if only in train
with torch.set_grad_enabled(True):
        outputs = model(inputs).squeeze(dim=1)
        print(outputs)
        print(labels)
        #_, preds = torch.max(outputs, 1)
        #preds = np.where(outputs > 0, 1, 0)
        preds = (torch.sigmoid(outputs) > 0.5).float()

        loss = criterion(outputs, labels.float())

        # backward + optimize only if in training phase
        
        loss.backward()
        optimizer.step()
        
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)

scheduler.step()
print(running_loss) 
print(running_corrects) 

tensor([-0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108,
        -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108,
        -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108,
        -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108, -0.1108],
       device='cuda:0', grad_fn=<SqueezeBackward1>)
tensor([1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,
        0, 0, 1, 1, 0, 0, 1, 1], device='cuda:0')
22.340538024902344
tensor(15, device='cuda:0')


In [45]:
running_corrects = 0
for (inputs, labels) in tqdm(dataloaders_cnr_park["val"]):
    
    inputs = inputs.to(device)
    labels = labels.to(device)
    with torch.no_grad():
        outputs = model(inputs)
        #_, preds = torch.max(outputs, 1)
        preds = (torch.sigmoid(outputs) > 0.5).float()
        running_corrects += torch.sum(preds == labels.data)

      
        
train_accuracy = running_corrects/ len(dataloaders_cnr_park["val"])
train_accuracy

100%|██████████| 146/146 [00:26<00:00,  5.45it/s]


tensor(605.8014, device='cuda:0')

In [44]:
torch.save(model.state_dict(), f"combined_m_alexnet_model.pth")
