In [11]:
import torch

# Choose the compute backend in priority order: CUDA, then Apple's MPS, then CPU.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

# Report which backend was selected (messages are keyed by the device type).
print({
    "cuda": "cuda",
    "mps": "apple silicon",
    "cpu": "no silicon, no cuda",
}[device.type])


apple silicon


In [26]:
import os

  # Root directory
# One directory per dataset split, all located under ./data.
train_dir, val_dir, test_dir = (
    f'./data/{split}' for split in ('train', 'val', 'test')
)

In [27]:

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing shared by every split.
# NOTE(review): the training pipeline applies no random augmentation; it is
# identical to the validation/test pipeline.
def _build_preprocessing():
    """Return a fresh resize -> tensor -> per-channel normalisation pipeline."""
    return transforms.Compose([
        transforms.Resize((224, 224)),  # every image becomes 224x224
        transforms.ToTensor(),          # image -> PyTorch tensor
        transforms.Normalize(           # per-channel mean, then per-channel std
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ])


train_transforms = _build_preprocessing()
val_test_transforms = _build_preprocessing()

# One ImageFolder per split; class labels come from the sub-directory names.
train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
val_dataset = datasets.ImageFolder(val_dir, transform=val_test_transforms)
test_dataset = datasets.ImageFolder(test_dir, transform=val_test_transforms)

# Batched loaders: only the training split is shuffled.
batch_size = 32


def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader that uses the notebook-wide batch size."""
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


train_loader = _make_loader(train_dataset, shuffle=True)
val_loader = _make_loader(val_dataset, shuffle=False)
test_loader = _make_loader(test_dataset, shuffle=False)

# Show how folder names were mapped to integer labels.
print("Class Mappings:", train_dataset.class_to_idx)

# Peek at the first training batch (if there is one) to confirm tensor shapes.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    images, labels = first_batch
    print("Image batch shape:", images.shape)
    print("Label batch shape:", labels.shape)

Class Mappings: {'awake': 0, 'sleepy': 1}
Image batch shape: torch.Size([32, 3, 224, 224])
Label batch shape: torch.Size([32])


In [28]:
import torchvision.models as models
import torch.nn as nn

# Start from a ResNet-18 initialised with the IMAGENET1K_V1 pretrained weights.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Fine-tune the whole network: every parameter stays trainable, nothing is frozen.
# (Pass False here instead to freeze the backbone and train only the new head.)
resnet.requires_grad_(True)

# Replace the pretrained classification head with a fresh 2-way linear layer,
# one output per class in this dataset.
num_classes = 2
num_features = resnet.fc.in_features  # width of the features feeding the head
resnet.fc = nn.Linear(num_features, num_classes)


In [29]:
import torch.optim as optim

def one_hot_encoding(targets, num_classes=2, device=None):
    """Convert integer class indices into one-hot rows.

    Args:
        targets: Integer class indices (e.g. a 1-D ``torch.long`` tensor).
        num_classes: Number of classes, i.e. the width of each one-hot row.
        device: Device for the result. When omitted, the device of ``targets``
            is used if it is a tensor; otherwise PyTorch's default device.

    Returns:
        A float tensor with one row per entry of ``targets`` and
        ``num_classes`` columns, holding 1.0 at the target index and 0.0
        elsewhere.
    """
    # Resolve the device at call time instead of freezing a global at
    # definition time.
    if device is None and isinstance(targets, torch.Tensor):
        device = targets.device
    # Row i of the identity matrix is the one-hot vector for class i.
    # torch.eye takes the size positionally; it has no `num_classes` keyword.
    return torch.eye(num_classes, device=device)[targets]

# Loss: cross-entropy between the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

# The ResNet configured earlier in the notebook is the working model; move it
# to the selected device once, before the optimizer captures its parameters.
model = resnet.to(device)

# Adam over all model parameters, with weight decay.
optimizer = optim.Adam(params=model.parameters(), lr=0.001, weight_decay=1e-4)

# Train function
def train(model, device, train_loader, optimizer, loss_fn=None):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to optimise; switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar loss.
            Defaults to the notebook-level ``criterion``.

    Returns:
        ``(avg_train_loss, train_accuracy)``: the mean of the per-batch losses
        and the fraction of samples classified correctly. Both are 0.0 when
        the loader yields no batches.
    """
    if loss_fn is None:
        loss_fn = criterion  # fall back to the loss defined at notebook level

    model.train()
    train_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        # Track loss
        train_loss += loss.item()

        # Track accuracy: the predicted class is the index of the largest output.
        correct += (output.argmax(dim=1) == target).sum().item()
        total += target.size(0)
        num_batches += 1

    # An empty loader used to fail with ZeroDivisionError; report zeros instead.
    if num_batches == 0:
        return 0.0, 0.0

    avg_train_loss = train_loss / num_batches
    train_accuracy = correct / total if total else 0.0

    return avg_train_loss, train_accuracy

# Test function
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            
            # Calculate loss with CrossEntropyLoss
            loss = criterion(output, target)
            test_loss += loss.item()
            
            # Track accuracy
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += target.size(0)
    
    avg_test_loss = test_loss / len(test_loader)
    test_accuracy = correct / total
    return avg_test_loss, test_accuracy

# Training loop: one train pass and one evaluation pass per epoch, with the
# per-epoch metrics kept for later inspection.
num_epochs = 50
train_losses, test_losses = [], []
train_accuracies, test_accuracies = [], []

for epoch in range(num_epochs):
    avg_train_loss, train_accuracy = train(model, device, train_loader, optimizer)
    # NOTE(review): per-epoch monitoring uses test_loader while val_loader is
    # never used in this notebook - confirm this is intended.
    avg_test_loss, test_accuracy = test(model, device, test_loader)

    train_losses.append(avg_train_loss)
    train_accuracies.append(train_accuracy)
    test_losses.append(avg_test_loss)
    test_accuracies.append(test_accuracy)

    print(
        f'Epoch {epoch}: \tTrain Loss: {avg_train_loss:.4f} \tTest Loss: {avg_test_loss:.4f}'
        f'\tTrain Accuracy: {train_accuracy:.4f}, \tTest Accuracy: {test_accuracy:.4f}'
    )

Epoch 0: 	Train Loss: 0.0702 	Test Loss: 0.0418	Train Accuracy: 0.9765, 	Test Accuracy: 0.9865
Epoch 1: 	Train Loss: 0.0530 	Test Loss: 0.0545	Train Accuracy: 0.9818, 	Test Accuracy: 0.9803
Epoch 2: 	Train Loss: 0.0505 	Test Loss: 0.0650	Train Accuracy: 0.9827, 	Test Accuracy: 0.9788
Epoch 3: 	Train Loss: 0.0453 	Test Loss: 0.0857	Train Accuracy: 0.9843, 	Test Accuracy: 0.9734
Epoch 4: 	Train Loss: 0.0423 	Test Loss: 0.0413	Train Accuracy: 0.9851, 	Test Accuracy: 0.9870
Epoch 5: 	Train Loss: 0.0392 	Test Loss: 0.0284	Train Accuracy: 0.9861, 	Test Accuracy: 0.9905
Epoch 6: 	Train Loss: 0.0363 	Test Loss: 0.0325	Train Accuracy: 0.9871, 	Test Accuracy: 0.9890
Epoch 7: 	Train Loss: 0.0337 	Test Loss: 0.0325	Train Accuracy: 0.9882, 	Test Accuracy: 0.9899
Epoch 8: 	Train Loss: 0.0329 	Test Loss: 0.0271	Train Accuracy: 0.9885, 	Test Accuracy: 0.9909
Epoch 9: 	Train Loss: 0.0312 	Test Loss: 0.0323	Train Accuracy: 0.9888, 	Test Accuracy: 0.9893
Epoch 10: 	Train Loss: 0.0301 	Test Loss: 0.0276	T

KeyboardInterrupt: 

In [9]:
import torch
import torch.nn as nn

# Layer-by-layer description of the convolutional backbone.
#   tuple -> one conv block: (kernel_size, out_channels, stride, padding)
#   str   -> a 2x2 max-pool with stride 2
#   list  -> [conv_a, conv_b, n]: the pair (conv_a, conv_b) repeated n times
architecture_config = [
    # kernel_size, channel, stride, padding
    (7, 64, 2, 3),
    "Maxpooling",
    (3, 192, 1, 1),
    "Maxpooling",
    # A 1x1 kernel needs no padding; padding=1 here would add a zero border
    # and grow the feature map by 2 pixels per side pair.
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "Maxpooling",
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "Maxpooling",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),
    (3, 1024, 1, 1),
    (3, 1024, 1, 1),
]

class CNNBlock(nn.Module):
    """Convolution -> batch normalisation -> LeakyReLU(0.1) building block.

    ``in_channels`` and ``out_channels`` size the convolution; any further
    keyword arguments (for example ``kernel_size``, ``stride``, ``padding``)
    are forwarded to ``nn.Conv2d``. The convolution is created without a bias
    term.
    """

    def __init__(self, in_channels, out_channels, **kwargs) -> None:
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.batchnorm = nn.BatchNorm2d(num_features=out_channels)
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.1)

    def forward(self, x):
        """Apply convolution, batch normalisation and the activation in turn."""
        convolved = self.conv(x)
        normalised = self.batchnorm(convolved)
        return self.leakyrelu(normalised)
    
class Yolov1(nn.Module):
    """Convolutional backbone built from ``architecture_config`` plus an FC head.

    ``in_channels`` is the channel count of the input images. All remaining
    keyword arguments are forwarded to ``_create_fcs`` and must therefore be
    ``split_size``, ``num_boxes`` and ``num_classes``.
    """

    def __init__(self, in_channels = 3, **kwargs) -> None:
        super().__init__()
        self.architecture = architecture_config
        self.in_channels = in_channels
        self.darknet = self._create_conv_layers(self.architecture)
        self.fcs = self._create_fcs(**kwargs)

    def forward(self, x):
        """Run the backbone, flatten everything but the batch axis, apply the head."""
        features = self.darknet(x)
        flattened = torch.flatten(features, start_dim=1)
        return self.fcs(flattened)

    def _create_conv_layers(self, architecture):
        """Translate the architecture description into an ``nn.Sequential``.

        Tuples become a single ``CNNBlock``; strings become a 2x2 max-pool
        with stride 2; lists ``[conv_a, conv_b, n]`` become the pair of blocks
        repeated ``n`` times. Entries of any other type are skipped.
        """

        def conv_block(channels_in, spec):
            # spec layout: (kernel_size, out_channels, stride, padding)
            return CNNBlock(
                channels_in,
                spec[1],
                kernel_size=spec[0],
                stride=spec[2],
                padding=spec[3],
            )

        layers = []
        in_channels = self.in_channels

        for entry in architecture:
            entry_type = type(entry)
            if entry_type is tuple:
                layers.append(conv_block(in_channels, entry))
                in_channels = entry[1]
            elif entry_type is str:
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            elif entry_type is list:
                first_spec, second_spec, num_repeats = entry[0], entry[1], entry[2]
                for _ in range(num_repeats):
                    layers.append(conv_block(in_channels, first_spec))
                    # The second block consumes the first block's output channels.
                    layers.append(conv_block(first_spec[1], second_spec))
                    in_channels = second_spec[1]

        return nn.Sequential(*layers)

    def _create_fcs(self, split_size, num_boxes, num_classes):
        """Build the fully connected head.

        The head expects ``1024 * split_size**2`` input features and emits
        ``split_size**2 * (num_classes + 5 * num_boxes)`` values per sample.
        """
        grid_cells = split_size * split_size
        hidden_units = 496
        out_features = grid_cells * (num_classes + num_boxes * 5)

        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * grid_cells, hidden_units),
            nn.Dropout(0.0),
            nn.LeakyReLU(0.1),
            nn.Linear(hidden_units, out_features),  # last Linear layer
        )
    
def test(S = 7, B = 2, C = 20):
    """Smoke-test ``Yolov1``: push a random batch through it and check the shape.

    Args:
        S: ``split_size`` passed to the model.
        B: ``num_boxes`` passed to the model.
        C: ``num_classes`` passed to the model.

    Raises:
        AssertionError: If the output shape is not ``(2, S*S*(C + 5*B))``.

    NOTE: this reuses the name ``test``, which the training cell of this
    notebook also uses for its evaluation function; whichever definition is
    executed last is the one bound to the name.
    """
    model = Yolov1(split_size=S, num_boxes=B, num_classes=C)
    x = torch.randn((2, 3, 448, 448))
    # Only the output shape is inspected, so skip building the autograd graph.
    with torch.no_grad():
        out = model(x)
    print(out.shape)

    expected_shape = (2, S * S * (C + B * 5))
    assert tuple(out.shape) == expected_shape, (
        f"unexpected output shape {tuple(out.shape)}, expected {expected_shape}"
    )

test()

torch.Size([2, 1470])


### Make Loss function

In [12]:
import torch
import torch.nn as nn
from utils import  intersection_over_union


ImportError: cannot import name 'intersection_over_union' from 'utils' (/opt/anaconda3/lib/python3.11/site-packages/utils/__init__.py)