In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import requests
from io import BytesIO
import json

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set up training and testing data
root = 'D:/COCO Dataset'
if not os.path.exists(root):
    os.mkdir(root)

train_dataset = datasets.CocoDetection(root + '/train2017',
                                       'D:/COCO Dataset/annotations/instances_train2017.json',
                                       transform=transforms.Compose([transforms.Resize((256, 256)),
                                                                     transforms.ToTensor()]))

test_dataset = datasets.CocoDetection(root + '/val2017',
                                      'D:/COCO Dataset/annotations/instances_val2017.json',
                                      transform=transforms.Compose([transforms.Resize((256, 256)),
                                                                    transforms.ToTensor()]))

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True, num_workers=2)

# Define model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.conv2 = nn.Conv2d(32, 64, 3, 1, 1)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.conv3 = nn.Conv2d(64, 128, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(num_features=128)

        # linear layers
        self.fc1 = nn.Linear(128*32*32, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 2)

    def forward(self, x):
        # convolutional layers with batch normalization
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))), 2)
        x = F.max_pool2d(F.relu(self.bn3(self.conv3(x))), 2)

        # flatten to feed into linear layers
        x = x.view(-1, 128*32*32)
        # linear layers
        x = F.dropout(F.relu(self.fc1(x)), 0.5)
        x = F.dropout(F.relu(self.fc2(x)), 0.5)
        x = self.fc3(x)

        return x


In [None]:
model = CNN().to(device)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train model
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))

# Test model
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

for epoch in range(1, 11):
  train(model, device, train_loader, optimizer, criterion, epoch)
  test(model, device, test_loader)

# Detect Persons in an image
def detect_person(model, img_path):
    transform = transforms.Compose([transforms.Resize((256, 256)),
                                    transforms.ToTensor()])
    img = Image.open(img_path)
    img = transform(img)
    img = img.to(device)
    model.eval()
    with torch.no_grad():
        output = model(img[None, ...])
        _, preds = torch.max(output, 1)
    if preds.item() == 1:
        print("Person detected!")
    else:
        print("No person detected!")

# Test image
img_url = 'https://www.wired.com/wp-content/uploads/2015/09/google-logo.jpg'
response = requests.get(img_url)
img = Image.open(BytesIO(response.content))
img.save("test.jpg")

# Detect persons in the test image
detect_person(model, 'test.jpg')

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.profiler import profile, record_function, ProfilerActivity

import os


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set up training and testing data
root = 'D:/COCO Dataset'
if not os.path.exists(root):
    os.mkdir(root)

class ResizeTransform:
    def __init__(self, size):
        self.size = size
        
    def __call__(self, img):
        return transforms.functional.resize(img, self.size)

train_dataset = datasets.CocoDetection(root + '/train2017',
                                       'D:/COCO Dataset/annotations/instances_train2017.json',
                                       transform=transforms.Compose([ResizeTransform((256, 256)),
                                                                     transforms.ToTensor()]))


# Define collate function that resizes images to the same size
def collate_fn(batch):
    images = []
    targets = []
    for image, target in batch:
        # Resize image
        image = ResizeTransform((256, 256))(image)
        images.append(transforms.ToTensor()(image))
        targets.append(target)
    return torch.stack(images, dim=0), targets

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2, collate_fn=collate_fn)

# Define model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.conv2 = nn.Conv2d(32, 64, 3, 1, 1)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.conv3 = nn.Conv2d(64, 128, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(num_features=128)

        # linear layers
        self.fc1 = nn.Linear(128*32*32, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 2)

    def forward(self, x):
        # convolutional layers with batch normalization
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))), 2)
        x = F.max_pool2d(F.relu(self.bn3(self.conv3(x))), 2)

        # flatten to feed into linear layers
        x = x.view(-1, 128*32*32)
        # linear layers
        x = F.dropout(F.relu(self.fc1(x)), 0.5)
        x = F.dropout(F.relu(self.fc2(x)), 0.5)
        x = self.fc3(x)

        return x

model = CNN().to(device)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()



loading annotations into memory...
Done (t=17.55s)
creating index...
index created!


In [7]:
for batch_idx, (data, target) in enumerate(train_loader):
    print(len(train_loader))

In [7]:
# Train model
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()
    print(1)
    for batch_idx, (data, target) in enumerate(train_loader):
        print(2)
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))

for epoch in range(1, 11):
  train(model, device, train_loader, optimizer, criterion, epoch)

1
