In [None]:
import torch
import torchvision
import numpy as np

# Image preprocessing: resize to 224x224, convert to a tensor, then normalize
# each of the three channels with a fixed mean and standard deviation.
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(size=(224, 224)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

# Load the COCO dataset
class PaddedCocoDataset(torchvision.datasets.CocoDetection):
    """COCO detection dataset that transforms each image and zero-pads it.

    Every image is passed through the module-level ``transform`` and then
    padded (centered) so that it is at least ``PAD_TO`` pixels in height and
    width. The x/y origin of every annotation's ``bbox`` is shifted by the
    left/top padding that was added.

    NOTE(review): the boxes are only shifted for padding; they are not
    rescaled for any resize done inside ``transform`` - confirm whether the
    consumer expects boxes in resized-image coordinates.
    """

    # Minimum (height, width) of a returned image.
    PAD_TO = (224, 224)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __getitem__(self, index):
        """Return ``(image_tensor, annotations)`` for the sample at ``index``.

        ``annotations`` is a new list of shallow-copied annotation dicts, so
        the dicts handed out by the parent class are never modified.
        """
        img, targets = super().__getitem__(index)
        img = transform(img)

        # Work with plain ints: F.pad expects integer pad sizes.
        height, width = img.shape[-2:]
        pad_h = max(self.PAD_TO[0] - height, 0)
        pad_w = max(self.PAD_TO[1] - width, 0)
        pad_l, pad_r = pad_w // 2, pad_w - pad_w // 2
        pad_t, pad_b = pad_h // 2, pad_h - pad_h // 2

        # F.pad lists pads starting from the LAST dimension:
        # (left, right) for width first, then (top, bottom) for height.
        img = torch.nn.functional.pad(img, [pad_l, pad_r, pad_t, pad_b])

        # Shift the annotations to match the padded image. COCO boxes are
        # [x, y, width, height], so only x and y move; the size is unchanged.
        # Copies are used because the parent may return dicts it keeps cached
        # (presumably pycocotools' loadAnns returns references), and in-place
        # edits would then accumulate every time the same sample is read.
        shifted_targets = []
        for target in targets:
            target = dict(target)
            bbox = list(target["bbox"])
            bbox[0] += pad_l
            bbox[1] += pad_t
            target["bbox"] = bbox
            shifted_targets.append(target)

        return img, shifted_targets



# Build the dataset from the local train2017 images and their instance annotations.
coco_dataset = PaddedCocoDataset(
    annFile='C:/Users/lolol/OneDrive/Документы/СOCO Dataset/annotations/instances_train2017.json',
    root='C:/Users/lolol/OneDrive/Документы/СOCO Dataset/train2017',
)

# Define the CNN architecture
class CNN(torch.nn.Module):
    """Small convolutional classifier producing 2 logits per image.

    Three 3x3 convolutions (3 -> 16 -> 32 -> 64 channels), each followed by
    ReLU and 2x2 max-pooling, then two fully connected layers. ``fc1`` is
    sized for a 64 x 28 x 28 feature map, i.e. a 224 x 224 input halved
    three times.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = torch.nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = torch.nn.Linear(64 * 28 * 28, 128)
        self.fc2 = torch.nn.Linear(128, 2)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Map a ``(batch, 3, 224, 224)`` tensor to ``(batch, 2)`` logits."""
        # Each stage halves the spatial size: 224 -> 112 -> 56 -> 28.
        # All three stages must pool, otherwise the feature map does not
        # match what fc1 expects.
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        # Flatten everything but the batch dimension. Unlike view(-1, ...),
        # this can never silently change the batch size; a wrong input size
        # raises a shape error in fc1 instead.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define the model
model = CNN()

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


def collate_person_batch(batch):
    """Turn a list of ``(image, annotations)`` samples into ``(images, labels)``.

    Images are stacked into one tensor (they must all have the same shape).
    Each label is 1 when any annotation of the image has ``category_id == 1``
    (the 'person' category in the COCO instance annotations) and 0 otherwise,
    including images with no annotations. This matches the model's two
    output classes; raw COCO category ids would be out of range for it.
    """
    images = torch.stack([image for image, _ in batch])
    labels = torch.tensor(
        [int(any(ann['category_id'] == 1 for ann in anns)) for _, anns in batch],
        dtype=torch.long,
    )
    return images, labels


# Train the model
num_epochs = 10
batch_size = 4
# A custom collate function is required: the default one cannot batch
# per-image annotation lists of different lengths.
data_loader = torch.utils.data.DataLoader(
    coco_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_person_batch
)
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(data_loader, 0):
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print the mean loss of every block of 1000 batches
        running_loss += loss.item()
        if i % 1000 == 999:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 1000))
            running_loss = 0.0

# Test the model
# NOTE(review): accuracy is measured on the training images themselves; use
# a held-out split (e.g. val2017) for a meaningful number.
model.eval()
total_correct = 0
total_images = 0
eval_loader = torch.utils.data.DataLoader(
    coco_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_person_batch
)
with torch.no_grad():
    for images, labels in eval_loader:
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total_images += labels.size(0)
        total_correct += (predicted == labels).sum().item()

# Guard against an empty dataset instead of dividing by zero.
accuracy = 100 * total_correct / total_images if total_images else 0.0
print('Accuracy: %.2f %%' % accuracy)

# Save the model
torch.save(model.state_dict(), 'person_detection_model.pth')


In [None]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import torch.nn as nn
import torch.optim as optim

# Load the COCO dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

trainset = torchvision.datasets.CocoDetection(root='C:/Users/lolol/OneDrive/Документы/СOCO Dataset/train2017',
                                 annFile='C:/Users/lolol/OneDrive/Документы/СOCO Dataset/annotations/instances_train2017.json',
                                              transform=transform)


def collate_keep_targets(batch):
    """Stack the images of a batch and keep the per-image annotation lists.

    Returns ``(images, targets)`` where ``images`` is one stacked tensor and
    ``targets`` is a list holding each image's own list of annotation dicts.
    The default collate function cannot be used here because the number of
    annotations differs from image to image.
    """
    images, targets = zip(*batch)
    return torch.stack(images), list(targets)


# num_workers is 0 because worker processes started with the "spawn" method
# (the default on Windows) cannot import a function defined in a notebook
# cell. Move collate_keep_targets into a .py module to use workers again.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=0,
                                          collate_fn=collate_keep_targets)


# CNN model definition
class Net(nn.Module):
    """ResNet-18 followed by a linear layer and a sigmoid.

    The forward pass returns one value in (0, 1) per input image.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are kept as-is: they become the state_dict keys.
        self.resnet18 = torchvision.models.resnet18(pretrained=True)
        # Reduces the 1000 values coming out of the backbone to one score.
        self.fc = nn.Linear(1000, 1)

    def forward(self, x):
        backbone_out = self.resnet18(x)
        score = self.fc(backbone_out)
        return torch.sigmoid(score)

# Train the model
net = Net()
criterion = nn.BCELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Expects each element of `targets` to be one image's list of
        # annotation dicts.
        inputs, targets = data
        # Label is 1.0 when ANY annotation of the image has category_id 1,
        # else 0.0. Looking only at the first annotation would miss people
        # listed later and raise IndexError for images with no annotations.
        labels = torch.tensor(
            [1.0 if any(ann['category_id'] == 1 for ann in t) else 0.0 for t in targets],
            dtype=torch.float32,
        )
        optimizer.zero_grad()
        outputs = net(inputs)
        # The model output has shape (batch, 1), so the labels get a matching
        # trailing dimension.
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()

        # Report the mean loss once per 2000 batches; dividing by 2000 is
        # only meaningful when exactly 2000 batches have been accumulated.
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# Save the model
torch.save(net.state_dict(), 'human_detection_model.pth')


In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset

# Check whether a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Prepare the dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize the image
    transforms.ToTensor()  # convert to a tensor
])

trainset = torchvision.datasets.CocoDetection(root='C:/Users/lolol/OneDrive/Документы/СOCO Dataset/train2017',
                                 annFile='C:/Users/lolol/OneDrive/Документы/СOCO Dataset/annotations/instances_train2017.json',
                                              transform=transform)


def collate_keep_targets(batch):
    """Stack the images of a batch and keep the per-image annotation lists.

    Returns ``(images, targets)`` where ``images`` is one stacked tensor and
    ``targets`` is a list holding each image's own list of annotation dicts.
    The default collate function cannot be used here because the number of
    annotations differs from image to image.
    """
    images, targets = zip(*batch)
    return torch.stack(images), list(targets)


# num_workers is 0 because worker processes started with the "spawn" method
# (the default on Windows) cannot import a function defined in a notebook
# cell. Move collate_keep_targets into a .py module to use workers again.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=0,
                                          collate_fn=collate_keep_targets)

# Model definition
class Net(nn.Module):
    """ResNet-18 followed by a linear layer and a sigmoid.

    The forward pass returns one value in (0, 1) per input image.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are kept as-is: they become the state_dict keys.
        self.resnet18 = torchvision.models.resnet18(pretrained=True)
        # A single output node for binary classification.
        self.fc = nn.Linear(1000, 1)

    def forward(self, x):
        backbone_out = self.resnet18(x)
        score = self.fc(backbone_out)
        # The sigmoid squashes the score into (0, 1).
        return torch.sigmoid(score)

net = Net()
net.to(device)  # move the model to the selected device

# Define the loss function and the optimizer
criterion = nn.BCELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the model
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Expects each element of `targets` to be one image's list of
        # annotation dicts.
        inputs, targets = data
        # Label is 1.0 when ANY annotation of the image has category_id 1,
        # else 0.0. Looking only at the first annotation would miss people
        # listed later and raise IndexError for images with no annotations.
        labels = torch.tensor(
            [1.0 if any(ann['category_id'] == 1 for ann in t) else 0.0 for t in targets],
            dtype=torch.float32,
        )
        inputs, labels = inputs.to(device), labels.to(device)  # move the batch to the selected device

        optimizer.zero_grad()
        outputs = net(inputs)
        # The model output has shape (batch, 1), so the labels get a matching
        # trailing dimension.
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()

        # Report the mean loss once per 2000 batches; dividing by 2000 is
        # only meaningful when exactly 2000 batches have been accumulated.
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# Save the model
torch.save(net.state_dict(), 'human_detection_model.pth')


In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    A batch such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; an empty batch gives ``()``.
    Nothing is stacked, so the items may differ in size.
    """
    columns = zip(*batch)
    return tuple(columns)

# The only preprocessing is conversion to a tensor; images are not resized.
transform = transforms.Compose([transforms.ToTensor()])

# Training images and instance annotations read from the local COCO folder
train_dataset = CocoDetection(
    annFile='C:/Users/lolol/OneDrive/Документы/СOCO Dataset/annotations/instances_train2017.json',
    root='C:/Users/lolol/OneDrive/Документы/СOCO Dataset/train2017',
    transform=transform,
)

# Shuffled batches of 4 loaded without worker processes; collate_fn keeps the
# images and targets as tuples rather than stacking them.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn,
)

# Faster R-CNN with a ResNet-50 FPN backbone.
# NOTE(review): per the torchvision documentation, pretrained=True loads
# detection weights trained on COCO train2017, not ImageNet-only weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Swap the box predictor for a fresh one with 2 outputs (person + background)
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Train on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# SGD over the trainable parameters; the learning rate is multiplied by 0.1
# after every 3 scheduler steps.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

def to_detection_target(annotations, device):
    """Convert one image's COCO annotations into a detection target dict.

    Only annotations with ``category_id == 1`` (the 'person' category in the
    COCO instance annotations) are kept and given label 1, matching the
    2-class (person + background) predictor. COCO boxes are
    ``[x, y, width, height]`` and are converted to the ``[x1, y1, x2, y2]``
    corners that torchvision detection models expect.

    Returns a dict with ``boxes`` (float32, shape ``(N, 4)``) and ``labels``
    (int64, shape ``(N,)``), both on ``device``. ``N`` may be 0.
    NOTE(review): empty targets need a torchvision version that supports
    images without boxes - confirm for the installed version.
    """
    boxes = []
    for ann in annotations:
        if ann['category_id'] != 1:
            continue
        x, y, w, h = ann['bbox']
        if w <= 0 or h <= 0:
            # Degenerate boxes are rejected by the detection model.
            continue
        boxes.append([x, y, x + w, y + h])
    # reshape keeps the (N, 4) layout even when no box was kept.
    boxes_t = torch.as_tensor(boxes, dtype=torch.float32, device=device).reshape(-1, 4)
    labels_t = torch.ones((boxes_t.shape[0],), dtype=torch.int64, device=device)
    return {'boxes': boxes_t, 'labels': labels_t}


# Define the training loop
num_epochs = 10
model.train()  # the detection model only returns losses in training mode
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, annotations in train_loader:
        images = [image.to(device) for image in images]
        # The loader yields raw COCO annotation lists; the model needs
        # tensor targets, so convert them per image.
        targets = [to_detection_target(anns, device) for anns in annotations]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        running_loss += losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    lr_scheduler.step()
    print(f"Epoch {epoch+1} loss: {running_loss/len(train_loader)}")

# Save the trained detection model (`model`, the network trained above)
torch.save(model.state_dict(), 'human_detection_model.pth')
print("Model saved")
