In [4]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import ToTensor

# Load the COCO dataset
dataset = CocoDetection(root='D:/COCO Dataset/train2017', annFile='D:/COCO Dataset/annotations/instances_train2017.json', transform=ToTensor())

# Load a pre-trained model  
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the classifier with a new one that has 1 output channel (person or not person)
num_classes = 2  # 1 class (person) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move the model to the GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Run the model on an image
image = dataset[0][0].to(device)
outputs = model([image])

# Print the predicted boxes and labels for each person in the image
for i in range(len(outputs)):
    boxes = outputs[i]['boxes']
    labels = outputs[i]['labels']
    for j in range(len(boxes)):
        if labels[j] == 1:
            print(f'Person {j}: {boxes[j]}')


In [None]:
import torch
import torchvision

# Загрузка предобученной модели
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)


# Сохранение модели на диск
torch.save(model.state_dict(), 'R-CNN.pth')

In [1]:
import torch 
import torchvision 
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor 
from torchvision.datasets import CocoDetection 
from torchvision.transforms import ToTensor 
 
# Load the COCO dataset 
dataset = CocoDetection(root='D:/COCO Dataset/train2017', annFile='D:/COCO Dataset/annotations/instances_train2017.json', transform=ToTensor()) 
 
# Load a pre-trained model   
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 
 
# Replace the classifier with a new one that has 1 output channel (person or not person) 
num_classes = 2  # 1 class (person) + background 
in_features = model.roi_heads.box_predictor.cls_score.in_features 
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
 
# Move the model to the GPU if available 
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 
model.to(device) 
 
# Prepare the image for inference
image, _ = dataset[0]
image = image.unsqueeze(0) # Add batch dimension
image = image.to(device)
 
# Run the model on the image 
model.eval() # Set to evaluation mode
with torch.no_grad():
    outputs = model(image)
 
# Print the predicted boxes and labels for each person in the image 
for i in range(len(outputs)): 
    boxes = outputs[i]['boxes'] 
    labels = outputs[i]['labels'] 
    for j in range(len(boxes)): 
        if labels[j] == 1: 
            print(f'Person {j}: {boxes[j]}')

loading annotations into memory...
Done (t=17.22s)
creating index...
index created!




Person 0: tensor([535.4479,  10.9487, 638.7635, 280.7072])
Person 1: tensor([506.5498, 112.3238, 640.0000, 356.4161])
Person 2: tensor([499.1276,  94.1813, 629.7615, 230.0587])
Person 3: tensor([450.2258,  16.1875, 636.0623, 325.9147])
Person 4: tensor([298.5901, 357.8939, 430.1841, 468.0307])
Person 5: tensor([223.8287,   5.6326, 334.6924,  84.3947])
Person 6: tensor([523.8218, 330.4018, 533.4785, 345.7201])
Person 7: tensor([322.6916, 115.0905, 640.0000, 346.2043])
Person 8: tensor([549.6943, 403.2085, 571.6334, 421.1720])
Person 9: tensor([562.5801, 197.3125, 640.0000, 452.9887])
Person 10: tensor([589.6063,  57.2560, 640.0000, 188.0374])
Person 11: tensor([348.1002,  34.4026, 622.9925, 251.0898])
Person 12: tensor([295.2576,  12.1948, 314.0999,  31.0676])
Person 13: tensor([312.6689, 257.5293, 553.2711, 424.9976])
Person 14: tensor([391.9442,   0.0000, 456.8119,  49.2766])
Person 15: tensor([431.5002, 179.6529, 577.5114, 432.0287])
Person 16: tensor([328.2457, 319.1385, 395.4215, 4

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

# Загрузка и предобработка данных
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CocoDetection(root='./data', annFile='./annotations_trainval2014.json', transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

# Определение архитектуры нейронной сети
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.fc1 = nn.Linear(32 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)  # Количество классов (person и background)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()

# Определение функции потерь и оптимизатора
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Обучение модели
for epoch in range(10):  # Количество эпох
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Обучение завершено!')
