In [14]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

In [15]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.2010])
])

NUM_CLASSES = 10

train_data = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform
)

test_data = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform
)

train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)


device = ('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}, Img: {train_data[0][0].size()}")

Files already downloaded and verified
Files already downloaded and verified
Device: cuda, Img: torch.Size([3, 32, 32])


In [16]:
class BottleNeck(nn.Module):
    expansion = 4

    def __init__(self, in_channels, mid_channels, stride=1, downsample=None):
        super().__init__()
        out_channels = BottleNeck.expansion * mid_channels

        self.conv1 = nn.Conv2d(in_channels, mid_channels, 1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, 3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.conv3 = nn.Conv2d(mid_channels, out_channels, 1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.downsample = downsample

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

In [17]:
class ResNet50(nn.Module):
    def __init__(self):
        super().__init__()

        self.initial = nn.Sequential(
            nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2, padding=1),
        )

        self.layer2 = self._make_layer(64, blocks=3, stride=1)
        self.layer3 = self._make_layer(128, blocks=4, stride=2)
        self.layer4 = self._make_layer(256, blocks=6, stride=2)
        self.layer5 = self._make_layer(512, blocks=3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512 * BottleNeck.expansion, NUM_CLASSES)

    def _make_layer(self, mid_channels, blocks, stride):
        layers = []
        in_channels = self._in_channels if hasattr(self, '_in_channels') else 64
        out_channels = mid_channels * BottleNeck.expansion

        # Downsample для первого блока
        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        layers.append(BottleNeck(in_channels, mid_channels, stride=stride, downsample=downsample))

        # Обновляем число входных каналов
        self._in_channels = out_channels
        # Последующие блоки
        for _ in range(1, blocks):
            layers.append(BottleNeck(self._in_channels, mid_channels, stride=1, downsample=None))

        return nn.Sequential(*layers)



    def forward(self, x):
        x = self.initial(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
    
resnet50 = ResNet50().to(device)

In [18]:
crit = nn.CrossEntropyLoss()
optim = torch.optim.Adam(resnet50.parameters(), lr=1e-3)

In [19]:
for epoch in range(5):
    running_loss = 0
    print(f"epoch: {epoch}")

    for i, data in enumerate(train_dataloader):
        inputs, lables = data
        inputs = inputs.to(device)
        lables = lables.to(device)

        optim.zero_grad()
        outputs = resnet50(inputs)

        loss = crit(outputs, lables)
        loss.backward()
        optim.step()

        running_loss += loss.item()
        if i % 2000 == 1999:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

epoch: 0
epoch: 1
epoch: 2
epoch: 3
epoch: 4
Finished Training


In [20]:
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = resnet50(images)

        _, pred = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()
        
print(f'Accuracy on test data: {100 * correct / total:.2f}%')

Accuracy on test data: 55.92%
