In [2]:
import torch
import torch.nn as nn
from torchvision import transforms, models, datasets

In [3]:
class YOLOv1BackBone(nn.Module):
    def __init__(self, num_classes=100):
        super(YOLOv1BackBone, self).__init__()
        self.features = nn.Sequential(
                            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(7, 7), stride=2, padding=3),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            
                            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=(3, 3), stride=1, padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

                            nn.Conv2d(in_channels=192, out_channels=128, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

                            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

                            nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=(1, 1)),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=2, padding=1),
                            nn.LeakyReLU(negative_slope=0.1),

                            nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1),
                            nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=(3, 3), padding=1),
                            nn.LeakyReLU(negative_slope=0.1)
                        )

        self.classifier = nn.Sequential(
                        nn.Flatten(),
                        nn.Linear(in_features=1024 * 7 * 7, out_features=4096),
                        nn.Dropout(),
                        nn.LeakyReLU(negative_slope=0.1),
                        nn.Linear(in_features=4096, out_features=num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        # print(x.shape)
        x = self.classifier(x)
        return x

In [30]:
# initial model
model = YOLOv1BackBone(num_classes=100)

# device (gpu)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001)


In [31]:
# define data load
transform = transforms.Compose([
    transforms.Resize((448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]),
])

train_dataset = datasets.CIFAR100(
    root='./data',
    train=True,
    transform=transform,
    download=True,   
)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True
)

Files already downloaded and verified


In [33]:
from tqdm import tqdm
epochs = 30

for epoch in tqdm(range(epochs)):
    model.train()
    loss_train = 0.0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
    
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_train+=loss.item()

    print(f'Epoch:{epoch+1}/{epochs}, Loss:{loss_train/len(train_loader)}')

  3%|███▉                                                                                                                 | 1/30 [05:29<2:39:22, 329.74s/it]

Epoch:1/30, Loss:3.8631150923726505


  7%|███████▊                                                                                                             | 2/30 [11:00<2:34:09, 330.35s/it]

Epoch:2/30, Loss:3.716121257723445


 10%|███████████▋                                                                                                         | 3/30 [16:31<2:28:48, 330.68s/it]

Epoch:3/30, Loss:3.5658776970470654


 13%|███████████████▌                                                                                                     | 4/30 [22:02<2:23:21, 330.83s/it]

Epoch:4/30, Loss:3.423477333829836


 17%|███████████████████▌                                                                                                 | 5/30 [27:34<2:17:58, 331.14s/it]

Epoch:5/30, Loss:3.2952771192926273


 20%|███████████████████████▍                                                                                             | 6/30 [33:05<2:12:30, 331.27s/it]

Epoch:6/30, Loss:3.2643279779292738


 23%|███████████████████████████▎                                                                                         | 7/30 [38:37<2:07:04, 331.49s/it]

Epoch:7/30, Loss:3.061522433824856


 27%|███████████████████████████████▏                                                                                     | 8/30 [44:09<2:01:30, 331.40s/it]

Epoch:8/30, Loss:2.9173968591348594


 30%|███████████████████████████████████                                                                                  | 9/30 [49:40<1:55:57, 331.33s/it]

Epoch:9/30, Loss:2.7731445232010863


 33%|██████████████████████████████████████▋                                                                             | 10/30 [55:11<1:50:25, 331.30s/it]

Epoch:10/30, Loss:2.642133318554715


 37%|█████████████████████████████████████████▊                                                                        | 11/30 [1:00:45<1:45:09, 332.10s/it]

Epoch:11/30, Loss:2.4594816934422155


 40%|█████████████████████████████████████████████▌                                                                    | 12/30 [1:06:16<1:39:32, 331.82s/it]

Epoch:12/30, Loss:2.278856529451697


 43%|█████████████████████████████████████████████████▍                                                                | 13/30 [1:11:48<1:34:00, 331.79s/it]

Epoch:13/30, Loss:2.073618384730785


 47%|█████████████████████████████████████████████████████▏                                                            | 14/30 [1:17:19<1:28:25, 331.61s/it]

Epoch:14/30, Loss:1.8470302707398945


 50%|█████████████████████████████████████████████████████████                                                         | 15/30 [1:22:51<1:22:55, 331.67s/it]

Epoch:15/30, Loss:1.5902953686769052


 53%|████████████████████████████████████████████████████████████▊                                                     | 16/30 [1:28:22<1:17:21, 331.53s/it]

Epoch:16/30, Loss:1.318342224258901


 57%|████████████████████████████████████████████████████████████████▌                                                 | 17/30 [1:33:53<1:11:48, 331.42s/it]

Epoch:17/30, Loss:1.032118062343439


 60%|████████████████████████████████████████████████████████████████████▍                                             | 18/30 [1:39:24<1:06:16, 331.37s/it]

Epoch:18/30, Loss:0.8173480369627019


 63%|████████████████████████████████████████████████████████████████████████▏                                         | 19/30 [1:44:56<1:00:44, 331.33s/it]

Epoch:19/30, Loss:0.6311470898597137


 67%|█████████████████████████████████████████████████████████████████████████████▎                                      | 20/30 [1:50:28<55:15, 331.54s/it]

Epoch:20/30, Loss:0.5145402897501845


 70%|█████████████████████████████████████████████████████████████████████████████████▏                                  | 21/30 [1:56:00<49:45, 331.68s/it]

Epoch:21/30, Loss:0.42764081478195115


 73%|█████████████████████████████████████████████████████████████████████████████████████                               | 22/30 [2:01:31<44:12, 331.59s/it]

Epoch:22/30, Loss:0.3699827700796182


 77%|████████████████████████████████████████████████████████████████████████████████████████▉                           | 23/30 [2:07:02<38:40, 331.47s/it]

Epoch:23/30, Loss:0.31713133552075956


 80%|████████████████████████████████████████████████████████████████████████████████████████████▊                       | 24/30 [2:12:34<33:08, 331.42s/it]

Epoch:24/30, Loss:0.2861929608513708


 83%|████████████████████████████████████████████████████████████████████████████████████████████████▋                   | 25/30 [2:18:05<27:36, 331.38s/it]

Epoch:25/30, Loss:0.2638852528017729


 87%|████████████████████████████████████████████████████████████████████████████████████████████████████▌               | 26/30 [2:23:36<22:05, 331.26s/it]

Epoch:26/30, Loss:0.23515031166622402


 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████▍           | 27/30 [2:29:07<16:34, 331.35s/it]

Epoch:27/30, Loss:0.21209713578214653


 93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▎       | 28/30 [2:34:39<11:02, 331.40s/it]

Epoch:28/30, Loss:0.20538147534612958


 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 29/30 [2:40:12<05:31, 331.77s/it]

Epoch:29/30, Loss:0.18964939056169194


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [2:45:45<00:00, 331.53s/it]

Epoch:30/30, Loss:0.17744083578229103





In [34]:
torch.save(model.state_dict(), 'yolov1_CIFAR100_pretrain_40epochs.pth')

In [4]:
S = 7
C = 20
B = 2
depth = 5 * B + C
model_yolov1 = YOLOv1BackBone(100)
pretrained_weights = torch.load('yolov1_CIFAR100_pretrain_40epochs.pth')
model_yolov1.load_state_dict(pretrained_weights)

  pretrained_weights = torch.load('yolov1_CIFAR100_pretrain_40epochs.pth')


<All keys matched successfully>