<a href="https://colab.research.google.com/github/SIDED00R/machinelearning/blob/main/Untitled14.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import print_function
from torch import nn, optim, cuda
from torch.utils import data
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
import os
from google.colab import drive
drive.mount('resnet')
batch_size=64
device='cuda' if cuda.is_available() else 'cpu'
print(f'traing model on {device}\n{"="*44}')

path2data = '/content/resnet/MyDrive/data'

# if not exists the path, make the directory
if not os.path.exists(path2data):
    os.mkdir(path2data)

# load dataset
train_dataset = datasets.STL10(path2data, split='train', download=True, transform=transforms.ToTensor())
test_dataset = datasets.STL10(path2data, split='test', download=True, transform=transforms.ToTensor())
train_loader=data.DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
test_loader=data.DataLoader(dataset=test_dataset,
                              batch_size=batch_size,
                              shuffle=False)

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels * BasicBlock.expansion),
        )

        self.shortcut = nn.Sequential()

        self.relu = nn.ReLU()

        if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * BasicBlock.expansion)
            )

    def forward(self, x):
        x = self.residual_function(x) + self.shortcut(x)
        x = self.relu(x)
        return x


class BottleNeck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels * BottleNeck.expansion),
        )

        self.shortcut = nn.Sequential()

        self.relu = nn.ReLU()

        if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels*BottleNeck.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels*BottleNeck.expansion)
            )
            
    def forward(self, x):
        x = self.residual_function(x) + self.shortcut(x)
        x = self.relu(x)
        return x

class ResNet(nn.Module):
    def __init__(self, block, num_block, num_classes=10, init_weights=True):
        '''
        block: 'BasicBlock' or 'BottleNeck'
        num_block: [n, n, n, n]
        '''
        super().__init__()

        self.in_channels=64

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
        self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
        self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
        self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # weights inittialization
        if init_weights:
            self._initialize_weights()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion

        return nn.Sequential(*layers)

    def forward(self,x):
        output = self.conv1(x)
        output = self.conv2_x(output)
        x = self.conv3_x(output)
        x = self.conv4_x(x)
        x = self.conv5_x(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

model=ResNet(BottleNeck,[3,8,36,3])
model.to(device)
criterion=nn.CrossEntropyLoss()
optimizer=optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    data,target=data.to(device), target.to(device)
    optimizer.zero_grad()
    ouput=model(data)
    loss=criterion(ouput,target)
    loss.backward()
    optimizer.step()

    if batch_idx%10==0:
      print('train epoch: {} | batch staus: {}/{} ({: .0f}%) | Loss: {: .6f}'.format(epoch,batch_idx*len(data), len(train_loader.dataset),
                                                                                     100.*batch_idx / len(train_loader), loss.item()))
  
def test():
  model.eval()
  test_loss=0
  correct=0
  for data, target in test_loader:
    data, target=data.to(device), target.to(device)
    output=model(data)
    test_loss += criterion(output,target).item()
    pred=output.data.max(1,keepdim=True)[1]
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()

  test_loss/=len(test_loader.dataset)
  print(f'=======\n test set: average loss: {test_loss: .4f}, Accuracy: {correct}/{len(test_loader.dataset)}'
  f'({100. * correct / len(test_loader.dataset):.0f}%)')



if __name__ == '__main__':
    since = time.time()
    for epoch in range(1, 10):
        epoch_start = time.time()
        train(epoch)
        m, s = divmod(time.time() - epoch_start, 60)
        print(f'Training time: {m:.0f}m {s:.0f}s')
        test()
        m, s = divmod(time.time() - epoch_start, 60)
        print(f'Testing time: {m:.0f}m {s:.0f}s')
    m,s=divmod(time.time()-since,60)
    print(f'total time: {m:.0f}m {s: .0f}s \n model was trained on {device}!')

Mounted at resnet
traing model on cuda
Files already downloaded and verified
Files already downloaded and verified


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


train epoch: 1 | batch staus: 0/5000 ( 0%) | Loss:  2.331322
train epoch: 1 | batch staus: 640/5000 ( 13%) | Loss:  4.031568
train epoch: 1 | batch staus: 1280/5000 ( 25%) | Loss:  2.743551
train epoch: 1 | batch staus: 1920/5000 ( 38%) | Loss:  3.310320
train epoch: 1 | batch staus: 2560/5000 ( 51%) | Loss:  3.203821
train epoch: 1 | batch staus: 3200/5000 ( 63%) | Loss:  3.438390
train epoch: 1 | batch staus: 3840/5000 ( 76%) | Loss:  3.435224
train epoch: 1 | batch staus: 4480/5000 ( 89%) | Loss:  2.590989
Training time: 1m 11s
 test set: average loss:  0.0545, Accuracy: 1377/8000(17%)
Testing time: 1m 46s
train epoch: 2 | batch staus: 0/5000 ( 0%) | Loss:  2.590879
train epoch: 2 | batch staus: 640/5000 ( 13%) | Loss:  2.258070
train epoch: 2 | batch staus: 1280/5000 ( 25%) | Loss:  2.535842
train epoch: 2 | batch staus: 1920/5000 ( 38%) | Loss:  2.247748
train epoch: 2 | batch staus: 2560/5000 ( 51%) | Loss:  2.297128
train epoch: 2 | batch staus: 3200/5000 ( 63%) | Loss:  2.09169

In [10]:
from __future__ import print_function
from torch import nn, optim, cuda
from torch.utils import data
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
import os
from google.colab import drive
from google.colab import drive
drive.mount('inceptionv4')
batch_size=64
device='cuda' if cuda.is_available() else 'cpu'
print(f'traing model on {device}\n{"="*44}')
path2data = '/content/inceptionv4/MyDrive/data'

# if not exists the path, make the directory
if not os.path.exists(path2data):
    os.mkdir(path2data)

train_dataset = datasets.STL10(path2data, split='train', download=True, transform=transforms.ToTensor())
test_dataset = datasets.STL10(path2data, split='test', download=True, transform=transforms.ToTensor())
train_loader=data.DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
test_loader=data.DataLoader(dataset=test_dataset,
                              batch_size=batch_size,
                              shuffle=False)
class BasicConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
        super().__init__()

        # bias=Fasle, because BN after conv includes bias.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, bias=False, **kwargs),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.conv(x)
        return x


class Stem(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = nn.Sequential(
            BasicConv2d(3, 32, 3, stride=2, padding=0), # 149 x 149 x 32
            BasicConv2d(32, 32, 3, stride=1, padding=0), # 147 x 147 x 32
            BasicConv2d(32, 64, 3, stride=1, padding=1), # 147 x 147 x 64 
        )

        self.branch3x3_conv = BasicConv2d(64, 96, 3, stride=2, padding=0) # 73x73x96

        #  kernel_size=4: 피쳐맵 크기 73, kernel_size=3: 피쳐맵 크기 74
        self.branch3x3_pool = nn.MaxPool2d(4, stride=2, padding=1) # 73x73x64

        self.branch7x7a = nn.Sequential(
            BasicConv2d(160, 64, 1, stride=1, padding=0),
            BasicConv2d(64, 96, 3, stride=1, padding=0)
        ) # 71x71x96

        self.branch7x7b = nn.Sequential(
            BasicConv2d(160, 64, 1, stride=1, padding=0),
            BasicConv2d(64, 64, (7,1), stride=1, padding=(3,0)),
            BasicConv2d(64, 64, (1,7), stride=1, padding=(0,3)),
            BasicConv2d(64, 96, 3, stride=1, padding=0)
        ) # 71x71x96

        self.branchpoola = BasicConv2d(192, 192, 3, stride=2, padding=0) # 35x35x192

        #  kernel_size=4: 피쳐맵 크기 73, kernel_size=3: 피쳐맵 크기 74
        self.branchpoolb = nn.MaxPool2d(4, 2, 1) # 35x35x192


    def forward(self, x):
        x = self.conv1(x)
        x = torch.cat((self.branch3x3_conv(x), self.branch3x3_pool(x)), dim=1)
        x = torch.cat((self.branch7x7a(x), self.branch7x7b(x)), dim=1)
        x = torch.cat((self.branchpoola(x), self.branchpoolb(x)), dim=1)
        return x


class Inception_Resnet_A(nn.Module):
    def __init__(self, in_channels):
        super().__init__()

        self.branch1x1 = BasicConv2d(in_channels, 32, 1, stride=1, padding=0)

        self.branch3x3 = nn.Sequential(
            BasicConv2d(in_channels, 32, 1, stride=1, padding=0),
            BasicConv2d(32, 32, 3, stride=1, padding=1)
        )

        self.branch3x3stack = nn.Sequential(
            BasicConv2d(in_channels, 32, 1, stride=1, padding=0),
            BasicConv2d(32, 48, 3, stride=1, padding=1),
            BasicConv2d(48, 64, 3, stride=1, padding=1)
        )
        
        self.reduction1x1 = nn.Conv2d(128, 384, 1, stride=1, padding=0)
        self.shortcut = nn.Conv2d(in_channels, 384, 1, stride=1, padding=0)
        self.bn = nn.BatchNorm2d(384)
        self.relu = nn.ReLU()

    def forward(self, x):
        x_shortcut = self.shortcut(x)
        x = torch.cat((self.branch1x1(x), self.branch3x3(x), self.branch3x3stack(x)), dim=1)
        x = self.reduction1x1(x)
        x = self.bn(x_shortcut + x)
        x = self.relu(x)
        return x


class Inception_Resnet_B(nn.Module):
    def __init__(self, in_channels):
        super().__init__()

        self.branch1x1 = BasicConv2d(in_channels, 192, 1, stride=1, padding=0)
        self.branch7x7 = nn.Sequential(
            BasicConv2d(in_channels, 128, 1, stride=1, padding=0),
            BasicConv2d(128, 160, (1,7), stride=1, padding=(0,3)),
            BasicConv2d(160, 192, (7,1), stride=1, padding=(3,0))
        )

        self.reduction1x1 = nn.Conv2d(384, 1152, 1, stride=1, padding=0)
        self.shortcut = nn.Conv2d(in_channels, 1152, 1, stride=1, padding=0)
        self.bn = nn.BatchNorm2d(1152)
        self.relu = nn.ReLU()

    def forward(self, x):
        x_shortcut = self.shortcut(x)
        x = torch.cat((self.branch1x1(x), self.branch7x7(x)), dim=1)
        x = self.reduction1x1(x) * 0.1
        x = self.bn(x + x_shortcut)
        x = self.relu(x)
        return x


class Inception_Resnet_C(nn.Module):
    def __init__(self, in_channels):
        super().__init__()

        self.branch1x1 = BasicConv2d(in_channels, 192, 1, stride=1, padding=0)
        self.branch3x3 = nn.Sequential(
            BasicConv2d(in_channels, 192, 1, stride=1, padding=0),
            BasicConv2d(192, 224, (1,3), stride=1, padding=(0,1)),
            BasicConv2d(224, 256, (3,1), stride=1, padding=(1,0))
        )

        self.reduction1x1 = nn.Conv2d(448, 2144, 1, stride=1, padding=0)
        self.shortcut = nn.Conv2d(in_channels, 2144, 1, stride=1, padding=0) # 2144
        self.bn = nn.BatchNorm2d(2144)
        self.relu = nn.ReLU()

    def forward(self, x):
        x_shortcut = self.shortcut(x)
        x = torch.cat((self.branch1x1(x), self.branch3x3(x)), dim=1)
        x = self.reduction1x1(x) * 0.1
        x = self.bn(x_shortcut + x)
        x = self.relu(x)
        return x

    
class ReductionA(nn.Module):
    def __init__(self, in_channels, k, l, m, n):
        super().__init__()

        self.branchpool = nn.MaxPool2d(3, 2)
        self.branch3x3 = BasicConv2d(in_channels, n, 3, stride=2, padding=0)
        self.branch3x3stack = nn.Sequential(
            BasicConv2d(in_channels, k, 1, stride=1, padding=0),
            BasicConv2d(k, l, 3, stride=1, padding=1),
            BasicConv2d(l, m, 3, stride=2, padding=0)
        )

        self.output_channels = in_channels + n + m

    def forward(self, x):
        x = torch.cat((self.branchpool(x), self.branch3x3(x), self.branch3x3stack(x)), dim=1)
        return x


class ReductionB(nn.Module):
    def __init__(self, in_channels):
        super().__init__()

        self.branchpool = nn.MaxPool2d(3, 2)
        self.branch3x3a = nn.Sequential(
            BasicConv2d(in_channels, 256, 1, stride=1, padding=0),
            BasicConv2d(256, 384, 3, stride=2, padding=0)
        )
        self.branch3x3b = nn.Sequential(
            BasicConv2d(in_channels, 256, 1, stride=1, padding=0),
            BasicConv2d(256, 288, 3, stride=2, padding=0)
        )
        self.branch3x3stack = nn.Sequential(
            BasicConv2d(in_channels, 256, 1, stride=1, padding=0),
            BasicConv2d(256, 288, 3, stride=1, padding=1),
            BasicConv2d(288, 320, 3, stride=2, padding=0)
        )

    def forward(self, x):
        x = torch.cat((self.branchpool(x), self.branch3x3a(x), self.branch3x3b(x), self.branch3x3stack(x)), dim=1)
        return x


class InceptionResNetV2(nn.Module):
    def __init__(self, A, B, C, k=256, l=256, m=384, n=384, num_classes=10, init_weights=True):
        super().__init__()
        blocks = []
        blocks.append(Stem())
        for i in range(A):
            blocks.append(Inception_Resnet_A(384))
        blocks.append(ReductionA(384, k, l, m, n))
        for i in range(B):
            blocks.append(Inception_Resnet_B(1152))
        blocks.append(ReductionB(1152))
        for i in range(C):
            blocks.append(Inception_Resnet_C(2144))

        self.features = nn.Sequential(*blocks)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        # drop out
        self.dropout = nn.Dropout2d(0.2)
        self.linear = nn.Linear(2144, num_classes)

        # weights inittialization
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.linear(x)
        return x

    # define weight initialization function
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


model = InceptionResNetV2(10, 20, 10).to(device)
model.to(device)
criterion=nn.CrossEntropyLoss()
optimizer=optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    data,target=data.to(device), target.to(device)
    optimizer.zero_grad()
    ouput=model(data)
    loss=criterion(ouput,target)
    loss.backward()
    optimizer.step()

    if batch_idx%10==0:
      print('train epoch: {} | batch staus: {}/{} ({: .0f}%) | Loss: {: .6f}'.format(epoch,batch_idx*len(data), len(train_loader.dataset),
                                                                                     100.*batch_idx / len(train_loader), loss.item()))
  
def test():
  model.eval()
  test_loss=0
  correct=0
  for data, target in test_loader:
    data, target=data.to(device), target.to(device)
    output=model(data)
    test_loss += criterion(output,target).item()
    pred=output.data.max(1,keepdim=True)[1]
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()

  test_loss/=len(test_loader.dataset)
  print(f'=======\n test set: average loss: {test_loss: .4f}, Accuracy: {correct}/{len(test_loader.dataset)}'
  f'({100. * correct / len(test_loader.dataset):.0f}%)')



if __name__ == '__main__':
    since = time.time()
    for epoch in range(1, 8):
        epoch_start = time.time()
        train(epoch)
        m, s = divmod(time.time() - epoch_start, 60)
        print(f'Training time: {m:.0f}m {s:.0f}s')
        test()
        m, s = divmod(time.time() - epoch_start, 60)
        print(f'Testing time: {m:.0f}m {s:.0f}s')
    m,s=divmod(time.time()-since,60)
    print(f'total time: {m:.0f}m {s: .0f}s \n model was trained on {device}!')

Drive already mounted at inceptionv4; to attempt to forcibly remount, call drive.mount("inceptionv4", force_remount=True).
traing model on cuda
Files already downloaded and verified
Files already downloaded and verified


RuntimeError: ignored

In [6]:
from __future__ import print_function
from torch import nn, optim, cuda
from torch.utils import data
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
import torch
import os
from google.colab import drive
from google.colab import drive
drive.mount('densenet')
batch_size=64
device='cuda' if cuda.is_available() else 'cpu'
print(f'traing model on {device}\n{"="*44}')

path2data = '/content/densenet/MyDrive/data'

# if not exists the path, make the directory
if not os.path.exists(path2data):
    os.mkdir(path2data)

train_dataset = datasets.STL10(path2data, split='train', download=True, transform=transforms.ToTensor())
test_dataset = datasets.STL10(path2data, split='test', download=True, transform=transforms.ToTensor())
train_loader=data.DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
test_loader=data.DataLoader(dataset=test_dataset,
                              batch_size=batch_size,
                              shuffle=False)
class BottleNeck(nn.Module):
    def __init__(self, in_channels, growth_rate):
        super().__init__()
        inner_channels = 4 * growth_rate

        self.residual = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, inner_channels, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(inner_channels),
            nn.ReLU(),
            nn.Conv2d(inner_channels, growth_rate, 3, stride=1, padding=1, bias=False)
        )

        self.shortcut = nn.Sequential()

    def forward(self, x):
        return torch.cat([self.shortcut(x), self.residual(x)], 1)


# Transition Block: reduce feature map size and number of channels
class Transition(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.down_sample = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False),
            nn.AvgPool2d(2, stride=2)
        )

    def forward(self, x):
        return self.down_sample(x)

# DenseNet
class DenseNet(nn.Module):
    def __init__(self, nblocks, growth_rate=12, reduction=0.5, num_classes=10, init_weights=True):
        super().__init__()

        self.growth_rate = growth_rate
        inner_channels = 2 * growth_rate # output channels of conv1 before entering Dense Block

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, inner_channels, 7, stride=2, padding=3),
            nn.MaxPool2d(3, 2, padding=1)
        )

        self.features = nn.Sequential()

        for i in range(len(nblocks)-1):
            self.features.add_module('dense_block_{}'.format(i), self._make_dense_block(nblocks[i], inner_channels))
            inner_channels += growth_rate * nblocks[i]
            out_channels = int(reduction * inner_channels)
            self.features.add_module('transition_layer_{}'.format(i), Transition(inner_channels, out_channels))
            inner_channels = out_channels 
        
        self.features.add_module('dense_block_{}'.format(len(nblocks)-1), self._make_dense_block(nblocks[len(nblocks)-1], inner_channels))
        inner_channels += growth_rate * nblocks[len(nblocks)-1]
        self.features.add_module('bn', nn.BatchNorm2d(inner_channels))
        self.features.add_module('relu', nn.ReLU())

        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        self.linear = nn.Linear(inner_channels, num_classes)

        # weight initialization
        if init_weights:
            self._initialize_weights()
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.features(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x

    def _make_dense_block(self, nblock, inner_channels):
        dense_block = nn.Sequential()
        for i in range(nblock):
            dense_block.add_module('bottle_neck_layer_{}'.format(i), BottleNeck(inner_channels, self.growth_rate))
            inner_channels += self.growth_rate
        return dense_block

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

def DenseNet_121():
    return DenseNet([6, 12, 24, 6])
model = DenseNet_121()
model.to(device)
criterion=nn.CrossEntropyLoss()
optimizer=optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    data,target=data.to(device), target.to(device)
    optimizer.zero_grad()
    ouput=model(data)
    loss=criterion(ouput,target)
    loss.backward()
    optimizer.step()

    if batch_idx%10==0:
      print('train epoch: {} | batch staus: {}/{} ({: .0f}%) | Loss: {: .6f}'.format(epoch,batch_idx*len(data), len(train_loader.dataset),
                                                                                     100.*batch_idx / len(train_loader), loss.item()))
  
def test():
  model.eval()
  test_loss=0
  correct=0
  for data, target in test_loader:
    data, target=data.to(device), target.to(device)
    output=model(data)
    test_loss += criterion(output,target).item()
    pred=output.data.max(1,keepdim=True)[1]
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()

  test_loss/=len(test_loader.dataset)
  print(f'=======\n test set: average loss: {test_loss: .4f}, Accuracy: {correct}/{len(test_loader.dataset)}'
  f'({100. * correct / len(test_loader.dataset):.0f}%)')



if __name__ == '__main__':
    since = time.time()
    for epoch in range(1, 10):
        epoch_start = time.time()
        train(epoch)
        m, s = divmod(time.time() - epoch_start, 60)
        print(f'Training time: {m:.0f}m {s:.0f}s')
        test()
        m, s = divmod(time.time() - epoch_start, 60)
        print(f'Testing time: {m:.0f}m {s:.0f}s')
    m,s=divmod(time.time()-since,60)
    print(f'total time: {m:.0f}m {s: .0f}s \n model was trained on {device}!')

Drive already mounted at densenet; to attempt to forcibly remount, call drive.mount("densenet", force_remount=True).
traing model on cuda
Files already downloaded and verified
Files already downloaded and verified
train epoch: 1 | batch staus: 0/5000 ( 0%) | Loss:  2.308819
train epoch: 1 | batch staus: 640/5000 ( 13%) | Loss:  2.285026
train epoch: 1 | batch staus: 1280/5000 ( 25%) | Loss:  2.239379
train epoch: 1 | batch staus: 1920/5000 ( 38%) | Loss:  2.206953
train epoch: 1 | batch staus: 2560/5000 ( 51%) | Loss:  2.212089
train epoch: 1 | batch staus: 3200/5000 ( 63%) | Loss:  2.165587
train epoch: 1 | batch staus: 3840/5000 ( 76%) | Loss:  2.131694
train epoch: 1 | batch staus: 4480/5000 ( 89%) | Loss:  2.056072
Training time: 0m 13s
 test set: average loss:  0.0324, Accuracy: 2179/8000(27%)
Testing time: 0m 24s
train epoch: 2 | batch staus: 0/5000 ( 0%) | Loss:  2.066060
train epoch: 2 | batch staus: 640/5000 ( 13%) | Loss:  2.033450
train epoch: 2 | batch staus: 1280/5000 ( 25