In [50]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F

In [65]:
params = {
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'lr': 0.001,
    'batch': 32,
    'valid_size': 0.2,
    'seed': 41,
    'epoch':30
}

In [106]:
train_set = torchvision.datasets.MNIST(
    root='./data/MNIST',
    train=True,
    download= True,
    transform= transforms.Compose([
        transforms.ToTensor()
    ])
)

In [107]:
test_dataset = torchvision.datasets.MNIST(
    root='./data/MNIST',
    train=False,
    download= True,
    transform= transforms.Compose([
        transforms.ToTensor()
    ])
)

In [108]:
train_dataset,vali_dataset = train_test_split(train_set,test_size=params['valid_size'],random_state=params['seed'],shuffle=True )
print(len(train_dataset), len(vali_dataset), len(test_dataset))

48000 12000 10000


In [109]:
train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=params['batch'])
valid_loader = torch.utils.data.DataLoader(vali_dataset,batch_size=params['batch'])
test_loader = torch.utils.data.DataLoader(test_dataset,batch_size=params['batch'])

### model

In [110]:
from efficientnet_pytorch import EfficientNet

In [167]:
class half_efficientNetb0(nn.Module):
    def __init__(self):
        super(half_efficientNetb0, self).__init__()
        self.backbone = EfficientNet.from_pretrained('efficientnet-b0',in_channels=1,num_classes=10)
        self.layer1 = nn.Sequential(
            self.backbone._conv_stem,
            self.backbone._bn0
        )
        self.layer2 = self.backbone._blocks[0]
        self.layer3 = nn.Sequential(
            self.backbone._blocks[1],
            self.backbone._blocks[2]
        )
        self.layer4 = nn.Sequential(
            self.backbone._blocks[3],
            self.backbone._blocks[4]
        )
        self.avg_pooling = nn.AdaptiveAvgPool2d(output_size=1)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(in_features=40, out_features=10, bias=True)
        self.softmax = nn.Softmax(1)
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pooling(out)
        out = out.flatten(start_dim=1)
        out = self.fc(out)
        out = self.softmax(out)
        return out

In [168]:
model = half_efficientNetb0()

Loaded pretrained weights for efficientnet-b0


In [169]:
from torchsummary import summary
summary(model,(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         ZeroPad2d-1            [-1, 1, 29, 29]               0
         ZeroPad2d-2            [-1, 1, 29, 29]               0
Conv2dStaticSamePadding-3           [-1, 32, 14, 14]             288
Conv2dStaticSamePadding-4           [-1, 32, 14, 14]             288
       BatchNorm2d-5           [-1, 32, 14, 14]              64
       BatchNorm2d-6           [-1, 32, 14, 14]              64
         ZeroPad2d-7           [-1, 32, 16, 16]               0
         ZeroPad2d-8           [-1, 32, 16, 16]               0
Conv2dStaticSamePadding-9           [-1, 32, 14, 14]             288
Conv2dStaticSamePadding-10           [-1, 32, 14, 14]             288
      BatchNorm2d-11           [-1, 32, 14, 14]              64
      BatchNorm2d-12           [-1, 32, 14, 14]              64
MemoryEfficientSwish-13           [-1, 32, 14, 14]               0
MemoryEfficient

In [162]:
def validation(model,criterion, validation_loader):
    model.eval()
    epoch_loss = 0 
    with torch.no_grad():
        for data, target in tqdm(validation_loader):
            data = data.float().to(params['device'])
            target = target.to(params['device'])
            target = F.one_hot(target, num_classes=10).float()
            pred = model(data)
            batch_loss = criterion(pred,target)
            epoch_loss += batch_loss.item()
        return epoch_loss/len(validation_loader)

In [176]:
def train(model, train_loader, valid_loader):
    model = model.to(params['device'])
    best_model = None
    min_val_loss = float('inf')
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = params['lr'])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, min_lr=1e-8, verbose=True)
    for epoch in range(params['epoch']):
        model.train()
        epoch_loss = 0
        for data, target in tqdm(train_loader):
            data = data.float().to(params['device'])
            target = target.to(params['device'])
            # target = F.one_hot(target, num_classes=10).float()
            pred = model(data)
            # print(pred[0],target[0])
            batch_loss = criterion(pred,target)
            
            batch_loss.backward()
            optimizer.step()
            
            epoch_loss += batch_loss.item()
        
        valid_loss = validation(model, criterion, valid_loader)
        scheduler.step(valid_loss)
        
        print(f'Epoch{epoch+1}, Train_loss: {epoch_loss/len(train_loader):6f}, Validation_loss: {valid_loss:6f}')
        if min_val_loss>valid_loss:
            min_val_loss = valid_loss
            best_model = model
            best_epoch = epoch+1
            torch.save(best_model,'C:/cv_task/best_model.pt')
            print('New best Model!')
    print(f'TRAINING DONE\nBest epoch: {best_epoch} at {min_val_loss:6f}')
    return best_model

In [177]:
best_model = train(model,train_loader,valid_loader)

  0%|          | 0/1500 [00:00<?, ?it/s]

100%|██████████| 1500/1500 [01:58<00:00, 12.62it/s]
100%|██████████| 375/375 [00:09<00:00, 41.03it/s]


Epoch1, Train_loss: 1.565958, Validation_loss: 1.635330
New best Model!


 63%|██████▎   | 948/1500 [01:16<00:44, 12.38it/s]


KeyboardInterrupt: 

### inference

In [None]:
def inference(model, test_loader):
    with torch.no_grad():
        total = 0
        correct = 0
        model.eval()
        for data, target in tqdm(test_loader):
            data = data.float().to(params['device'])
            target = target.to(params['device'])
            pred = model(data)
            pred = torch.max(pred,1)[1]
            print(pred.shape)
            total+=len(target)
            correct+=(pred==target).sum().item()
        print(f'Test accuracy: {correct/total*100:3f}%')

In [None]:
inference(best_model, test_loader)