# newModel train & evaluation

In [1]:
import matplotlib.pyplot as plt
from torch import nn
import torch.nn.functional as F
import numpy as np
import torch
import os
from tqdm import tqdm as tqdm
import time

# from models.my_model import MyNet, train, evaluate, evaluate_test
from torchvision import datasets, transforms
from data.loaders import get_cifar_loader


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
PATH="data/cifar-10-python.tar.gz"

module_path = os.path.dirname(os.getcwd())
home_path = module_path
figures_path = os.path.join(home_path, 'reports', 'figures')
models_path = os.path.join(home_path, 'reports', 'models')

transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


# # load dataset from cifar10
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_dataloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

validset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
valid_dataloader = torch.utils.data.DataLoader(validset, batch_size=64, shuffle=False, num_workers=2)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_dataloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# choose device as cuda
# device_id = device_id
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# device = torch.device("cuda:{}".format(3) if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.get_device_name(3))

module_path = os.path.dirname(os.getcwd())
home_path = module_path
figures_path = os.path.join(home_path, 'reports', 'figures')
models_path = os.path.join(home_path, 'reports', 'models')

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
cuda
NVIDIA GeForce RTX 3090


In [3]:
def cal_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [4]:
class MyNet(nn.Module):
    def __init__(self,num_features,hidden_size,output_size):
        super(MyNet, self).__init__()
        self.conv1 = nn.Conv2d(num_features, 64, 5, 1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 64, 5, 1)
        self.bn2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(1600, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(p=0.3)
        
    def forward(self, x):
        # x:1 * 28 * 28
        
        x = self.bn1(self.conv1(x))
        x = F.relu(x) # 20 * 28 * 28
        print(x.shape)
        x = F.max_pool2d(x, 2, 2) # 20 * 14 * 14
        print(x.shape)
        
        x = self.dropout(x)
        x = self.bn2(self.conv2(x))
        x = F.relu(x) # 20 * 10* 10
        print(x.shape)
        x = F.max_pool2d(x, 2, 2) # 50 * 5 * 5
        print(x.shape)
        
        x = x.view(-1, 1600) #reshape
        print(x.shape)
        x1 = F.relu(self.fc1(x))
        # x1 = F.max_pool2d(x1, 2, 2) 
        x1 = self.dropout(x1)
        x1 = self.fc2(x1)
        
        return F.log_softmax(x1, dim = 1) # log probability


class myVGGNet(nn.Module):
    def __init__(self,num_features, hidden_size, output_size):
        super(testNet,self).__init__()
        
        self.layer0 = nn.Sequential(
            nn.Conv2d(num_features,64,3,padding=1),
            nn.Conv2d(64,64,3,padding=1),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(64,128,3,padding=1),
            nn.Conv2d(128, 128, 3,padding=1),
            nn.MaxPool2d(2, 2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(128,128, 3,padding=1),
            nn.Conv2d(128, 128, 3,padding=1),
            nn.Conv2d(128, 128, 1,padding=1),
            nn.MaxPool2d(2, 2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, 3,padding=1),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.Conv2d(256, 256, 1, padding=1),
            nn.MaxPool2d(2, 2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(256, 512, 3, padding=1),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.Conv2d(512, 512, 1, padding=1),
            nn.MaxPool2d(2, 2, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU()
        )

        # self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        
        self.layer5 = nn.Sequential(
            nn.Linear(512*4*4,1024),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(1024,hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(hidden_size,output_size)
        )
        
        
    def forward(self,x):
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        # x = self.avgpool(x)
        x = x.view(-1,512*4*4)
        
        x = self.layer5(x)
        
        return F.log_softmax(x, dim=1)

def train(model, device, train_dataloader, optimizer, epoch, loss_fn):
    model.train()

    for idx, (data, target) in enumerate(train_dataloader):
        data, target = data.to(device), target.to(device)
        preds = model(data) # batch_size * 10
        loss = loss_fn(preds, target)
        # loss = F.nll_loss(preds,target)
        # 前向传播+反向传播+优化
        # loss = F.CrossEntropyLoss(preds, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if idx % 1000 == 0:
            print("iteration: {};    Loss: {}".format(idx, loss.item()))

def evaluate(model, device, valid_dataloader,loss_fn, flag):
    model.eval()
    total_loss = 0.
    correct = 0.
    total_len = len(valid_dataloader.dataset)
    with torch.no_grad():
        for idx, (data, target) in enumerate(valid_dataloader):
            
            data, target = data.to(device), target.to(device)
            output = model(data) # batch_size * 1
            total_loss += loss_fn(output, target).item()
            # total_loss += F.nll_loss(output, target, reduction = "sum").item()
            pred = output.argmax(dim = 1)
            correct += pred.eq(target.view_as(pred)).sum().item()
            
    total_loss = total_loss / total_len
    acc = correct/total_len
    if flag == 1:
        print("Accuracy on test set:{}".format(acc)) 
    else:
        print("valid loss:{}, Accuracy:{}".format(total_loss, acc)) 
    return total_loss, acc

In [5]:
# hyper parameter
lr = 0.01
momentum = 0.5
weight_decay = 1e-5
num_features = 3
hidden_size = 1024
output_size = 10
loss_fn = nn.CrossEntropyLoss()

model = MyNet(num_features,hidden_size,output_size).to(device)
#选择一个optimizer
#SGD/ADAM
optimizer1 = torch.optim.SGD(model.parameters(), lr = lr, momentum=momentum,weight_decay=weight_decay)
# optimizer2 = torch.optim.SGD(model.parameters(), lr = 0.001, momentum=momentum, weight_decay=weight_decay)

starttime = time.time()
num_epochs = 50
total_loss = []
acc = []

train(model, device, train_dataloader, optimizer1, 0, loss_fn)
total_loss_0, acc_0 = evaluate(model, device, valid_dataloader,loss_fn, 0)
torch.save(model.state_dict(),"CIFAR10_cnn.pth")    
total_loss.append(total_loss_0)
acc.append(acc_0)

for epoch in range(1,num_epochs):
    print("Training epoch:", epoch)
    # if epoch<20:
    train(model, device, train_dataloader, optimizer1, epoch, loss_fn)
    # else: 
        # train(model, device, train_dataloader, optimizer2, epoch, loss_fn)
    
    total_loss_0, acc_0 = evaluate(model, device, valid_dataloader,loss_fn, 0)
    if total_loss_0 < min(total_loss) and acc_0 > max(acc):
        torch.save(model.state_dict(),"CIFAR10_cnn.pth")
    total_loss.append(total_loss_0)
    acc.append(acc_0)

model_ready = MyNet(num_features,hidden_size,output_size).to(device)
model_ready.load_state_dict(torch.load('CIFAR10_cnn.pth'))
evaluate(model_ready, device, test_dataloader,loss_fn, 1)

endtime = time.time()
time_cost = endtime - starttime
print("Finish! running time: %.8s s" % time_cost)

x1 = range(0, num_epochs)
x2 = range(0, num_epochs)
y1 = acc
y2 = total_loss

torch.Size([64, 64, 28, 28])
torch.Size([64, 64, 14, 14])
torch.Size([64, 64, 10, 10])
torch.Size([64, 64, 5, 5])
torch.Size([64, 1600])


RuntimeError: non-empty 3D or 4D (batch mode) tensor expected for input

In [None]:
# test part
model_ready = testNet(num_features,hidden_size,output_size).to(device)
model_ready.load_state_dict(torch.load('CIFAR10_cnn.pth'))
evaluate(model_ready, device, test_dataloader,loss_fn, 1)

endtime = time.time()
dtime = endtime - starttime
print("Finish! run time: %.8s s" % dtime)

Accuracy on test set:0.8016
Finish! run time: 812.4841 s


In [None]:
model = VGG_A(num_features,hidden_size,output_size).to(device)
print(cal_parameters(model))

10271104


## 画图

In [None]:
# 绘制结果
plt.clf()
plt.subplot(2, 1, 1)
plt.plot(x1, y1, 'o-')
plt.title('valid accuracy vs. epoches')
plt.ylabel('valid accuracy')
plt.subplot(2, 1, 2)
plt.plot(x2, y2, '.-')
plt.xlabel('epoches')
plt.ylabel('valid loss')

plt.savefig("newNet_accuracy_loss.jpg")