<a href="https://colab.research.google.com/github/KennyThinh/pytorch/blob/main/test_deterministic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as f
from torch.utils.data import DataLoader
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import numpy as np
import random
import os

In [43]:
# for deterministic
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# for using cuda
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [44]:
# for training
trial_num = 1000
num_classes = 2
num_epochs = 10
batch_size = 32
learning_rate = 0.00001 #0.000001

In [45]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [46]:
if not os.path.exists(f'checkpoints{trial_num}'):
  os.mkdir(f'checkpoints{trial_num}')

def save_checkpoint(state, epoch):
    print("=> Saving checkpoint")
    torch.save(state, "checkpoints{0}/checkpoint{1}.pth.tar".format(trial_num,epoch))

def load_checkpoint(model, optimizer, epoch_to_load):
    print(f"=> Loading checkpoint {epoch_to_load}")
    checkpoint = torch.load(f'checkpoints{trial_num}/checkpoint{epoch_to_load}.pth.tar')
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    return model, optimizer

In [47]:
#check acc on train and test
def check_train(loader, model):
  num_correct = 0
  num_samples = 0
  model.eval() #inform that we are going to evaluate
  with torch.no_grad():
    for x, y in loader:
      x = x.to(device)
      y = y.to(device)

      x = x.reshape(x.shape[0], -1)
      scores = model(x) #dim (64,10)      
      _, predictions = torch.max(scores, dim=1) #find the max among 10 outputs, so the dim is 1
      #predictions ([64])
      num_correct += (predictions==y).sum()
      num_samples += predictions.size(0) #each time is 64 examples
    print(f'--Train Acc {float(num_correct)/float(num_samples)*100:.2f}({num_correct}/{num_samples})')

#check acc on train and test
def check_val(loader, model):
  num_correct = 0
  num_samples = 0
  losses = []
  model.eval() #inform that we are going to evaluate
  with torch.no_grad():
    for x, y in loader:
      x = x.to(device)
      y = y.to(device)

      x = x.reshape(x.shape[0], -1)
      scores = model(x) #dim (64,10)      
      loss = criterion(scores, y)
      losses.append(loss)

      _, predictions = torch.max(scores, dim=1) #find the max among 10 outputs, so the dim is 1
      num_correct += (predictions==y).sum()
      num_samples += predictions.size(0) #each time is 64 examples
    print(f'--Val Loss: {sum(losses)/len(losses)}')
    print(f'--Val Acc: {float(num_correct)/float(num_samples)*100:.2f}({num_correct}/{num_samples})')

In [48]:
# create fully connected network
class NN(nn.Module):
  def __init__(self, input_size, num_classes):
    super(NN, self).__init__()
    self.fc1 = nn.Linear(input_size, 50)
    self.fc2 = nn.Linear(50, num_classes)
  
  def forward(self, x):
    x = f.relu(self.fc1(x))
    x = self.fc2(x)
    return x

#test
model = NN(768, 10)
x = torch.rand(64,768)
print(model(x).shape)


torch.Size([64, 10])


In [49]:

# Transform the data to torch tensors and normalize it 
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307), ((0.3081)))])

# Prepare training set and testing set
trainset = torchvision.datasets.MNIST('mnist', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST('mnist', train=False, download=True, transform=transform)



# Shuffle the indices
indices = np.arange(60000)
np.random.shuffle(indices)

# Build the train loader
train_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=False, sampler=torch.utils.data.SubsetRandomSampler(indices[:55000])) #SubsetRandomSampler and Shuffle=True are mutual exclusive

# Build the validation loader
val_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=False, sampler=torch.utils.data.SubsetRandomSampler(indices[55000:]))

# Build the test loader
test_loader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=0) 


# Compute the shape of the training set and testing set
trainset_shape = train_loader.dataset.train_data.shape
testset_shape = test_loader.dataset.test_data.shape

# Print the computed shapes
print(trainset_shape, testset_shape)


torch.Size([60000, 28, 28]) torch.Size([10000, 28, 28])




In [54]:
model = NN(784, 10).to(device)

# Instantiate the cross-entropy loss
criterion = nn.CrossEntropyLoss()

# Instantiate the Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [51]:
print(model)

NN(
  (fc1): Linear(in_features=784, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)


In [52]:
#train 
for epoch in range(num_epochs):
  losses = []
  for batch_idx, (data, target) in enumerate(train_loader):
    data = data.to(device)
    target = target.to(device)
    
    data = data.reshape(data.shape[0], -1)        
    
    #forward
    scores = model(data)
    loss = criterion(scores, target)

    #backward
    optimizer.zero_grad() #to clear out gradients of last time
    loss.backward()
    losses.append(loss.item())

    #gradient to update params
    optimizer.step()

  print(f"Epoch {epoch}----------------")
  print(f"--Train Loss: {sum(losses)/len(losses)}")  
  check_train(train_loader, model)  
  check_val(val_loader, model)
  checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
  save_checkpoint(checkpoint, epoch)  

Epoch 0----------------
--Train Loss: 1.8487000964408697
--Train Acc 69.72(38348/55000)
--Val Loss: 1.4167749881744385
--Val Acc: 70.24(3512/5000)
=> Saving checkpoint
Epoch 1----------------
--Train Loss: 1.1640800999347554
--Train Acc 79.72(43848/55000)
--Val Loss: 0.9441763162612915
--Val Acc: 80.20(4010/5000)
=> Saving checkpoint
Epoch 2----------------
--Train Loss: 0.8336471954057383
--Train Acc 84.21(46318/55000)
--Val Loss: 0.7175724506378174
--Val Acc: 84.52(4226/5000)
=> Saving checkpoint
Epoch 3----------------
--Train Loss: 0.6641962762835414
--Train Acc 86.22(47423/55000)
--Val Loss: 0.5964516997337341
--Val Acc: 86.38(4319/5000)
=> Saving checkpoint
Epoch 4----------------
--Train Loss: 0.5653504361939985
--Train Acc 87.37(48053/55000)
--Val Loss: 0.5161192417144775
--Val Acc: 87.56(4378/5000)
=> Saving checkpoint
Epoch 5----------------
--Train Loss: 0.5024099657355353
--Train Acc 88.19(48504/55000)
--Val Loss: 0.46305254101753235
--Val Acc: 88.44(4422/5000)
=> Saving ch

In [53]:
for epoch in range(9, 1, -1):
  model, optimizer = load_checkpoint(model, optimizer, epoch)
  check_train(train_loader, model)
  # print("Checking accuracy on Validation Set")
  check_val(val_loader, model)
  # print("Checking accuracy on Test Set")
  # check_val(test_loader, model)

=> Loading checkpoint 9
--Train Acc 89.82(49399/55000)
--Val Loss: 0.3650217652320862
--Val Acc: 90.08(4504/5000)
=> Loading checkpoint 8
--Train Acc 89.49(49222/55000)
--Val Loss: 0.37907713651657104
--Val Acc: 89.74(4487/5000)
=> Loading checkpoint 7
--Train Acc 89.12(49018/55000)
--Val Loss: 0.40356743335723877
--Val Acc: 89.38(4469/5000)
=> Loading checkpoint 6
--Train Acc 88.72(48795/55000)
--Val Loss: 0.4272948205471039
--Val Acc: 88.98(4449/5000)
=> Loading checkpoint 5
--Train Acc 88.19(48504/55000)
--Val Loss: 0.46529287099838257
--Val Acc: 88.44(4422/5000)
=> Loading checkpoint 4
--Train Acc 87.37(48053/55000)
--Val Loss: 0.5196802616119385
--Val Acc: 87.56(4378/5000)
=> Loading checkpoint 3
--Train Acc 86.22(47423/55000)
--Val Loss: 0.5940357446670532
--Val Acc: 86.38(4319/5000)
=> Loading checkpoint 2
--Train Acc 84.21(46318/55000)
--Val Loss: 0.7181698679924011
--Val Acc: 84.52(4226/5000)


PP o tren 

*   Accracy tren tap val van giu nguyen
*   Loss thay doi
*   Can test speed, co ve se lau hon ben duoi
*   Moi lan run gia tri loss se khac nhau, acc cung khac nhau --> co ve may cai seed khong hoat dong tot. Tuy nhien su khac nhau la ko nhieu, vd loss 0.66 --> 0.67...

In [55]:
data_loader = {"train": train_loader, "val": val_loader}
dataset_sizes = {"train": 55000, "val": 5000}
model = NN(784, 10).to(device)

# Instantiate the cross-entropy loss
criterion = nn.CrossEntropyLoss()

# Instantiate the Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
import pandas as pd
df = pd.DataFrame(columns=["epoch", "train_loss", "train_acc", "val_loss", "val_acc"])
for epoch in range(num_epochs):
  print('Epoch {}/{}'.format(epoch, num_epochs-1))
  print('-' * 10)
  epoch_loss = 0
  epoch_acc = 0

  for phase in ["train", "val"]:
      if phase == "train":
          model.train()
      else:
          model.eval()
      current_loss = 0.0
      current_corrects = 0

      for batch_idx, (data, target) in enumerate(data_loader[phase]):
          data = data.to(device)
          target = target.to(device)
          data = data.reshape(data.shape[0], -1)  
          
          # forward
          scores = model(data)
          _, predictions = scores.max(1)
          loss = criterion(scores, target)
          
          if phase == "train":
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

          # CPU環境では item() 不要
          current_loss += loss.item() * data.size(0) 
          current_corrects += torch.sum(predictions == target)
      
      epoch_loss = current_loss / dataset_sizes[phase]
      epoch_acc = current_corrects.item() / dataset_sizes[phase]
      
      print(f'{phase}: loss {epoch_loss}, acc {epoch_acc} ({current_corrects.item()}/{dataset_sizes[phase]})')
  checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
  save_checkpoint(checkpoint, epoch)  

Epoch 0/9
----------
train: loss 1.8966946595105258, acc 0.48761818181818184 (26819/55000)
val: loss 1.4539854480743408, acc 0.721 (3605/5000)
=> Saving checkpoint
Epoch 1/9
----------
train: loss 1.1861037666494196, acc 0.7718363636363637 (42451/55000)
val: loss 0.9525429840087891, acc 0.821 (4105/5000)
=> Saving checkpoint
Epoch 2/9
----------
train: loss 0.8421434968861666, acc 0.8284 (45562/55000)
val: loss 0.7197362869262696, acc 0.8544 (4272/5000)
=> Saving checkpoint
Epoch 3/9
----------
train: loss 0.6684629465623335, acc 0.8527272727272728 (46900/55000)
val: loss 0.5923709656715394, acc 0.8682 (4341/5000)
=> Saving checkpoint
Epoch 4/9
----------
train: loss 0.5677720988100226, acc 0.8672181818181818 (47697/55000)
val: loss 0.5146588773727417, acc 0.8802 (4401/5000)
=> Saving checkpoint
Epoch 5/9
----------
train: loss 0.5032622916698456, acc 0.8770909090909091 (48240/55000)
val: loss 0.46274208242893217, acc 0.8858 (4429/5000)
=> Saving checkpoint
Epoch 6/9
----------
train: 

In [56]:
for epoch in range(9, 0, -1):
  model, optimizer = load_checkpoint(model, optimizer, epoch)
  check_train(train_loader, model)
  # print("Checking accuracy on Validation Set")
  check_val(val_loader, model)
  # print("Checking accuracy on Test Set")
  # check_val(test_loader, model)

=> Loading checkpoint 9
--Train Acc 89.85(49417/55000)
--Val Loss: 0.36805450916290283
--Val Acc: 90.28(4514/5000)
=> Loading checkpoint 8
--Train Acc 89.59(49273/55000)
--Val Loss: 0.37662434577941895
--Val Acc: 89.92(4496/5000)
=> Loading checkpoint 7
--Train Acc 89.16(49040/55000)
--Val Loss: 0.40196922421455383
--Val Acc: 89.62(4481/5000)
=> Loading checkpoint 6
--Train Acc 88.70(48785/55000)
--Val Loss: 0.42421549558639526
--Val Acc: 89.26(4463/5000)
=> Loading checkpoint 5
--Train Acc 88.07(48437/55000)
--Val Loss: 0.4598385691642761
--Val Acc: 88.58(4429/5000)
=> Loading checkpoint 4
--Train Acc 87.28(48006/55000)
--Val Loss: 0.5131676197052002
--Val Acc: 88.02(4401/5000)
=> Loading checkpoint 3
--Train Acc 86.18(47397/55000)
--Val Loss: 0.5937952995300293
--Val Acc: 86.82(4341/5000)
=> Loading checkpoint 2
--Train Acc 84.50(46477/55000)
--Val Loss: 0.721510112285614
--Val Acc: 85.44(4272/5000)
=> Loading checkpoint 1
--Train Acc 81.21(44666/55000)
--Val Loss: 0.9565715789794922



*   So voi cach dung torch.no_grad thi train acc (tinh khi train theo cach nay) se bi thap hon mot chut.
*   Moi lan run se khac nhau mot chut, nhung ket qua cuoi cung cung tuong doi gan nhau

