In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
import os

import torch

In [3]:
from main import cifar_loaders, train_model, test_model
from models import mlp_preprocessing, MLP_NO_ACT, MLP
from models import cnn_preprocessing, CNN_NO_ACT, CNN

## Load Dataset

In [4]:
# Sizes handed to cifar_loaders for the training and the test loader
# (presumably mini-batch sizes); kept as two names so they can be tuned separately.
batch_size, test_batch_size = 64, 64

# Each call is unpacked as a (train, test) pair; only the half that matches
# the size passed in is kept, the other half is discarded.
(train_loader, _) = cifar_loaders(batch_size)
(_, test_loader) = cifar_loaders(test_batch_size)

Files already downloaded and verified
Files already downloaded and verified


## Train and test MLP model

In [5]:
## Common training settings
# Seed PyTorch's global RNG so that weight initialisation (and anything else
# drawing from the global generator) repeats across "Restart & Run All" runs.
# NOTE(review): some CUDA kernels can still be nondeterministic — confirm
# whether bit-exact reproducibility is required.
seed = 0
torch.manual_seed(seed)

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Epoch count passed as `num_epoch` to train_model for the MLP experiments.
num_epoch = 20

In [6]:
# Baseline MLP: fit on the training loader, then score on the test loader.
# Presumably 3*32*32 is the flattened input size and 10 the number of classes
# — confirm against models.MLP.
mlp_model = MLP(3*32*32, 10)

# Optimizer and loss are created up front and handed to train_model.
mlp_optimizer = torch.optim.Adam(mlp_model.parameters(), lr=1e-3)
mlp_loss_fn = torch.nn.CrossEntropyLoss()

mlp_train_history, mlp_train_accuracy = train_model(
    mlp_model,
    device=device,
    train_loader=train_loader,
    num_epoch=num_epoch,
    opt=mlp_optimizer,
    loss_fn=mlp_loss_fn,
    preprocessing_fn=mlp_preprocessing,
)

# Only the second element returned by test_model (the accuracy) is kept.
_, mlp_test_accuracy = test_model(
    mlp_model,
    device=device,
    test_loader=test_loader,
    preprocessing_fn=mlp_preprocessing,
)



Train epoch 0: 100%|██████████| 782/782 [00:16<00:00, 48.12it/s, Accuracy=0.288, Total_loss=1.91]
Train epoch 1: 100%|██████████| 782/782 [00:15<00:00, 50.00it/s, Accuracy=0.376, Total_loss=1.72]
Train epoch 2: 100%|██████████| 782/782 [00:15<00:00, 49.59it/s, Accuracy=0.414, Total_loss=1.62]
Train epoch 3: 100%|██████████| 782/782 [00:16<00:00, 46.85it/s, Accuracy=0.438, Total_loss=1.56]
Train epoch 4: 100%|██████████| 782/782 [00:16<00:00, 47.16it/s, Accuracy=0.453, Total_loss=1.52]
Train epoch 5: 100%|██████████| 782/782 [00:16<00:00, 46.82it/s, Accuracy=0.465, Total_loss=1.49]
Train epoch 6: 100%|██████████| 782/782 [00:16<00:00, 47.50it/s, Accuracy=0.475, Total_loss=1.46]
Train epoch 7: 100%|██████████| 782/782 [00:16<00:00, 47.15it/s, Accuracy=0.483, Total_loss=1.44]
Train epoch 8: 100%|██████████| 782/782 [00:16<00:00, 47.05it/s, Accuracy=0.491, Total_loss=1.42]
Train epoch 9: 100%|██████████| 782/782 [00:17<00:00, 44.93it/s, Accuracy=0.497, Total_loss=1.4]  
Train epoch 10: 100

In [7]:
# Persist the MLP run's metrics as JSON.
mlp_result_path = '../experiment_result/mlp_model.json'

# Convert everything to plain Python numbers *before* opening the file, so a
# failed conversion cannot leave a truncated/empty result file behind.
mlp_result = {
    "train_acc": float(mlp_train_accuracy),
    "num_epoch": num_epoch,
    "test_acc": float(mlp_test_accuracy),
    "train_loss": [float(x) for x in mlp_train_history],
}

# open(..., 'w') does not create missing parent directories, so make sure the
# output folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(mlp_result_path), exist_ok=True)
with open(mlp_result_path, 'w') as f:
    json.dump(mlp_result, f, indent=4)


## Train MLP model without activation layer

In [8]:
# MLP variant without activation layers: train, evaluate, then save metrics.
# Uses the same constructor arguments, optimizer settings, loss and
# preprocessing as the regular MLP run so the two are directly comparable.
mlp_no_act_model = MLP_NO_ACT(3*32*32, 10)

mlp_no_act_train_history, mlp_no_act_train_accuracy = train_model(
    mlp_no_act_model,
    device=device,
    train_loader=train_loader,
    num_epoch=num_epoch,
    opt=torch.optim.Adam(mlp_no_act_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.CrossEntropyLoss(),
    preprocessing_fn=mlp_preprocessing,
)

# Only the second element returned by test_model (the accuracy) is kept.
_, mlp_no_act_test_accuracy = test_model(
    mlp_no_act_model,
    device=device,
    test_loader=test_loader,
    preprocessing_fn=mlp_preprocessing,
)

# Build the payload before opening the file, so a failed float() conversion
# cannot leave a truncated/empty result file behind.
mlp_no_act_result_path = '../experiment_result/mlp_no_act_model.json'
mlp_no_act_result = {
    "train_acc": float(mlp_no_act_train_accuracy),
    "num_epoch": num_epoch,
    "test_acc": float(mlp_no_act_test_accuracy),
    "train_loss": [float(x) for x in mlp_no_act_train_history],
}

# open(..., 'w') does not create missing parent directories; create the
# output folder if needed so the finished run is not lost to a missing path.
os.makedirs(os.path.dirname(mlp_no_act_result_path), exist_ok=True)
with open(mlp_no_act_result_path, 'w') as f:
    json.dump(mlp_no_act_result, f, indent=4)

Train epoch 0: 100%|██████████| 782/782 [00:15<00:00, 49.13it/s, Accuracy=0.259, Total_loss=2.02]
Train epoch 1: 100%|██████████| 782/782 [00:15<00:00, 49.39it/s, Accuracy=0.291, Total_loss=1.96]
Train epoch 2: 100%|██████████| 782/782 [00:15<00:00, 50.27it/s, Accuracy=0.302, Total_loss=1.94]
Train epoch 3: 100%|██████████| 782/782 [00:15<00:00, 50.36it/s, Accuracy=0.307, Total_loss=1.93]
Train epoch 4: 100%|██████████| 782/782 [00:15<00:00, 49.45it/s, Accuracy=0.312, Total_loss=1.93]
Train epoch 5: 100%|██████████| 782/782 [00:15<00:00, 50.14it/s, Accuracy=0.316, Total_loss=1.92]
Train epoch 6: 100%|██████████| 782/782 [00:15<00:00, 49.28it/s, Accuracy=0.317, Total_loss=1.91]
Train epoch 7: 100%|██████████| 782/782 [00:15<00:00, 49.81it/s, Accuracy=0.324, Total_loss=1.91]
Train epoch 8: 100%|██████████| 782/782 [00:15<00:00, 50.32it/s, Accuracy=0.317, Total_loss=1.91]
Train epoch 9: 100%|██████████| 782/782 [00:16<00:00, 48.65it/s, Accuracy=0.32, Total_loss=1.9]  
Train epoch 10: 100%

## Train CNN model

In [13]:
# CNN experiment: train, evaluate, then save metrics.
cnn_model = CNN(3*32*32, 10)

# Epoch budget for the CNN runs; the CNN-without-activation cell reuses this
# name, so this cell has to be executed before that one.
num_epoch_cnn = 30

cnn_train_history, cnn_train_accuracy = train_model(
    cnn_model,
    device=device,
    train_loader=train_loader,
    num_epoch=num_epoch_cnn,
    opt=torch.optim.Adam(cnn_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.CrossEntropyLoss(),
    preprocessing_fn=cnn_preprocessing,
)

# Only the second element returned by test_model (the accuracy) is kept.
_, cnn_test_accuracy = test_model(
    cnn_model,
    device=device,
    test_loader=test_loader,
    preprocessing_fn=cnn_preprocessing,
)

# Build the payload before opening the file, so a failed float() conversion
# cannot leave a truncated/empty result file behind.
cnn_result_path = '../experiment_result/cnn_model.json'
cnn_result = {
    "train_acc": float(cnn_train_accuracy),
    "num_epoch": num_epoch_cnn,
    "test_acc": float(cnn_test_accuracy),
    "train_loss": [float(x) for x in cnn_train_history],
}

# open(..., 'w') does not create missing parent directories; create the
# output folder if needed so the finished run is not lost to a missing path.
os.makedirs(os.path.dirname(cnn_result_path), exist_ok=True)
with open(cnn_result_path, 'w') as f:
    json.dump(cnn_result, f, indent=4)

Train epoch 0: 100%|██████████| 782/782 [00:27<00:00, 28.50it/s, Accuracy=0.394, Total_loss=1.62]
Train epoch 1: 100%|██████████| 782/782 [00:29<00:00, 26.49it/s, Accuracy=0.56, Total_loss=1.21]  
Train epoch 2: 100%|██████████| 782/782 [00:27<00:00, 27.97it/s, Accuracy=0.636, Total_loss=1.02] 
Train epoch 3: 100%|██████████| 782/782 [00:27<00:00, 28.19it/s, Accuracy=0.686, Total_loss=0.886]
Train epoch 4: 100%|██████████| 782/782 [00:28<00:00, 27.19it/s, Accuracy=0.713, Total_loss=0.813]
Train epoch 5:  69%|██████▊   | 537/782 [00:20<00:10, 23.39it/s, accuracy=0.797, batch_loss=0.635]

## Train CNN without activation layer

In [10]:
# CNN variant without activation layers: train, evaluate, then save metrics.
# NOTE: relies on `num_epoch_cnn` assigned in the "Train CNN model" cell —
# run that cell first on a fresh kernel.
cnn_no_act_model = CNN_NO_ACT(3*32*32, 10)

cnn_no_act_train_history, cnn_no_act_train_accuracy = train_model(
    cnn_no_act_model,
    device=device,
    train_loader=train_loader,
    num_epoch=num_epoch_cnn,
    opt=torch.optim.Adam(cnn_no_act_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.CrossEntropyLoss(),
    preprocessing_fn=cnn_preprocessing,
)

# Only the second element returned by test_model (the accuracy) is kept.
_, cnn_no_act_test_accuracy = test_model(
    cnn_no_act_model,
    device=device,
    test_loader=test_loader,
    preprocessing_fn=cnn_preprocessing,
)

# Build the payload before opening the file, so a failed float() conversion
# cannot leave a truncated/empty result file behind.
cnn_no_act_result_path = '../experiment_result/cnn_no_act_model.json'
cnn_no_act_result = {
    "train_acc": float(cnn_no_act_train_accuracy),
    "num_epoch": num_epoch_cnn,
    "test_acc": float(cnn_no_act_test_accuracy),
    "train_loss": [float(x) for x in cnn_no_act_train_history],
}

# open(..., 'w') does not create missing parent directories; create the
# output folder if needed so the finished run is not lost to a missing path.
os.makedirs(os.path.dirname(cnn_no_act_result_path), exist_ok=True)
with open(cnn_no_act_result_path, 'w') as f:
    json.dump(cnn_no_act_result, f, indent=4)


Train epoch 0: 100%|██████████| 782/782 [00:25<00:00, 30.63it/s, Accuracy=0.263, Total_loss=2.04]
Train epoch 1: 100%|██████████| 782/782 [00:25<00:00, 30.66it/s, Accuracy=0.287, Total_loss=1.99]
Train epoch 2: 100%|██████████| 782/782 [00:25<00:00, 30.59it/s, Accuracy=0.293, Total_loss=1.98] 
Train epoch 3: 100%|██████████| 782/782 [00:25<00:00, 30.53it/s, Accuracy=0.294, Total_loss=1.97]
Train epoch 4: 100%|██████████| 782/782 [00:27<00:00, 28.82it/s, Accuracy=0.295, Total_loss=1.97] 
Train epoch 5: 100%|██████████| 782/782 [00:29<00:00, 26.95it/s, Accuracy=0.303, Total_loss=1.96]
Train epoch 6: 100%|██████████| 782/782 [00:28<00:00, 27.15it/s, Accuracy=0.301, Total_loss=1.96]
Train epoch 7: 100%|██████████| 782/782 [00:29<00:00, 26.18it/s, Accuracy=0.304, Total_loss=1.96]
Train epoch 8: 100%|██████████| 782/782 [00:29<00:00, 26.56it/s, Accuracy=0.308, Total_loss=1.95]
Train epoch 9: 100%|██████████| 782/782 [00:30<00:00, 25.95it/s, Accuracy=0.309, Total_loss=1.95]
Train epoch 10: 10