# Final project: Deep Equilibrium Model (DEQ) implementation

### Set up environment

In [1]:
# Google Colab setup (disabled). Uncomment when running on Colab to mount Drive,
# clone the project repository into it, and install the Python dependencies.
# from google.colab import drive
# drive.mount('/content/drive')
# %cd /content/drive/MyDrive/
# !mkdir -p 10714
# %cd /content/drive/MyDrive/10714
# !git clone https://github.com/LiableFish/dlsyscourse-project.git
# %cd /content/drive/MyDrive/10714/dlsyscourse-project

# !pip3 install --upgrade --no-deps git+https://github.com/dlsys10714/mugrade.git
# !pip3 install pybind11
# !pip3 install tqdm

In [2]:
# Build step (disabled): uncomment to run `make` in the current directory.
# NOTE(review): presumably this compiles needle's native backend — confirm.
# !make

In [None]:
# One-time data download (disabled): uncomment to fetch Penn Treebank and CIFAR-10
# into ./data. Only the CIFAR-10 directory is read by the cells visible in this notebook.
# # Download the datasets you will be using for this assignment

# import urllib.request
# import os

# !mkdir -p './data/ptb'
# # Download Penn Treebank dataset
# ptb_data = "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb."
# for f in ['train.txt', 'test.txt', 'valid.txt']:
#     if not os.path.exists(os.path.join('./data/ptb', f)):
#         urllib.request.urlretrieve(ptb_data + f, os.path.join('./data/ptb', f))

# # Download CIFAR-10 dataset
# if not os.path.isdir("./data/cifar-10-batches-py"):
#     urllib.request.urlretrieve("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz", "./data/cifar-10-python.tar.gz")
#     !tar -xvzf './data/cifar-10-python.tar.gz' -C './data'

In [1]:
import sys

# Register the project's local source directories with the import system,
# in the same order as before: './python' first, then './apps'.
sys.path.extend(['./python', './apps'])

In [2]:
import numpy as np

In [3]:
import needle as ndl

In [4]:
# Device handle from ndl.cpu(); it is passed as `device=` to every model and
# tensor constructed further down in this notebook.
device = ndl.cpu()

In [5]:
# Both splits are read from the same extracted CIFAR-10 directory;
# only the `train` flag differs between them.
cifar10_root = "data/cifar-10-batches-py"
train_dataset = ndl.data.CIFAR10Dataset(cifar10_root, train=True)
test_dataset = ndl.data.CIFAR10Dataset(cifar10_root, train=False)

In [6]:
# Both loaders use the same batch size; shuffling is enabled for the
# training split only.
loader_batch_size = 128
train_dataloader = ndl.data.DataLoader(dataset=train_dataset, batch_size=loader_batch_size, shuffle=True)
test_dataloader = ndl.data.DataLoader(dataset=test_dataset, batch_size=loader_batch_size, shuffle=False)

In [7]:
from simple_training import train_cifar10, evaluate_cifar10

In [8]:
def _train(
    model,
    n_epochs: int = 10,
    lr: float = 0.001,
    weight_decay: float = 0.001,
    optimizer=None,
):
    """Train ``model`` on ``train_dataloader``, then evaluate it on ``test_dataloader``.

    The hyperparameters that used to be hard-coded in the body are now keyword
    arguments whose defaults equal the previous values, so ``_train(model)`` and
    ``_train(model, n_epochs)`` behave exactly as before.

    Args:
        model: Model forwarded unchanged to ``train_cifar10`` and ``evaluate_cifar10``.
        n_epochs: Number of epochs passed to ``train_cifar10``.
        lr: Learning rate passed to ``train_cifar10``.
        weight_decay: Weight decay passed to ``train_cifar10``.
        optimizer: Optimizer passed to ``train_cifar10``. ``None`` selects
            ``ndl.optim.Adam``, the optimizer this helper always used before.

    Returns:
        None. Nothing is returned from the two helper calls.
    """
    # Resolve the default lazily so that defining this function does not require
    # `ndl` to be importable at definition time.
    if optimizer is None:
        optimizer = ndl.optim.Adam

    train_cifar10(
        model,
        train_dataloader,
        n_epochs=n_epochs,
        optimizer=optimizer,
        lr=lr,
        weight_decay=weight_decay,
    )
    evaluate_cifar10(model, test_dataloader)

### Training "tanh" model

In [9]:
from models import TanhLinearDEQ

In [10]:
# Baseline tanh model with the DEQ path switched off (use_deq=False) and an
# explicit `depth`. NOTE(review): presumably `depth` is the number of times the
# layer is applied when no solver is used — confirm against models.TanhLinearDEQ.
tanh_model = TanhLinearDEQ(
    in_features=3072,  # presumably a flattened 3x32x32 CIFAR-10 image — TODO confirm
    hidden_size=128,
    n_classes=10,
    device=device,
    use_deq=False,
    depth=10,  # NOTE(review): an alternative value of 100 was left commented out here
)

In [18]:
# DEQ variant of the tanh model: same sizes as `tanh_model`, but with use_deq=True
# and a ForwardIteration solver (max_iter=100) instead of an explicit `depth`.
# NOTE(review): debug=True is set on this solver but not on the ResNet DEQ solver
# further down — confirm it is intentional and not leftover debugging.
tanh_deq_model = TanhLinearDEQ(
    in_features=3072,
    hidden_size=128,
    n_classes=10,
    device=device,
    use_deq=True,
    solver=ndl.solver.ForwardIteration(max_iter=100, debug=True),
)

In [13]:
# Pull one batch from the training loader; the second element of the batch is ignored.
X, _ = next(iter(train_dataloader))
# Re-wrap the batch as a needle Tensor on `device`; `X` is the input for the
# timing cells that follow.
X = ndl.Tensor(X, device=device)

In [14]:
%%time
# Time one forward pass of the non-DEQ tanh model on the sample batch; only the output shape is displayed.
tanh_model(X).shape

CPU times: user 81 ms, sys: 10.2 ms, total: 91.2 ms
Wall time: 87 ms


(128, 10)

In [15]:
%%time
# Time one forward pass of the DEQ tanh model on the same sample batch; only the output shape is displayed.
tanh_deq_model(X).shape

CPU times: user 345 ms, sys: 0 ns, total: 345 ms
Wall time: 342 ms


(128, 10)

In [17]:
# Train the non-DEQ tanh model with `_train`'s defaults (10 epochs), then evaluate on the test loader.
_train(tanh_model)

  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 0/9 accuracy: 0.29556 loss: [1.9659365] time: 85.618s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 1/9 accuracy: 0.36212 loss: [1.7897403] time: 112.840s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 2/9 accuracy: 0.38266 loss: [1.7257469] time: 87.993s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 3/9 accuracy: 0.39814 loss: [1.684417] time: 99.096s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 4/9 accuracy: 0.40616 loss: [1.6610756] time: 94.021s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 5/9 accuracy: 0.41718 loss: [1.637973] time: 139.520s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 6/9 accuracy: 0.42318 loss: [1.6173705] time: 168.485s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 7/9 accuracy: 0.42854 loss: [1.597893] time: 151.479s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 8/9 accuracy: 0.4345 loss: [1.587076] time: 163.587s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 9/9 accuracy: 0.43654 loss: [1.577322] time: 170.690s


  0%|          | 0/78 [00:00<?, ?it/s]

accuracy: 0.4163 loss: [1.6141825]


In [16]:
# Train the DEQ tanh model with `_train`'s defaults (10 epochs), then evaluate on the test loader.
_train(tanh_deq_model)

  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 0/9 accuracy: 0.30106 loss: [1.9590493] time: 88.557s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 1/9 accuracy: 0.36106 loss: [1.78632] time: 87.084s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 2/9 accuracy: 0.38256 loss: [1.7327211] time: 98.414s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 3/9 accuracy: 0.39756 loss: [1.6942847] time: 84.235s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 4/9 accuracy: 0.40964 loss: [1.6598368] time: 82.076s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 5/9 accuracy: 0.41836 loss: [1.6371758] time: 83.969s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 6/9 accuracy: 0.42436 loss: [1.6190336] time: 83.572s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 7/9 accuracy: 0.42972 loss: [1.6061577] time: 88.276s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 8/9 accuracy: 0.43538 loss: [1.5870414] time: 87.417s


  0%|          | 0/390 [00:00<?, ?it/s]

Epoch 9/9 accuracy: 0.43994 loss: [1.5748775] time: 88.116s


  0%|          | 0/78 [00:00<?, ?it/s]

accuracy: 0.4107 loss: [1.6440225]


### Training "ResNet" model

In [9]:
from models import ResNetDEQ

In [11]:
# Baseline ResNet-style model with the DEQ path switched off (use_deq=False) and
# an explicit `depth`. NOTE(review): the exact meaning of `depth` is defined in
# models.ResNetDEQ, which is not visible here — confirm.
resnet_model = ResNetDEQ(
    in_channels=3,
    out_channels=16,
    hidden_size=32,
    kernel_size=3,
    n_classes=10,
    device=device,
    use_deq=False,
    depth=1,  # NOTE(review): an alternative value of 10 was left commented out here
)

In [12]:
# DEQ variant of the ResNet-style model: same sizes as `resnet_model`, but with
# use_deq=True and a ForwardIteration solver (max_iter=100) instead of `depth`.
resnet_deq_model = ResNetDEQ(
    in_channels=3,
    out_channels=16,
    hidden_size=32,
    kernel_size=3,
    n_classes=10,
    device=device,
    use_deq=True,
    solver=ndl.solver.ForwardIteration(max_iter=100),
)

In [13]:
# Pull one batch from the training loader (second element ignored) and wrap it
# as a needle Tensor on `device`. This repeats the batch fetch from the tanh
# section, so `X` is rebound to a fresh batch here.
X, _ = next(iter(train_dataloader))
X = ndl.Tensor(X, device=device)

In [14]:
%%time
# Time one forward pass of the non-DEQ ResNet model on the sample batch; only the output shape is displayed.
resnet_model(X).shape

CPU times: user 3.47 s, sys: 494 ms, total: 3.96 s
Wall time: 3.99 s


(128, 10)

In [15]:
%%time
# Time one forward pass of the DEQ ResNet model on the sample batch; only the output shape is displayed.
# NOTE(review): the recorded run of this cell took about 5 min of wall time, versus
# about 4 s for the depth=1 baseline cell — expect this cell to be slow.
resnet_deq_model(X).shape

CPU times: user 4min 59s, sys: 9.27 s, total: 5min 9s
Wall time: 5min 10s


(128, 10)

In [None]:
# Train the non-DEQ ResNet model with `_train`'s defaults, then evaluate on the test loader.
# NOTE(review): no output was recorded for this cell in the saved notebook.
_train(resnet_model)

In [None]:
# Train the DEQ ResNet model with `_train`'s defaults, then evaluate on the test loader.
# NOTE(review): no output was recorded for this cell in the saved notebook.
_train(resnet_deq_model)