In [1]:
import sys
sys.path.append('./python')

import numpy as np
import pytest
import needle as ndl
from needle import backend_ndarray as nd
import needle.nn as nn
import needle.optim as optim
from needle import data as ndldata
import needle.init as init
import needle.ops as ops
import time
from tqdm import tqdm

np.random.seed(4)

  from .autonotebook import tqdm as notebook_tqdm


# Matmul test - CPU VS GPU

todo: gpu显存不会释放

In [2]:
matmul_dims = (5000, 5000, 5000)

m = matmul_dims[0]
n = matmul_dims[1]
p = matmul_dims[2]

_A = np.random.randn(m, n)
_B = np.random.randn(n, p)

st = time.time()
_S = _A @ _B
ed = time.time()
total = ed - st

print("time of cpu:", total)

time of cpu: 0.882286548614502


In [6]:
A = nd.array(_A, device = nd.cuda())
B = nd.array(_B, device = nd.cuda())

st = time.time()
S = A @ B
ed = time.time()
total = ed - st

print("time of gpu:", total) # 0.0013

time of gpu: 0.01257777214050293


# Training MNIST On CPU

In [2]:
# model
def MLPNet(dim, hidden_dim=100, num_classes=10, device=ndl.cpu_numpy()):
    ### BEGIN YOUR SOLUTION
    mlpNet = nn.Sequential(
        nn.Linear(in_features=dim, out_features=hidden_dim, device=device), 
        nn.ReLU(), 
        nn.Linear(in_features=hidden_dim, out_features=num_classes, device=device))
    return mlpNet

BATCH_SIZE = 100
HIDDEN_DIM = 100

mnist_train_dataset = ndldata.MNISTDataset("data/train-images-idx3-ubyte.gz",
                                            "data/train-labels-idx1-ubyte.gz")

mnist_train_dataloader = ndldata.DataLoader(dataset=mnist_train_dataset,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=True)

mnist_test_dataset = ndldata.MNISTDataset("data/t10k-images-idx3-ubyte.gz",
                                               "data/t10k-labels-idx1-ubyte.gz")

mnist_test_dataloader = ndldata.DataLoader(dataset=mnist_test_dataset,
                                            batch_size=BATCH_SIZE,
                                            shuffle=False)



In [5]:
def train_CPU(n_epochs, optimizer, model, loss_fn, train_loader):
    model.train()
    for epoch in range(1, n_epochs + 1):
        n_sample, correct = 0, 0
        loss_list = []
        acc_list = []
        for i, batch in tqdm(enumerate(train_loader)):
            x, y = batch[0], batch[1]
            x = x.reshape((x.shape[0], -1))
            y_hat = model(x)

            loss = loss_fn(y_hat, y)
            loss_list.append(loss.cached_data.numpy())

            logit = nd.NDArray(np.argmax(y_hat.cached_data.numpy(), axis=1))
            correct += np.sum((logit == y.cached_data).numpy().astype('int'))

            n_sample += x.shape[0]
            acc_list.append(correct / x.shape[0])

            opt.reset_grad()
            loss.backward()
            opt.step()
    
            acc, loss = correct/n_sample, np.mean(loss_list)
        print('Epoch {} - loss {} - acc {}'.format(epoch, loss, acc))


model = MLPNet(784, HIDDEN_DIM)
loss_fn = nn.SoftmaxLoss()
opt = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)

In [6]:
train_CPU(n_epochs = 5, optimizer = opt, model = model, loss_fn = loss_fn, train_loader = mnist_train_dataloader)

600it [00:05, 107.71it/s]


Epoch 1 - loss 0.3711097538471222 - acc 0.8967666666666667


600it [00:05, 104.80it/s]


Epoch 2 - loss 0.18391425907611847 - acc 0.94865


600it [00:05, 109.75it/s]


Epoch 3 - loss 0.14476287364959717 - acc 0.9604833333333334


600it [00:05, 106.82it/s]


Epoch 4 - loss 0.12549129128456116 - acc 0.9660666666666666


600it [00:05, 109.27it/s]

Epoch 5 - loss 0.1112636849284172 - acc 0.9701166666666666





# Training MNIST On GPU

In [1]:
import sys
sys.path.append('./python')

import numpy as np
import pytest
import needle as ndl
from needle import backend_ndarray as nd
import needle.nn as nn
import needle.optim as optim
from needle import data as ndldata
import needle.init as init
import needle.ops as ops
import time
from tqdm import tqdm

np.random.seed(4)

# model
def MLPNet(dim, hidden_dim=100, num_classes=10, device=ndl.cpu_numpy()):
    ### BEGIN YOUR SOLUTION
    mlpNet = nn.Sequential(
        nn.Linear(in_features=dim, out_features=hidden_dim, device=device), 
        nn.ReLU(), 
        nn.Linear(in_features=hidden_dim, out_features=num_classes, device=device))
    return mlpNet

BATCH_SIZE = 100
HIDDEN_DIM = 100

mnist_train_dataset = ndldata.MNISTDataset("data/train-images-idx3-ubyte.gz",
                                            "data/train-labels-idx1-ubyte.gz")

mnist_train_dataloader = ndldata.DataLoader(dataset=mnist_train_dataset,
                                                 batch_size=128,
                                                 shuffle=True)

def train_GPU(n_epochs, optimizer, model, loss_fn, train_loader):
    model.train()
    for epoch in range(1, n_epochs + 1):
        n_sample, correct = 0, 0
        loss_list = []
        acc_list = []
        for i, batch in tqdm(enumerate(train_loader)):
            x, y = batch[0], batch[1]
            gx = x.cuda()
            gy = y.cuda()
            gx = gx.reshape((gx.shape[0], -1))
            y_hat = model(gx)

            loss = loss_fn(y_hat, gy)
            loss_list.append(loss.cached_data.numpy())

            logit = nd.array(np.argmax(y_hat.cached_data.numpy(), axis=1), device = ndl.cuda())
            # print(logit.device)
            # print(y.cached_data.device)
            correct += np.sum((logit == gy.cached_data).numpy().astype('int'))

            n_sample += gx.shape[0]
            acc_list.append(correct / x.shape[0])

            opt.reset_grad()
            loss.backward()
            opt.step()
    
            acc, loss = correct/n_sample, np.mean(loss_list)
        print('Epoch {} - loss {} - acc {}'.format(epoch, loss, acc))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Training
gmodel = MLPNet(784, HIDDEN_DIM, device=ndl.cuda())
loss_fn = nn.SoftmaxLoss()
opt = optim.Adam(gmodel.parameters(), lr=0.001, weight_decay=0.001)

train_GPU(n_epochs = 5, optimizer = opt, model = gmodel, loss_fn = loss_fn, train_loader = mnist_train_dataloader)

469it [00:02, 159.30it/s]


Epoch 1 - loss 0.3852233588695526 - acc 0.89455


469it [00:02, 164.79it/s]


Epoch 2 - loss 0.18729496002197266 - acc 0.9473


469it [00:02, 164.59it/s]


Epoch 3 - loss 0.1499415785074234 - acc 0.9582666666666667


469it [00:02, 164.44it/s]


Epoch 4 - loss 0.1293514370918274 - acc 0.9643


469it [00:02, 163.33it/s]

Epoch 5 - loss 0.11657559126615524 - acc 0.9679833333333333





In [3]:
# save checkpoint and new_model load
ndl.save('./weights/gmodel_5epoch', gmodel.state_dict())

gmodel_new = MLPNet(784, HIDDEN_DIM, device=ndl.cuda())
gmodel_ckpt = ndl.load('./weights/gmodel_5epoch', device=ndl.cuda())
gmodel_new.load_state_dict(gmodel_ckpt)

In [4]:
# Transfer training
loss_fn = nn.SoftmaxLoss()
opt = optim.Adam(gmodel_new.parameters(), lr=0.001, weight_decay=0.001)

train_GPU(n_epochs = 5, optimizer = opt, model = gmodel_new, loss_fn = loss_fn, train_loader = mnist_train_dataloader)

469it [00:02, 161.72it/s]


Epoch 1 - loss 0.10929657518863678 - acc 0.9702166666666666


469it [00:02, 161.81it/s]


Epoch 2 - loss 0.10059221088886261 - acc 0.9733333333333334


469it [00:02, 160.85it/s]


Epoch 3 - loss 0.09576782584190369 - acc 0.9743166666666667


469it [00:02, 160.65it/s]


Epoch 4 - loss 0.09267044812440872 - acc 0.97505


469it [00:02, 156.81it/s]

Epoch 5 - loss 0.09021073579788208 - acc 0.97635



