# Computational Graphs


**NumPy**

In [1]:
import numpy as np
np.random.seed(0)

# Every operand of the graph is an N x D matrix.
N, D = 3, 4
# Drawn in the order x, y, z so the seeded values match the usual
# one-assignment-per-line form.
x, y, z = (np.random.randn(N, D) for _ in range(3))

# Forward pass: c = sum(x * y + z)
a = np.multiply(x, y)
b = np.add(a, z)
c = b.sum()

# Backward pass, walking the graph from the scalar output to the inputs.
grad_c = 1.0
grad_b = np.ones((N, D)) * grad_c  # sum() hands the upstream gradient to every element
grad_a = np.array(grad_b)          # addition passes the gradient through (independent copy)
grad_z = np.array(grad_b)
grad_x = np.multiply(grad_a, y)    # d(x*y)/dx = y
grad_y = np.multiply(grad_a, x)    # d(x*y)/dy = x

**TensorFlow**

In [12]:
# Disabled cell: everything below is a single triple-quoted string, so none of
# it executes; the cell only evaluates to (and echoes) that string.
#
# The text inside is the same graph as the NumPy cell, c = sum(x * y + z),
# written with TF1-style placeholders through tensorflow.compat.v1:
# tf.gradients is asked for dc/dx, dc/dy, dc/dz and a Session feeds random
# N x D arrays for x, y and z.
#
# The Korean notes inside the string translate to: "placeholder disappeared
# when tensorflow was updated, so another approach has to be found" and
# "a way to lower the tensorflow version".
#
# NOTE(review): the second note (the line between tf.disable_v2_behavior() and
# the N, D assignment) is bare prose with no leading '#'. If the surrounding
# quotes are removed to re-enable the cell, that line is a syntax error and
# must be turned into a comment first.
'''
# tensorflow가 업데이트 되면서 placeholder가 사라짐
# 따라서 다른 방법을 강구해야함.
import numpy as np
np.random.seed(0)
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

tensorflow의 버전을 낮춰주는 방법

N, D =  3000, 4000

with tf.device('/gpu:0'): # run this code using gpu
  x = tf.placeholder(tf.float32)
  y = tf.placeholder(tf.float32)
  z = tf.placeholder(tf.float32)

  a = x * y
  b = a + z
  c = tf.reduce_sum(b)

grad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])

with tf.Session() as sess:
  values = {
      x: np.random.randn(N, D),
      y: np.random.randn(N, D),
      z: np.random.randn(N, D)
  }
  out = sess.run([c, grad_x, grad_y, grad_z], feed_dict = values)
  c_val, grad_x_val, grad_y_val, grad_z_val = out
'''

"\n# tensorflow가 업데이트 되면서 placeholder가 사라짐\n# 따라서 다른 방법을 강구해야함.\nimport numpy as np\nnp.random.seed(0)\nimport tensorflow.compat.v1 as tf\ntf.disable_v2_behavior()\n\ntensorflow의 버전을 낮춰주는 방법\n\nN, D =  3000, 4000\n\nwith tf.device('/gpu:0'): # run this code using gpu\n  x = tf.placeholder(tf.float32)\n  y = tf.placeholder(tf.float32)\n  z = tf.placeholder(tf.float32)\n\n  a = x * y\n  b = a + z\n  c = tf.reduce_sum(b)\n\ngrad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])\n\nwith tf.Session() as sess:\n  values = {\n      x: np.random.randn(N, D),\n      y: np.random.randn(N, D),\n      z: np.random.randn(N, D)\n  }\n  out = sess.run([c, grad_x, grad_y, grad_z], feed_dict = values)\n  c_val, grad_x_val, grad_y_val, grad_z_val = out\n"

**PyTorch**

In [5]:
# Disabled cell: the whole body is one triple-quoted string, so nothing in it
# runs; the cell only evaluates to that string.
#
# The text inside builds c = sum(x * y + z) from N x D random tensors that are
# moved to CUDA and wrapped in Variable(..., requires_grad = True), calls
# c.backward(), and prints the .grad of x, y and z.
#
# NOTE(review): the .cuda() calls mean the code presumably needs a GPU to run
# if it is re-enabled -- confirm the target environment.
'''
import torch
from torch.autograd import Variable

N, D = 3, 4

x = Variable(torch.randn(N, D).cuda(), requires_grad = True)
y = Variable(torch.randn(N, D).cuda(), requires_grad = True)
z = Variable(torch.randn(N, D).cuda(), requires_grad = True)

a = x * y
b = a + z
c = torch.sum(b)

c.backward()

print(x.grad.data)
print(y.grad.data)
print(z.grad.data)
'''

'\nimport torch\nfrom torch.autograd import Variable\n\nN, D = 3, 4\n\nx = Variable(torch.randn(N, D).cuda(), requires_grad = True)\ny = Variable(torch.randn(N, D).cuda(), requires_grad = True)\nz = Variable(torch.randn(N, D).cuda(), requires_grad = True)\n\na = x * y\nb = a + z\nc = torch.sum(b)\n\nc.backward()\n\nprint(x.grad.data)\nprint(y.grad.data)\nprint(z.grad.data)\n'

In [25]:
# Disabled cell: the whole body is one triple-quoted string, so nothing in it
# runs; the cell only evaluates to that string.
#
# The text inside sketches a two-layer fully connected network (ReLU hidden
# layer, mean-squared-error loss, gradient descent with step 1e-3) using the
# tensorflow.compat.v1 API. The first Korean note inside translates to:
# "a lot of the related API seems to have changed along with the tensorflow
# version".
#
# NOTE(review): as written the text would not run even if un-quoted:
#   * y_pred is built with tf.layer.dense, while the line above it uses
#     tf.layers.dense -- looks like a typo.
#   * h and y_pred are then immediately reassigned from tf.matmul(x, w1) and
#     tf.matmul(h, w2), but w1 and w2 are never defined in this cell.
#   * diff is computed and never used; losses is created and never appended to.
#   * "loss_val, = sess.run([loss, updates], ...)" unpacks into a single target
#     although two fetches are requested -- presumably a ValueError; confirm.
#   * np is used without being imported in this cell.
#   * tf.contrib may not exist in the installed TensorFlow -- verify.
'''
# tensorflow의 버전이 바뀌면서 관련 내용도 많이 바뀐듯함.
# fc layer + Relu training network
# loss function : L2
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))

init = tf.contrib.layers.xavier_initializer()
h = tf.layers.dense(inputs = x, units = H, activation = tf.nn.relu,
                    kernel_initializer = init)
y_pred = tf.layer.dense(inputs=h, units=D,
                        kernel_initializer=init)

h = tf.maximum(tf.matmul(x, w1), 0)
y_pred = tf.matmul(h, w2)
diff = y_pred - y
loss = tf.losses.mean_squared_error(y_pred, y)

optimizer = tf.train.GradientDescentOptimizer(1e-3)
updates = optimizer.minimize(loss)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  values = {x: np.random.randn(N, D),
            y: np.random.randn(N, D),}
  losses = []
  for t in range(50):
    loss_val, = sess.run([loss, updates], feed_dict=values)
'''

'\n# tensorflow의 버전이 바뀌면서 관련 내용도 많이 바뀐듯함.\n# fc layer + Relu training network\n# loss function : L2\nimport tensorflow.compat.v1 as tf\ntf.disable_v2_behavior()\n\nN, D, H = 64, 1000, 100\nx = tf.placeholder(tf.float32, shape=(N, D))\ny = tf.placeholder(tf.float32, shape=(N, D))\n\ninit = tf.contrib.layers.xavier_initializer()\nh = tf.layers.dense(inputs = x, units = H, activation = tf.nn.relu,\n                    kernel_initializer = init)\ny_pred = tf.layer.dense(inputs=h, units=D,\n                        kernel_initializer=init)\n\nh = tf.maximum(tf.matmul(x, w1), 0)\ny_pred = tf.matmul(h, w2)\ndiff = y_pred - y\nloss = tf.losses.mean_squared_error(y_pred, y)\n\noptimizer = tf.train.GradientDescentOptimizer(1e-3)\nupdates = optimizer.minimize(loss)\n\nwith tf.Session() as sess:\n  sess.run(tf.global_variables_initializer())\n  values = {x: np.random.randn(N, D),\n            y: np.random.randn(N, D),}\n  losses = []\n  for t in range(50):\n    loss_val, = sess.run([loss, updates],

In [27]:
# Disabled cell: the whole body is one triple-quoted string, so nothing in it
# runs; the cell only evaluates to that string.
#
# The text inside defines a Keras Sequential model (Dense D -> H, ReLU,
# Dense H -> D), compiles it with mean-squared-error loss and SGD(lr=1e0), and
# fits it for 50 epochs on random N x D inputs and targets with batch size N.
#
# NOTE(review): np is used without being imported in this cell, and the
# keras.layers.core import path plus the output_dim / nb_epoch / lr argument
# names look like an older Keras API -- verify against the installed version
# before re-enabling.
'''
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD

N, D, H = 64, 1000, 100

model = Sequential()
model.add(Dense(input_dim=D, output_dim=H))
model.add(Activation('relu'))
model.add(Dense(input_dim=H, output_dim=D))

optimizer = SGD(lr=1e0)
model.compile(loss = "mean_squared_error",
              optimizer=optimizer)

x = np.random.randn(N, D)
y = np.random.randn(N, D)
history = model.fit(x, y, nb_epoch=50,
                    batch_size = N, verbose = 0)
'''

'\nfrom keras.models import Sequential\nfrom keras.layers.core import Dense, Activation\nfrom keras.optimizers import SGD\n\nN, D, H = 64, 1000, 100\n\nmodel = Sequential()\nmodel.add(Dense(input_dim=D, output_dim=H))\nmodel.add(Activation(\'relu\'))\nmodel.add(Dense(input_dim=H, output_dim=D))\n\noptimizer = SGD(lr=1e0)\nmodel.compile(loss = "mean_squared_error",\n              optimizer=optimizer)\n\nx = np.random.randn(N, D)\ny = np.random.randn(N, D)\nhistory = model.fit(x, y, nb_epoch=50,\n                    batch_size = N, verbose = 0)\n'

In [28]:
# Two-layer network trained with hand-written gradients on plain PyTorch
# tensors (no autograd involved).
import torch

# CPU float tensors; for GPU use torch.cuda.FloatTensor instead.
dtype = torch.FloatTensor

# Batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10
# Random data first, then random initial weights (draw order kept as x, y, w1, w2).
x = torch.randn(N, D_in).type(dtype)
y = torch.randn(N, D_out).type(dtype)
w1 = torch.randn(D_in, H).type(dtype)
w2 = torch.randn(H, D_out).type(dtype)

learning_rate = 1e-6
for t in range(500):
  # Forward pass: linear -> ReLU -> linear, then summed squared error.
  h = torch.mm(x, w1)
  h_relu = torch.clamp(h, min=0)
  y_pred = torch.mm(h_relu, w2)
  loss = torch.sum(torch.pow(y_pred - y, 2))

  # Backward pass: chain rule applied by hand, output layer first.
  grad_y_pred = 2.0 * (y_pred - y)
  grad_w2 = torch.mm(h_relu.t(), grad_y_pred)
  grad_h_relu = torch.mm(grad_y_pred, w2.t())
  grad_h = grad_h_relu.clone()
  grad_h[h < 0] = 0  # ReLU passes no gradient where its input was negative
  grad_w1 = torch.mm(x.t(), grad_h)

  # Plain gradient-descent step, applied in place.
  w1.sub_(learning_rate * grad_w1)
  w2.sub_(learning_rate * grad_w2)

In [32]:
# Autograd
# Disabled cell: the body below is one triple-quoted string, so nothing in it
# runs; the cell only evaluates to that string.
#
# The text inside is the two-layer network again, with the hand-written
# backward pass replaced by loss.backward(): w1 and w2 are wrapped in
# Variable(..., requires_grad = True), their .grad is zeroed before each
# backward call, and the weights are updated through .data.
#
# NOTE(review): "if w1.grad:" / "if w2.grad:" truth-test a tensor; on a
# multi-element gradient this presumably raises instead of acting as a
# "has a gradient yet" check -- "is not None" would be the safer test. Confirm
# before re-enabling.
'''
import torch
from torch.autograd import Variable

N, D_in, H, D_out = 64, 1000, 100, 10
x = Variable(torch.randn(N, D_in), requires_grad = False)
y = Variable(torch.randn(N, D_out), requires_grad = False)
w1 = Variable(torch.randn(D_in, H), requires_grad = True)
w2 = Variable(torch.randn(H, D_out), requires_grad = True)

learning_rate = 1e-6
for t in range(500):
  y_pred = x.mm(w1).clamp(min=0).mm(w2)
  loss = (y_pred - y).pow(2).sum()

  if w1.grad: w1.grad.data.zero_()
  if w2.grad: w2.grad.data.zero_()
  loss.backward()

  w1.data -= learning_rate * w1.grad.data
  w2.data -= learning_rate * w2.grad.data
'''

'\nimport torch\nfrom torch.autograd import Variable\n\nN, D_in, H, D_out = 64, 1000, 100, 10\nx = Variable(torch.randn(N, D_in), requires_grad = False)\ny = Variable(torch.randn(N, D_out), requires_grad = False)\nw1 = Variable(torch.randn(D_in, H), requires_grad = True)\nw2 = Variable(torch.randn(H, D_out), requires_grad = True)\n\nlearning_rate = 1e-6\nfor t in range(500):\n  y_pred = x.mm(w1).clamp(min=0).mm(w2)\n  loss = (y_pred - y).pow(2).sum()\n\n  if w1.grad: w1.grad.data.zero_()\n  if w2.grad: w2.grad.data.zero_()\n  loss.backward()\n\n  w1.data -= learning_rate * w1.grad.data\n  w2.data -= learning_rate * w2.grad.data\n'

In [34]:
# PyTorch : nn
# Trains a small fully connected network built from torch.nn layers, using
# autograd for the backward pass and Adam for the parameter update.

import torch
# Variable is a deprecated no-op wrapper around tensors; this cell no longer
# uses it, but the import is retained because other cells may still reference
# the name.
from torch.autograd import Variable

# Batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10
# Plain tensors are enough: inputs and targets do not need gradients, and
# requires_grad is False by default for torch.randn.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out))
# reduction='sum' is the supported spelling of the deprecated
# size_average=False: the squared errors are summed, not averaged.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(),
                             lr = learning_rate)

for t in range(500):
  # Forward pass and loss on the full batch.
  y_pred = model(x)
  loss = loss_fn(y_pred, y)

  # Gradients accumulate across backward() calls, so clear them first.
  optimizer.zero_grad()
  loss.backward()

  optimizer.step() # model parameter update
  # A hand-written plain gradient-descent update, in place of
  # optimizer.step(), would look like:
  #   for param in model.parameters():
  #     param.data -= learning_rate * param.grad.data



In [37]:
# Define new Modules
# nn module class
from torch.utils.data import TensorDataset, DataLoader
class TwoLayerNet(torch.nn.Module):
  """Two linear layers with a clamp-at-zero (ReLU) non-linearity in between.

  Args:
    D_in: number of input features.
    H: number of hidden units.
    D_out: number of output features.
  """

  def __init__(self, D_in, H, D_out):
    super().__init__()
    # linear1 is created before linear2, so parameter initialisation draws
    # from the random generator in that order.
    self.linear1 = torch.nn.Linear(D_in, H)
    self.linear2 = torch.nn.Linear(H, D_out)

  def forward(self, x):
    """Return linear2 applied to linear1(x) clamped below at zero."""
    hidden = torch.clamp(self.linear1(x), min=0)
    return self.linear2(hidden)

# Batch size of the full data set, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Variable is a deprecated no-op wrapper, so plain tensors are used; tensors
# from torch.randn do not require grad by default.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# The loader splits the N samples into mini-batches of 8 (x, y) pairs.
loader = DataLoader(TensorDataset(x, y), batch_size = 8)

model = TwoLayerNet(D_in, H, D_out)

# reduction='sum' is the supported spelling of the deprecated
# size_average=False: the squared errors are summed, not averaged.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-4)

for epoch in range(10):
  for x_batch, y_batch in loader:
    # Train on the mini-batch handed out by the loader. Feeding the full
    # x / y here instead would silently ignore the batching and repeat a
    # full-batch step once per mini-batch.
    y_pred = model(x_batch)
    loss = criterion(y_pred, y_batch)

    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()



In [38]:
# PyTorch : Pretrained Models
# Instantiates three torchvision architectures with pretrained = True; the
# download log under this cell shows each checkpoint being fetched into the
# local torch hub cache.

import torch
import torchvision

# Built in the order alexnet, vgg16, resnet101, so the downloads happen in
# that same order.
alexnet, vgg16, resnet101 = (
    getattr(torchvision.models, arch_name)(pretrained = True)
    for arch_name in ("alexnet", "vgg16", "resnet101")
)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-4df8aa71.pth


HBox(children=(FloatProgress(value=0.0, max=244418560.0), HTML(value='')))




Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))




Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /root/.cache/torch/hub/checkpoints/resnet101-5d3b4d8f.pth


HBox(children=(FloatProgress(value=0.0, max=178728960.0), HTML(value='')))




TensorFlow -> 2 steps

1. Build the graph once.
2. Run that same graph on every iteration.
(static computational graph)

PyTorch
1. Build a new graph on every forward pass.
(dynamic computational graph)
