## 52 GPU 지원
+ CuPy: GPU에서 병렬 계산을 수행하는 라이브러리로, NumPy와 거의 같은 API를 제공하며 대규모 배열 연산 속도가 빠르다.


### 52.1 쿠파이 설치 및 사용 방법

In [None]:
!pip install cupy

Collecting cupy
[?25l  Downloading https://files.pythonhosted.org/packages/14/2a/ef289e429be9021fab32f2a480a023efacb3cc9ff5e9496d788e98537c92/cupy-8.5.0.tar.gz (1.3MB)
[K     |████████████████████████████████| 1.3MB 5.9MB/s 
Building wheels for collected packages: cupy
  Building wheel for cupy (setup.py) ... [?25l[?25hdone
  Created wheel for cupy: filename=cupy-8.5.0-cp37-cp37m-linux_x86_64.whl size=53475804 sha256=79ba6b14b674a02d82598c6286763feb40f1e88f7d0d5f5b66eccf8d48f5335e
  Stored in directory: /root/.cache/pip/wheels/23/ed/46/fdfb361ceeab34bfbfe47a4023991e4106d81ec36f0e0115b1
Successfully built cupy
Installing collected packages: cupy
Successfully installed cupy-8.5.0


In [None]:
import cupy as cp

In [None]:
# Build a 2x3 CuPy array holding the integers 0..5 and print it.
x = cp.arange(6).reshape((2, 3))
print(x)

[[0 1 2]
 [3 4 5]]


In [None]:
# Reduce along axis 1 of x and print the result.
y = cp.sum(x, axis=1)
print(y)

[ 3 12]


In [None]:
import numpy as np
import cupy as cp


#### assert
+ assert는 함수가 아니라 조건을 검증하는 문(statement)이다. 함수나 클래스가 특정 형식의 변수를 필요로 할 때, 입력 변수가 그 형식을 만족하는지 확인하는 데 사용한다.
+ 조건을 만족하지 않으면 AssertionError가 발생한다.

In [None]:
# NumPy -> CuPy: cp.array() builds a CuPy array from a NumPy array.
n = np.array([1, 2, 3])
c = cp.array(n)
# isinstance is the idiomatic type check (it also accepts subclasses).
assert isinstance(c, cp.ndarray)
c

array([1, 2, 3])

In [None]:
# CuPy -> NumPy: cp.asnumpy() converts a CuPy array to a NumPy array.
c = cp.array([1, 2, 3])
n = cp.asnumpy(c)
# isinstance is the idiomatic type check (it also accepts subclasses).
assert isinstance(n, np.ndarray)
n

array([1, 2, 3])

In [None]:
# For a NumPy array, cp.get_array_module() hands back the numpy module.
x = np.array([1, 2, 3])
xp = cp.get_array_module(x)
assert xp is np

In [None]:
# For a CuPy array, cp.get_array_module() hands back the cupy module.
x = cp.array([1, 2, 3])
xp = cp.get_array_module(x)
assert xp is cp

#### cp.get_array_module()
+ 인수로 전달한 배열에 대응하는 모듈(numpy 또는 cupy)을 반환하는 함수

In [None]:
xp.sin(x)

array([0.84147098, 0.90929743, 0.14112001])

### 52.2 쿠다 모듈

#### dezero/cuda.py
+ 실제코드 https://github.com/WegraLee/deep-learning-from-scratch-3/blob/master/dezero/cuda.py
+ 실제 dezero 코드를 사용해야 하므로 먼저 git clone으로 저장소를 내려받아야 한다.
+ cuda.py 해석

In [None]:
!git clone https://github.com/WegraLee/deep-learning-from-scratch-3.git

Cloning into 'deep-learning-from-scratch-3'...
remote: Enumerating objects: 2097, done.[K
remote: Total 2097 (delta 0), reused 0 (delta 0), pack-reused 2097[K
Receiving objects: 100% (2097/2097), 32.30 MiB | 16.13 MiB/s, done.
Resolving deltas: 100% (1444/1444), done.


In [None]:
%cd deep-learning-from-scratch-3/

/content/deep-learning-from-scratch-3


In [None]:
import numpy as np
# CuPy is optional: `gpu_enable` records whether the import succeeded.
try:
  import cupy as cp
except ImportError:
  gpu_enable = False
else:
  # Expose the module under both names, `cp` and `cupy`.
  cupy = cp
  gpu_enable = True
from dezero import Variable

#### 세 함수
+ get_array_module() : x에 대응하는 모듈(numpy 또는 cupy)을 반환
+ as_numpy() : numpy 배열로 변환. 이미 numpy 배열이면 그대로 반환
+ as_cupy() : cupy 배열로 변환. 이미 cupy 배열이면 그대로 반환

In [None]:
def get_array_module(x):
  """Return the array module (numpy or cupy) that matches `x`.

  A Variable is unwrapped to its `.data` first. When CuPy could not be
  imported (`gpu_enable` is False) the result is always `np`; otherwise
  the decision is delegated to `cp.get_array_module`.
  """
  data = x.data if isinstance(x, Variable) else x
  return cp.get_array_module(data) if gpu_enable else np

def as_numpy(x):
  """Convert `x` to a NumPy array.

  Args:
    x: a Variable (its `.data` is used), a scalar, a NumPy array, or
      any other object accepted by `cp.asnumpy`.

  Returns:
    A NumPy array. A scalar is wrapped with `np.array`; an existing
    `np.ndarray` is returned unchanged; anything else goes through
    `cp.asnumpy`.
  """
  if isinstance(x, Variable):
    x = x.data

  if np.isscalar(x):
    return np.array(x)
  # The NumPy array type is `np.ndarray`; `np.darray` does not exist and
  # raised AttributeError for every non-scalar input.
  if isinstance(x, np.ndarray):
    return x
  return cp.asnumpy(x)

def as_cupy(x):
  """Convert `x` to a CuPy array via `cp.asarray`.

  A Variable is unwrapped to its `.data` first.

  Raises:
    Exception: if CuPy could not be imported (`gpu_enable` is False).
  """
  data = x.data if isinstance(x, Variable) else x
  if gpu_enable:
    return cp.asarray(data)
  raise Exception('쿠파이를 로드할 수 없습니다. 쿠파이를 설치해주세요!')

### 52.3 Variable/Layer/DataLoader 클래스 추가 구현
+ 각 클래스에 GPU 대응 기능을 추가.
+ 각 클래스에서 수정된 부분만을 기록

In [None]:
# Array types accepted as Variable data.
# cupy.ndarray is added only when `import cupy` succeeds.
try:
    import cupy
    array_types = (np.ndarray, cupy.ndarray)
except ImportError:
    # The trailing comma makes this a real one-element tuple, so
    # `array_types` has the same type in both branches.
    array_types = (np.ndarray,)

class Variable:
    """Container for an array plus gradient bookkeeping (abridged excerpt).

    Only the GPU-related parts of the class are reproduced here; in
    particular the body of `backward` is cut short.
    """
    __array_priority__ = 200

    def __init__(self, data, name=None):
        """Store `data` and initialize the bookkeeping attributes.

        Raises:
            TypeError: if `data` is neither None nor an instance of
                `array_types`.
        """
        if data is not None:
            if not isinstance(data, array_types):
                raise TypeError('{} is not supported'.format(type(data)))

        self.data = data
        self.name = name
        self.grad = None
        self.creator = None
        self.generation = 0



    def backward(self, retain_grad=False, create_graph=False):
        """Seed `self.grad` when it is unset (rest of the body not shown).

        `funcs` and `seen_set` are initialized but not used in this
        excerpt; `retain_grad` and `create_graph` are not read here.
        """
        if self.grad is None:
            # Look up the array module of self.data and build the initial
            # gradient with that module's ones_like.
            xp = dezero.cuda.get_array_module(self.data)
            self.grad = Variable(xp.ones_like(self.data))

        funcs = []
        seen_set = set()

    # The two methods below rely on the as_numpy / as_cupy helpers from
    # section 52.2, accessed through dezero.cuda.
    def to_cpu(self):
        """Replace `self.data` with `dezero.cuda.as_numpy(self.data)` unless it is None."""
        if self.data is not None:
            self.data = dezero.cuda.as_numpy(self.data)

    def to_gpu(self):
        """Replace `self.data` with `dezero.cuda.as_cupy(self.data)` unless it is None."""
        if self.data is not None:
            self.data = dezero.cuda.as_cupy(self.data)


In [None]:
class Layer:
  """Layer excerpt: only the CPU/GPU switching methods are shown.

  `params()` is called by both methods but is not defined in this excerpt.
  """

  def __init__(self):
    self._params = set()

  def to_cpu(self):
    """Call `to_cpu()` on every item yielded by `self.params()`."""
    for p in self.params():
      p.to_cpu()

  def to_gpu(self):
    """Call `to_gpu()` on every item yielded by `self.params()`."""
    for p in self.params():
      p.to_gpu()

In [None]:
import math
# Pillow is optional: `pil_available` records whether it can be imported.
pil_available = True
try:
    from PIL import Image
except ImportError:
    # Catch only a failed import; a bare `except:` would also swallow
    # KeyboardInterrupt and SystemExit.
    pil_available = False
import numpy as np
from dezero import cuda

class DataLoader:
    """Mini-batch iterator over a dataset (excerpt with the GPU additions).

    `reset()` is called by `__init__` and `__next__` but is not defined
    in this excerpt; it is expected to provide `self.iteration` and
    `self.index`, which `__next__` reads.
    """

    def __init__(self, dataset, batch_size, shuffle=True, gpu=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.data_size = len(dataset)
        self.max_iter = math.ceil(self.data_size / batch_size)
        # When True, batches are built with cuda.cupy instead of numpy.
        self.gpu = gpu

        self.reset()

    def __next__(self):
        """Return the next `(x, t)` batch, or reset and raise StopIteration."""
        if self.iteration >= self.max_iter:
            self.reset()
            raise StopIteration

        start = self.iteration * self.batch_size
        stop = start + self.batch_size
        samples = [self.dataset[k] for k in self.index[start:stop]]
        # Pick the array module according to the gpu flag.
        xp = cuda.cupy if self.gpu else np
        x = xp.array([sample[0] for sample in samples])
        t = xp.array([sample[1] for sample in samples])

        self.iteration += 1
        return x, t

    def to_cpu(self):
        """Make subsequent batches use numpy."""
        self.gpu = False

    def to_gpu(self):
        """Make subsequent batches use cuda.cupy."""
        self.gpu = True

### 52.4 함수 추가 구현

In [None]:
class Sin(Function):
  """Sine function, NumPy-only version: forward always calls np.sin."""

  def forward(self, x):
    return np.sin(x)

  def backward(self, gy):
    # The incoming gradient gy is multiplied by cos of the single input.
    (x,) = self.inputs
    return gy * cos(x)

In [None]:
from dezero import cuda

class Sin(Function):
  """Sine function that uses whichever array module matches its input."""

  def forward(self, x):
    # cuda.get_array_module selects the module whose sin is applied to x.
    return cuda.get_array_module(x).sin(x)

  def backward(self, gy):
    # The incoming gradient gy is multiplied by cos of the single input.
    (x,) = self.inputs
    return gy * cos(x)

### 52.5 GPU로 MNIST 학습하기

In [None]:
import time
import dezero
import dezero.functions as F
from dezero import optimizers
from dezero import DataLoader
from dezero.models import MLP

# Training configuration.
max_epoch = 5
batch_size = 100

train_set = dezero.datasets.MNIST(train=True)
train_loader = DataLoader(train_set, batch_size)
model = MLP((1000, 10))
optimizer = optimizers.SGD().setup(model)

# Switch the data loader and the model to the GPU when dezero.cuda reports
# that it is enabled.
if dezero.cuda.gpu_enable:
  train_loader.to_gpu()
  model.to_gpu()

for epoch in range(max_epoch):
  start = time.time()
  sum_loss = 0

  for x, t in train_loader:
    y = model(x)
    loss = F.softmax_cross_entropy(y, t)
    model.cleargrads()
    loss.backward()
    optimizer.update()
    # Weight the batch loss by the batch size so that dividing the total by
    # len(train_set) below yields a per-sample average.
    # NOTE(review): assumes softmax_cross_entropy returns a batch mean - confirm.
    sum_loss += float(loss.data) * len(t)

  elapsed_time = time.time() - start

  print('epoch: {}, loss: {:.4f}, time: {:.4f}[sec]'.format(epoch+1,sum_loss/len(train_set),elapsed_time))