# step 52 GPU 지원

## 52.1 쿠파이 설치 및 사용방법

### 435

In [None]:
# !pip install cupy
# GPU 설정으로 바꾸면
# 이미 설치 되어있음

In [None]:
import os, sys
from google.colab import drive
# Mount Google Drive inside the Colab runtime at /content/drive so that files
# stored on the drive can be reached through the local filesystem.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
my_path = '/content/notebooks'
package_dir = '/content/drive/MyDrive/Colab Notebooks/deep-learning-from-scratch-3-master'
# Expose the deep-learning-from-scratch-3-master folder on Google Drive through
# a symbolic link at /content/notebooks. os.symlink raises FileExistsError when
# the link is already there, so only create it on the first run of this cell.
if not os.path.islink(my_path):
    os.symlink(package_dir, my_path)
# Put the linked folder at the front of the module search path; skip the
# insert on re-runs so sys.path does not accumulate duplicate entries.
if my_path not in sys.path:
    sys.path.insert(0, my_path)

### 436

In [None]:
import cupy as cp

# Build a 2x3 CuPy array holding 0..5 and display it.
x = cp.arange(6).reshape((2, 3))
print(x)
print('---------')
# Sum along axis 1, i.e. one total per row.
y = cp.sum(x, axis=1)
print(y)

[[0 1 2]
 [3 4 5]]
---------
[ 3 12]


In [None]:
import numpy as np

# NumPy -> CuPy
n = np.array([1, 2, 3])
c = cp.asarray(n)  # added: convert a NumPy array into a CuPy array
# isinstance is the idiomatic type check (type(x) == T rejects subclasses).
assert isinstance(c, cp.ndarray)

# CuPy -> NumPy
c = cp.array([1, 2, 3])
n = cp.asnumpy(c)  # added: convert a CuPy array back into a NumPy array
assert isinstance(n, np.ndarray)

### 437

In [None]:
# Case 1: x is a NumPy array, so the reported module is numpy.
x = np.array([1, 2, 3])
xp = cp.get_array_module(x)  # added
assert xp is np

# Case 2: x is a CuPy array, so the reported module is cupy.
x = cp.array([1, 2, 3])
xp = cp.get_array_module(x)  # added
assert xp is cp

## 52.2 쿠다 모듈

### 438 cuda.py

In [None]:
## dezero / cuda.py

import numpy as np

# CuPy is an optional dependency: record whether it could be imported so the
# rest of the module can fall back to NumPy when it is missing.
try:
  import cupy as cp
except ImportError:
  gpu_enable = False
else:
  # Also publish the module under its full name (used as `cuda.cupy`).
  cupy = cp
  gpu_enable = True
from dezero import Variable

### 438~439 cuda.py

In [None]:
## dezero / cuda.py

def get_array_module(x):
  """Return the array module (``np`` or ``cp``) that matches ``x``.

  Args:
    x: A ``Variable`` or a raw array. A ``Variable`` is unwrapped to its
      ``data`` attribute first.

  Returns:
    ``np`` when CuPy could not be imported (``gpu_enable`` is False);
    otherwise the result of ``cp.get_array_module(x)``.
  """
  if isinstance(x, Variable):
    x = x.data

  # The lookup below must run for every input, not only for Variable
  # instances; otherwise raw arrays would fall through and yield None.
  if not gpu_enable:
    return np

  return cp.get_array_module(x)

def as_numpy(x):
  """Convert ``x`` to a NumPy array.

  Args:
    x: A ``Variable`` (unwrapped to its ``data``), a scalar, a NumPy array
      or a CuPy array.

  Returns:
    ``np.array(x)`` for scalars, ``x`` itself when it already is a NumPy
    array, otherwise the result of ``cp.asnumpy(x)``.
  """
  if isinstance(x, Variable):
    x = x.data

  if np.isscalar(x):
    return np.array(x)

  if isinstance(x, np.ndarray):
    return x

  return cp.asnumpy(x)


# Defined at module level so callers can reach it as ``cuda.as_cupy``; nested
# after the return in as_numpy it would be unreachable dead code.
def as_cupy(x):
  """Convert ``x`` to a CuPy array.

  Args:
    x: A ``Variable`` (unwrapped to its ``data``) or anything accepted by
      ``cp.asarray``.

  Returns:
    The result of ``cp.asarray(x)``.

  Raises:
    Exception: If CuPy could not be imported (``gpu_enable`` is False).
  """
  if isinstance(x, Variable):
    x = x.data

  if not gpu_enable:
    raise Exception("쿠파이(CuPy)를 로드 할 수 없습니다. 쿠파이를 설치해주세요")

  return cp.asarray(x)

## 52.3 Variable/Layer/DataLoader 클래스 추가구현

### 440 core.py

In [None]:
# dezero/core.py

# Array classes accepted as Variable data. Always a tuple, so both branches
# have the same shape and the value can be iterated or concatenated.
try:
  import cupy
  array_types = (np.ndarray, cupy.ndarray)
except ImportError:
  # The trailing comma matters: (np.ndarray) is just the class itself.
  array_types = (np.ndarray,)

class Variable:
  """Excerpt of ``Variable`` showing the parts changed for CuPy support."""

  def __init__(self, data, name=None):
    """Validate ``data``; the rest of the initializer is elided in this excerpt.

    Raises:
      TypeError: If ``data`` is not None and is not an instance of one of
        the classes in ``array_types``.
    """
    if data is not None:
      if not isinstance(data, array_types):
        raise TypeError('{} is not supported'.format(type(data)))

### ...

  def backward(self, retain_grad=False, create_graph=False):
    """Seed the gradient when none is set; the remainder is elided in this excerpt."""
    if self.grad is None:
      # Build the all-ones seed with the same array module as the data, so
      # the gradient is the same kind of array as ``self.data``.
      xp = dezero.cuda.get_array_module(self.data)
      self.grad = Variable(xp.ones_like(self.data))

### ...



### 440~441 core.py

In [None]:
# dezero/core.py

class Variable:
  # ... (other members are omitted from this excerpt)

  def to_cpu(self):
    """Replace ``self.data`` with ``dezero.cuda.as_numpy(self.data)``; no-op when it is None."""
    if self.data is None:
      return
    self.data = dezero.cuda.as_numpy(self.data)

  def to_gpu(self):
    """Replace ``self.data`` with ``dezero.cuda.as_cupy(self.data)``; no-op when it is None."""
    if self.data is None:
      return
    self.data = dezero.cuda.as_cupy(self.data)

### 441 layers.py

In [None]:
# dezero/layers.py

class Layer:
  # ... (other members are omitted from this excerpt)

  def to_cpu(self):
    """Call ``to_cpu()`` on every parameter yielded by ``self.params()``."""
    for param in self.params():
      param.to_cpu()

  def to_gpu(self):
    """Call ``to_gpu()`` on every parameter yielded by ``self.params()``."""
    # Must be params() like in to_cpu; ``self.param`` does not exist.
    for param in self.params():
      param.to_gpu()

### 441~442 dataloaders.py

In [None]:
# dezero/dataloaders.py

### ...
import numpy as np
from dezero import cuda  # 추가

class DataLoader:
  """Excerpt of ``DataLoader`` showing the parts changed for GPU support."""

  def __init__(self, dataset, batch_size, shuffle=True, gpu=False):  # gpu=False is the newly added parameter
    """Store the loader settings and call ``self.reset()``.

    Args:
      dataset: Object supporting ``len()``.
      batch_size: Number of examples per batch.
      shuffle: Stored as ``self.shuffle``; its use is not visible in this excerpt.
      gpu: When true, ``__next__`` builds batches with CuPy instead of NumPy.
    """
    self.dataset = dataset
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.data_size = len(dataset)
    # Batches per pass, rounded up.
    # NOTE(review): `math` is not imported in the visible excerpt — presumably
    # imported in the elided part of the file; confirm.
    self.max_iter = math.ceil(self.data_size / batch_size)
    self.gpu = gpu  # added: selects the array module used in __next__

    self.reset()

  def __next__(self):
    """Return the next ``(x, t)`` pair of arrays."""
    ### ... (elided here; presumably where `batch` is built — confirm against the full file)
    # Pick the array module from the gpu flag: CuPy when set, NumPy otherwise.
    xp = cuda.cupy if self.gpu else np
    x = xp.array([example[0] for example in batch])
    t = xp.array([example[1] for example in batch])

    self.iteration += 1

    return x, t

  def to_cpu(self):
    """Make subsequent batches NumPy arrays by clearing the gpu flag."""
    self.gpu = False
  
  def to_gpu(self):
    """Make subsequent batches CuPy arrays by setting the gpu flag."""
    self.gpu = True


## 52.4 함수 추가 구현

### 442~443 functions.py

In [None]:
# dezero/functions.py
from dezero import Function ## 실행 할려고 import

class Sin(Function):
  """Sine function; this version always computes with NumPy."""

  def forward(self, x):
    """Return ``np.sin(x)``."""
    return np.sin(x)

  def backward(self, gy):
    """Return the input gradient ``gy * cos(x)`` for the single input ``x``."""
    (x,) = self.inputs
    return gy * cos(x)

### 443 functions.py

In [None]:
# dezero/functions.py

from dezero import cuda

class Sin(Function):
  """Sine function that computes with the array module matching its input."""

  def forward(self, x):
    """Return ``xp.sin(x)`` where ``xp`` is the array module of ``x``."""
    xp = cuda.get_array_module(x)  # added: choose the module from the input
    return xp.sin(x)  # added: use it instead of hard-coded np

  def backward(self, gy):
    """Return the input gradient ``gy * cos(x)`` for the single input ``x``."""
    (x,) = self.inputs
    return gy * cos(x)

### 444 core.py

In [None]:
# dezero/core.py

def as_array(x, array_module=np):
  """Wrap a scalar into a 0-dimensional array; pass anything else through.

  Args:
    x: Value to normalize.
    array_module: Module whose ``array`` function is used for scalars
      (``np`` by default; a CuPy module may be passed instead).

  Returns:
    ``array_module.array(x)`` when ``np.isscalar(x)`` is true, otherwise
    ``x`` unchanged.
  """
  # The parameter name must match the name used below; a misspelled
  # parameter makes every scalar input fail with NameError.
  if np.isscalar(x):
    return array_module.array(x)
  return x

def add(x0, x1):
  """Apply ``Add`` to ``x0`` and ``x1``, first passing ``x1`` through ``as_array``."""
  # Use the array module of x0's data so a scalar x1 becomes the same kind of array.
  xp = dezero.cuda.get_array_module(x0.data)
  x1 = as_array(x1, xp)
  return Add()(x0, x1)

def mul(x0, x1):
  """Apply ``Mul`` to ``x0`` and ``x1``, first passing ``x1`` through ``as_array``."""
  # Use the array module of x0's data so a scalar x1 becomes the same kind of array.
  xp = dezero.cuda.get_array_module(x0.data)
  x1 = as_array(x1, xp)
  return Mul()(x0, x1)

# sub, rsub, div, rdiv도 똑같이 수정

## 52.5 GPU로 MNIST 학습하기

- Colab에서 MNIST 데이터 파일 4개(train-images-idx3-ubyte.gz 외 3개)를 다운로드할 때 HTTP 403 에러가 발생함
- dezero/datasets.py의 prepare()를 수정하여 GitHub에서 다운로드하도록 변경 (아래 첫 번째 코드가 원본, 두 번째 코드가 수정본)
		
		def prepare(self):
			url = 'http://yann.lecun.com/exdb/mnist/'
			train_files = {'target': 'train-images-idx3-ubyte.gz',
				       'label': 'train-labels-idx1-ubyte.gz'}
			test_files = {'target': 't10k-images-idx3-ubyte.gz',
				      'label': 't10k-labels-idx1-ubyte.gz'}
		
		def prepare(self):
			## 수정 ##
			url = 'https://github.com/FreeRenOS/study/blob/main/deep%20learning%20from%20scratch%203/'
			# url = 'http://yann.lecun.com/exdb/mnist/'
			train_files = {'target': 'train-images-idx3-ubyte.gz?raw=true',
				       'label': 'train-labels-idx1-ubyte.gz?raw=true'}
			test_files = {'target': 't10k-images-idx3-ubyte.gz?raw=true',
				      'label': 't10k-labels-idx1-ubyte.gz?raw=true'}

### 444~445 step52.py

In [None]:
# steps/step52.py

import time
import dezero
import dezero.functions as F
from dezero import optimizers
from dezero import DataLoader
from dezero.models import MLP


# Hyperparameters.
max_epoch = 5
batch_size = 100

# Data, model and optimizer.
train_set = dezero.datasets.MNIST(train=True)
train_loader = DataLoader(train_set, batch_size)
model = MLP((1000, 10))
optimizer = optimizers.SGD().setup(model)

# GPU mode (added): move the loader and then the model when CuPy is available.
if dezero.cuda.gpu_enable:
    for target in (train_loader, model):
        target.to_gpu()

report = 'epoch: {}, loss: {:.4f}, time: {:.4f}[sec]'
for epoch in range(max_epoch):
    start = time.time()
    sum_loss = 0

    for x, t in train_loader:
        y = model(x)
        loss = F.softmax_cross_entropy(y, t)
        model.cleargrads()
        loss.backward()
        optimizer.update()
        # Weight the batch loss by the batch size so the epoch figure below
        # is a per-example average.
        sum_loss += float(loss.data) * len(t)

    elapsed_time = time.time() - start
    avg_loss = sum_loss / len(train_set)
    print(report.format(epoch + 1, avg_loss, elapsed_time))

epoch: 1, loss: 1.9068, time: 3.2208[sec]
epoch: 2, loss: 1.2755, time: 3.2705[sec]
epoch: 3, loss: 0.9184, time: 3.4418[sec]
epoch: 4, loss: 0.7351, time: 3.2391[sec]
epoch: 5, loss: 0.6309, time: 3.3322[sec]


### CPU로 구글 코랩 실행

In [None]:
  train_loader.to_cpu()
  model.to_cpu()

for epoch in range(max_epoch):
    start = time.time()
    sum_loss = 0

    for x, t in train_loader:
        y = model(x)
        loss = F.softmax_cross_entropy(y, t)
        model.cleargrads()
        loss.backward()
        optimizer.update()
        sum_loss += float(loss.data) * len(t)

    elapsed_time = time.time() - start
    print('epoch: {}, loss: {:.4f}, time: {:.4f}[sec]'.format(
        epoch + 1, sum_loss / len(train_set), elapsed_time))

epoch: 1, loss: 0.4290, time: 11.0820[sec]
epoch: 2, loss: 0.4156, time: 11.1832[sec]
epoch: 3, loss: 0.4047, time: 10.9840[sec]
epoch: 4, loss: 0.3953, time: 10.9846[sec]
epoch: 5, loss: 0.3869, time: 11.3051[sec]
