<a href="https://colab.research.google.com/github/Chihyuk/Genie/blob/master/%EB%8D%B0%EC%9D%B4%ED%84%B0%EA%B3%BC%ED%95%99_19%EC%9E%A5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 딥러닝
- 한 개 이상의 은닉층을 지닌 깊은 신경망
- 간단한 신경망을 포함한 다양한 신경망

# 텐서: n차원의 배열

In [None]:
import sys
# Add the Drive folder to the module search path so modules stored there
# can be imported (presumably the helper modules imported later in this
# notebook live here -- verify).
sys.path.append('/content/drive/MyDrive/ds')

In [None]:
# A tensor is represented as a plain (possibly nested) Python list; this
# alias exists so signatures can say `Tensor` instead of `list`.
Tensor = list

In [None]:
from typing import List

In [None]:
def shape(tensor: Tensor) -> List[int]:
  """Return the size of each dimension of a nested-list tensor.

  Only the first element of every level is inspected, so the result is
  meaningful for rectangular tensors (all sub-lists at one level having the
  same length).

  An empty list ends the walk: ``shape([])`` is ``[0]`` and
  ``shape([[], []])`` is ``[2, 0]`` rather than an ``IndexError``.
  """
  sizes: List[int] = []
  while isinstance(tensor, list):
    sizes.append(len(tensor))
    if not tensor:
      # Nothing to descend into, so deeper dimensions cannot be known.
      break
    tensor = tensor[0]
  return sizes

In [None]:
# Compare against the expected sizes; asserting only the truthiness of the
# returned list would pass for any non-empty result.
assert shape([1, 2, 3]) == [3]
assert shape([[1, 2], [3, 4], [5, 6]]) == [3, 2]

In [None]:
def is_1d(tensor: Tensor) -> bool:
  """Return True when the tensor is a flat list (its first element is not a list).

  An empty tensor has no nested lists, so it is treated as one-dimensional
  instead of raising ``IndexError`` on ``tensor[0]``.
  """
  return not tensor or not isinstance(tensor[0], list)

In [None]:
def tensor_sum(tensor: Tensor) -> float:
  """Add up every scalar held in the tensor, whatever its nesting depth."""
  if not is_1d(tensor):
    # Sum each sub-tensor recursively, then sum those partial totals.
    return sum(map(tensor_sum, tensor))
  return sum(tensor)

In [None]:
from typing import Callable

In [None]:
def tensor_apply(f: Callable[[float], float], tensor: Tensor) -> Tensor:
  """Apply ``f`` to every scalar of ``tensor`` and return a new tensor.

  The result has the same nesting structure as the input; the input is not
  modified.
  """
  if is_1d(tensor):
    return [f(x) for x in tensor]
  else:
    # Recurse into each sub-tensor. The loop variable must be the same name
    # that is passed to the recursive call.
    return [tensor_apply(f, tensor_i) for tensor_i in tensor]

In [None]:
def fun_ex(x: float) -> float:
  """Example scalar function: double ``x`` and subtract three."""
  doubled = 2 * x
  return doubled - 3

In [None]:
def zeros_like(tensor: Tensor) -> Tensor:
  """Build a tensor with the same structure as ``tensor``, filled with 0.0."""
  def to_zero(_value: float) -> float:
    # The original value is irrelevant; only the structure is reused.
    return 0.0

  return tensor_apply(to_zero, tensor)

In [None]:
def tensor_combine(f: Callable[[float, float], float],
                   t1: Tensor,
                   t2: Tensor) -> Tensor:
    """Combine two tensors element by element with the binary function ``f``.

    The nesting of ``t1`` decides how deep the recursion goes; elements are
    paired with ``zip``, so pairing stops at the shorter of the two at each
    level.
    """
    at_leaf_level = is_1d(t1)
    pairs = zip(t1, t2)
    if at_leaf_level:
        return [f(left, right) for left, right in pairs]
    return [tensor_combine(f, left, right) for left, right in pairs]

# 층 추상화

In [None]:
from typing import Iterable, Tuple

In [None]:
class Layer:
  """Base class for network layers.

  Subclasses must override ``forward`` and ``backward``. ``params`` and
  ``grads`` default to empty so that layers without trainable parameters
  need not override them. They are defined inside the class body: as
  module-level functions they would not be reachable as ``layer.params()``.
  """

  def forward(self, input):
    """Compute the layer's output for ``input``; must be overridden."""
    raise NotImplementedError

  def backward(self, gradient):
    """Propagate ``gradient`` back through the layer; must be overridden."""
    raise NotImplementedError

  def params(self) -> Iterable[Tensor]:
    """Return the layer's parameters; empty by default."""
    return ()

  def grads(self) -> Iterable[Tensor]:
    """Return the gradients in the same order as ``params()``; empty by default."""
    return ()

In [None]:
from neural_networks import sigmoid

In [None]:
class Sigmoid(Layer):
    """Layer that applies ``sigmoid`` to every element of its input."""

    def forward(self, input: Tensor) -> Tensor:
        """Return the element-wise sigmoid of ``input``.

        The outputs are kept on ``self.sigmoids`` because ``backward``
        needs them.
        """
        outputs = tensor_apply(sigmoid, input)
        self.sigmoids = outputs
        return outputs

    def backward(self, gradient: Tensor) -> Tensor:
        """Scale each incoming gradient by ``sig * (1 - sig)`` of the saved output."""
        def local_gradient(sig: float, grad: float) -> float:
            return sig * (1 - sig) * grad

        return tensor_combine(local_gradient, self.sigmoids, gradient)

# 선형 층

In [None]:
import random

from probability import inverse_normal_cdf

def random_uniform(*dims: int) -> Tensor:
    """Build a tensor of the given dimensions filled with ``random.random()`` draws.

    ``dims[0]`` is the outermost size; the remaining sizes describe each
    sub-tensor.
    """
    outer, inner = dims[0], dims[1:]
    if not inner:
        # Last dimension: a flat list of draws.
        return [random.random() for _ in range(outer)]
    return [random_uniform(*inner) for _ in range(outer)]

In [None]:
def random_normal(*dims: int,
                  mean: float = 0.0,
                  variance: float = 1.0) -> Tensor:
    """Build a tensor of the given dimensions filled with normal draws.

    Each value is ``mean + std_dev * z``, where ``z`` is
    ``inverse_normal_cdf(random.random())`` and ``std_dev`` is the square
    root of ``variance``. Scaling by the variance itself would give the
    draws a variance of ``variance ** 2``.

    Raises:
        ValueError: if ``variance`` is negative.
    """
    if variance < 0:
        raise ValueError(f"variance must be non-negative: {variance}")

    # Computed once here; the recursion passes `variance` through unchanged.
    std_dev = variance ** 0.5

    if len(dims) == 1:
        return [mean + std_dev * inverse_normal_cdf(random.random())
                for _ in range(dims[0])]
    else:
        return [random_normal(*dims[1:], mean=mean, variance=variance)
                for _ in range(dims[0])]

In [None]:
def random_tensor(*dims: int, init: str = 'normal') -> Tensor:
    """Create a randomly initialised tensor of the given dimensions.

    ``init`` selects the scheme:
      * ``'normal'``  -- ``random_normal`` with its defaults
      * ``'uniform'`` -- ``random_uniform``
      * ``'xavier'``  -- ``random_normal`` with variance ``len(dims) / sum(dims)``

    Raises:
        ValueError: for any other ``init`` value.
    """
    if init == 'normal':
        return random_normal(*dims)
    if init == 'uniform':
        return random_uniform(*dims)
    if init == 'xavier':
        xavier_variance = len(dims) / sum(dims)
        return random_normal(*dims, variance=xavier_variance)
    raise ValueError(f"unknown init: {init}")

In [None]:
from linear_algebra import dot

class Linear(Layer):
    """Fully connected layer computing ``dot(input, w[o]) + b[o]`` per output.

    All methods are defined inside the class body. Defined as module-level
    functions they would not override the ``Layer`` defaults, and calling
    ``forward`` on a ``Linear`` instance would raise ``NotImplementedError``.
    """

    def __init__(self, input_dim: int, output_dim: int, init: str = 'xavier') -> None:
        """Create the layer with randomly initialised weights and biases.

        Args:
            input_dim: number of input values.
            output_dim: number of output values.
            init: initialisation scheme passed on to ``random_tensor``.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim

        # self.w[o] holds the weights for output o (one value per input).
        self.w = random_tensor(output_dim, input_dim, init=init)

        # self.b[o] is the bias term for output o.
        self.b = random_tensor(output_dim, init=init)

    def forward(self, input: Tensor) -> Tensor:
        """Return one value per output: ``dot(input, w[o]) + b[o]``."""
        # Saved because backward() needs the input to build the weight gradient.
        self.input = input
        return [dot(input, self.w[o]) + self.b[o]
                for o in range(self.output_dim)]

    def backward(self, gradient: Tensor) -> Tensor:
        """Store parameter gradients and return the gradient w.r.t. the input.

        ``gradient`` holds one value per output.
        """
        # Each b[o] is added directly to output[o], so its gradient is the
        # incoming gradient itself.
        self.b_grad = gradient

        # Each w[o][i] multiplies input[i] and feeds output[o].
        self.w_grad = [[self.input[i] * gradient[o]
                        for i in range(self.input_dim)]
                       for o in range(self.output_dim)]

        # input[i] contributes to every output through w[o][i].
        return [sum(self.w[o][i] * gradient[o] for o in range(self.output_dim))
                for i in range(self.input_dim)]

    def params(self) -> Iterable[Tensor]:
        """Return the trainable tensors: weights, then biases."""
        return [self.w, self.b]

    def grads(self) -> Iterable[Tensor]:
        """Return the gradients in the same order as ``params()``."""
        return [self.w_grad, self.b_grad]


# 순차적 층으로 구성된 신경망

In [None]:
from typing import List

In [None]:
class Sequential(Layer):
    """A layer made of other layers applied one after another."""

    def __init__(self, layers: List[Layer]) -> None:
        """Keep the given list of layers in application order."""
        self.layers = layers

    def forward(self, input):
        """Feed ``input`` through every layer in order and return the final output."""
        output = input
        for stage in self.layers:
            output = stage.forward(output)
        return output

    def backward(self, gradient):
        """Send ``gradient`` back through the layers in reverse order."""
        upstream = gradient
        for stage in reversed(self.layers):
            upstream = stage.backward(upstream)
        return upstream

    def params(self) -> Iterable[Tensor]:
        """Lazily yield the parameters of each layer, in layer order."""
        for stage in self.layers:
            yield from stage.params()

    def grads(self) -> Iterable[Tensor]:
        """Lazily yield the gradients of each layer, in layer order."""
        for stage in self.layers:
            yield from stage.grads()

# 손실함수와 최적화

In [None]:
class Loss:
    """Interface for loss functions; both methods must be overridden."""

    def loss(self, predicted: Tensor, actual: Tensor) -> float:
        """Return the loss for ``predicted`` against ``actual``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError()

    def gradient(self, predicted: Tensor, actual: Tensor) -> Tensor:
        """Return the gradient of the loss with respect to ``predicted``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError()

In [None]:
class SSE(Loss):
    """Sum-of-squared-errors loss."""

    def loss(self, predicted: Tensor, actual: Tensor) -> float:
        """Return the sum over all elements of ``(predicted - actual) ** 2``."""
        def squared_error(pred: float, act: float) -> float:
            return (pred - act) ** 2

        per_element = tensor_combine(squared_error, predicted, actual)
        return tensor_sum(per_element)

    def gradient(self, predicted: Tensor, actual: Tensor) -> Tensor:
        """Return ``2 * (predicted - actual)`` for every element."""
        def error_slope(pred: float, act: float) -> float:
            return 2 * (pred - act)

        return tensor_combine(error_slope, predicted, actual)

In [None]:
class Optimizer:
    """Interface for optimizers that update a layer's parameters."""

    def step(self, layer: Layer) -> None:
        """Perform one update on ``layer``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError()

In [None]:
class GradientDescent(Optimizer):
    """Plain gradient descent: ``param - grad * learning_rate``."""

    def __init__(self, learning_rate: float = 0.1) -> None:
        """Store the step size used by every update."""
        self.lr = learning_rate

    def step(self, layer: Layer) -> None:
        """Update every parameter of ``layer`` in place from its gradient."""
        def descend(value: float, slope: float) -> float:
            return value - slope * self.lr

        for weights, weight_grads in zip(layer.params(), layer.grads()):
            # Slice assignment overwrites the existing list, so the layer's
            # own parameter object is updated rather than a copy.
            weights[:] = tensor_combine(descend, weights, weight_grads)

In [None]:
class Momentum(Optimizer):
    """Gradient descent that steps along a running average of the gradients."""

    def __init__(self, learning_rate: float, momentum: float = 0.9) -> None:
        """Store the step size and the weight given to the previous average."""
        self.lr = learning_rate
        self.mo = momentum
        # One running-average tensor per parameter; created on the first step.
        self.updates: List[Tensor] = []

    def step(self, layer: Layer) -> None:
        """Refresh the running averages, then move each parameter against them."""
        if not self.updates:
            # First call: start every running average at zero.
            self.updates = [zeros_like(grad) for grad in layer.grads()]

        def blend(u: float, g: float) -> float:
            return self.mo * u + (1 - self.mo) * g

        def descend(p: float, u: float) -> float:
            return p - self.lr * u

        triples = zip(self.updates, layer.params(), layer.grads())
        for running, weights, weight_grads in triples:
            # Slice assignment updates the stored lists in place.
            running[:] = tensor_combine(blend, running, weight_grads)
            weights[:] = tensor_combine(descend, weights, running)

# XOR 문제 다시 풀기

In [None]:
# XOR data: each two-element input in xs is paired by position with its
# one-element target in ys.
xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
ys = [[0.], [1.], [1.], [0.]]

In [None]:
# Fix the random seed so later draws from `random` are reproducible.
random.seed(0)

In [None]:
# XOR network: 2 inputs -> 2 hidden units -> Sigmoid -> 1 output.
net = Sequential([
    Linear(input_dim = 2, output_dim = 2),
    Sigmoid(),
    Linear(input_dim = 2, output_dim = 1)              
])

In [None]:
import tqdm

In [None]:
# Train the XOR network with plain gradient descent on the SSE loss.
optimizer = GradientDescent(learning_rate = 0.1)
loss = SSE()

# 3000 passes over the four examples, with a tqdm progress bar.
with tqdm.trange(3000) as t:
  for epoch in t:
    # Loss accumulated over all examples of this pass.
    epoch_loss = 0.0

    for x, y in zip(xs, ys):
      predicted = net.forward(x)
      epoch_loss += loss.loss(predicted, y)
      gradient = loss.gradient(predicted, y)
      # backward() fills in the layer gradients that the optimizer reads.
      net.backward(gradient)

      # Parameters are updated after every single example.
      optimizer.step(net)
    
    t.set_description(f"xor loss {epoch_loss:.3f}")


  0%|          | 0/3000 [00:00<?, ?it/s]


NotImplementedError: ignored

# 다른 활성화 함수

In [None]:
import math

In [None]:
def tanh(x: float) -> float:
    """Hyperbolic tangent computed as ``(1 - e^(-2x)) / (1 + e^(-2x))``.

    Inputs beyond +/-100 return the limit value directly instead of
    evaluating the exponential.
    """
    if x < -100:
        return -1
    if x > 100:
        return 1

    decay = math.exp(-2 * x)
    numerator = 1 - decay
    denominator = 1 + decay
    return numerator / denominator

In [None]:
class Tanh(Layer):
    """Layer that applies ``tanh`` to every element of its input."""

    def forward(self, input: Tensor) -> Tensor:
        """Return the element-wise tanh of ``input``, saving it for ``backward``."""
        outputs = tensor_apply(tanh, input)
        self.tanh = outputs
        return outputs

    def backward(self, gradient: Tensor) -> Tensor:
        """Scale each incoming gradient by ``1 - tanh ** 2`` of the saved output."""
        def local_gradient(out: float, grad: float) -> float:
            return (1 - out ** 2) * grad

        return tensor_combine(local_gradient, self.tanh, gradient)

In [None]:
class Relu(Layer):
    """Layer that replaces every input element ``x`` by ``max(x, 0)``."""

    def forward(self, input: Tensor) -> Tensor:
        """Return ``max(x, 0)`` per element, saving the raw input for ``backward``."""
        self.input = input

        def rectify(x: float) -> float:
            return max(x, 0)

        return tensor_apply(rectify, input)

    def backward(self, gradient: Tensor) -> Tensor:
        """Pass the gradient through where the saved input was positive, else 0."""
        def gate(x: float, grad: float) -> float:
            if x > 0:
                return grad
            return 0

        return tensor_combine(gate, self.input, gradient)

# Fizz Buzz

In [None]:
from neural_networks import binary_encode, fiz_buzz_encode, argmax

In [None]:
# Training data for the numbers 101..1023: inputs from binary_encode,
# targets from the imported encoder, paired by position.
xs = [binary_encode(n) for n in range(101, 1024)]
ys = [fiz_buzz_encode(n) for n in range(101, 1024)]

In [None]:
# Number of hidden units used when building the FizzBuzz network.
NUM_HIDDEN = 25

In [None]:
# Fix the random seed so later draws from `random` are reproducible.
random.seed(0)

In [None]:
# FizzBuzz network: 10 inputs -> NUM_HIDDEN units -> Tanh -> 4 outputs -> Sigmoid.
# Both Linear layers use the 'uniform' initialisation.
net = Sequential([  
    Linear(input_dim = 10, output_dim = NUM_HIDDEN, init = 'uniform'),
    Tanh(),
    Linear(input_dim = NUM_HIDDEN, output_dim = 4, init = 'uniform'),
    Sigmoid(), 
    ])

In [None]:
def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
  """Return the fraction of n in ``[low, hi)`` that ``net`` classifies correctly.

  A prediction counts as correct when the argmax of the network output for
  ``binary_encode(n)`` equals the argmax of the encoded label for ``n``.
  """
  num_correct = 0
  for n in range(low, hi):
    x = binary_encode(n)
    predicted = argmax(net.forward(x))
    # The label must be the encoding of n; passing the encoder function
    # itself to argmax never yields a valid label.
    # NOTE(review): `fiz_buzz_encode` is the spelling this notebook imports --
    # confirm it matches the name defined in neural_networks.
    actual = argmax(fiz_buzz_encode(n))
    if predicted == actual:
      num_correct += 1

  return num_correct / (hi - low)

In [None]:
# FizzBuzz training setup: momentum optimizer with the SSE loss.
optimizer = Momentum(learning_rate = 0.1, momentum = 0.9)
loss = SSE()

In [None]:
# Train the FizzBuzz network: 3000 passes over the training data, with a
# tqdm progress bar showing the loss and training accuracy of each pass.
with tqdm.trange(3000) as t:
  for epoch in t:
    # Loss accumulated over all examples of this pass.
    epoch_loss = 0.0

    for x, y in zip(xs, ys):
      predicted = net.forward(x)
      epoch_loss += loss.loss(predicted, y)
      # The Loss method is named `gradient`; its result is what gets
      # propagated back through the network.
      gradient = loss.gradient(predicted, y)
      net.backward(gradient)

      # Parameters are updated after every single example.
      optimizer.step(net)

    # Accuracy on the same range the training data was built from.
    accuracy = fizzbuzz_accuracy(101, 1024, net)
    t.set_description(f"fb loss: {epoch_loss: .2f} acc: {accuracy: .2f}")

In [None]:
# Accuracy on the numbers 1..100.
print("test result", fizzbuzz_accuracy(1, 101, net))

# Softmax와 Cross-Entropy

In [None]:
def softmax(tensor: Tensor) -> Tensor:
    """Apply softmax along the last dimension of ``tensor``."""
    if not is_1d(tensor):
        # Not yet at the last dimension: handle each sub-tensor separately.
        return [softmax(row) for row in tensor]

    # Subtracting the largest value keeps every exponent at or below zero.
    peak = max(tensor)
    shifted_exps = [math.exp(value - peak) for value in tensor]

    total = sum(shifted_exps)
    return [shifted / total for shifted in shifted_exps]

In [None]:
class SoftmaxCrossEntropy(Loss):
    """Cross-entropy loss computed on the softmax of the predictions."""

    def loss(self, predicted: Tensor, actual: Tensor) -> float:
        """Return minus the sum of ``log(p + 1e-30) * actual`` over all elements.

        ``p`` is the softmax of ``predicted``; the small constant keeps the
        argument of ``log`` above zero.
        """
        def weighted_log(p: float, act: float) -> float:
            return math.log(p + 1e-30) * act

        probabilities = softmax(predicted)
        likelihoods = tensor_combine(weighted_log, probabilities, actual)
        return -tensor_sum(likelihoods)

    def gradient(self, predicted: Tensor, actual: Tensor) -> Tensor:
        """Return ``softmax(predicted) - actual`` for every element."""
        def difference(p: float, act: float) -> float:
            return p - act

        probabilities = softmax(predicted)
        return tensor_combine(difference, probabilities, actual)

# 드롭아웃