In [1]:
import numpy as np

## 오차제곱합

$E=\frac{1}{2} \sum_{k}(y_{k}-t_{k})^{2}$

In [2]:
def sum_squares_error(y, t):
    """Return the sum-of-squares error E = 0.5 * sum_k (y_k - t_k)**2.

    Args:
        y: Predicted values (array-like).
        t: Target values (array-like), broadcastable against ``y``.

    Returns:
        The scalar error as a NumPy float.
    """
    # Accept plain Python sequences as well as ndarrays.
    y = np.asarray(y)
    t = np.asarray(t)
    return 0.5 * np.sum((y - t) ** 2)

In [6]:
t = [0,0,1,0,0,0,0,0,0,0]
y = [0.1,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
# Compare floats with a tight absolute tolerance instead of `==`, so the check
# does not depend on platform-specific summation/rounding details.
assert np.isclose(sum_squares_error(np.array(y), np.array(t)), 0.09750000000000003, rtol=0, atol=1e-9), '오답입니다.'

y = [0.1,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
assert np.isclose(sum_squares_error(np.array(y), np.array(t)), 0.5975, rtol=0, atol=1e-9), '오답입니다.'

## 교차 엔트로피 오차

$E=-\sum_{k} t_{k}\log y_{k}$

In [None]:
def cross_entropy_error(y, t):
    """Return the cross-entropy error E = -sum_k t_k * log(y_k) for one sample.

    Args:
        y: Predicted probabilities (array-like).
        t: Target vector of the same shape as ``y`` (e.g. one-hot).

    Returns:
        The scalar error as a NumPy float.
    """
    # Small offset so that log(0) never produces -inf.
    delta = 1e-7
    y = np.asarray(y, dtype=float)
    t = np.asarray(t)
    return -np.sum(t * np.log(y + delta))

In [None]:
t = [0,0,1,0,0,0,0,0,0,0]
y = [0.1,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
# Tolerance-based comparison: robust to last-bit differences in log(), yet
# tight enough that omitting `delta` (shift of about 1e-7) still fails.
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 0.510825457099338, rtol=0, atol=1e-9), '오답입니다.'

y = [0.1,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 2.302584092994546, rtol=0, atol=1e-9), '오답입니다.'

## 배치용 교차 엔트로피 오차

$E=-\frac{1}{N}\sum_{n}\sum_{k} t_{nk}\log y_{nk}$

In [None]:
def cross_entropy_error(y, t):
    """Return the cross-entropy error averaged over a batch.

    Args:
        y: Predicted probabilities, either a single sample of shape
            (classes,) or a batch of shape (batch, classes).
        t: Targets, either with the same number of elements as ``y``
            (one-hot / per-class weights) or one class index per sample.

    Returns:
        The mean cross-entropy error over the batch as a NumPy float.
    """
    # Small offset so that log(0) never produces -inf.
    delta = 1e-7
    y = np.asarray(y, dtype=float)
    t = np.asarray(t)

    # Treat a single sample as a batch of size one.
    if y.ndim == 1:
        y = y.reshape(1, -1)
        t = t.reshape(1, -1)
    batch_size = y.shape[0]

    # Same element count as y: t holds per-class targets (e.g. one-hot).
    if t.size == y.size:
        return -np.sum(t.reshape(y.shape) * np.log(y + delta)) / batch_size

    # Otherwise t holds one class index per sample; pick those probabilities.
    labels = t.reshape(-1)
    picked = y[np.arange(batch_size), labels]
    return -np.sum(np.log(picked + delta)) / batch_size

In [None]:
t = [0,0,1,0,0,0,0,0,0,0]
y = [0.1,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
# Tolerance-based comparison: robust to last-bit differences in log(), yet
# tight enough that omitting `delta` (shift of about 1e-7) still fails.
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 0.510825457099338, rtol=0, atol=1e-9), '오답입니다.'

y = [0.1,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 2.302584092994546, rtol=0, atol=1e-9), '오답입니다.'

# The target may also be given as a class index instead of a one-hot vector.
t = [2]
y = [0.1,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 0.510825457099338, rtol=0, atol=1e-9), 't는 원-핫 벡터가 아닐 수 있습니다.'

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Passage the model will search for an answer, and the question to ask about it.
context = """
🤗 Transformers is backed by the three most popular deep learning libraries — Jax, PyTorch, and TensorFlow — with a seamless integration
between them. It's straightforward to train your models with one before loading them for inference with the other.
"""
question = "Which deep learning libraries back 🤗 Transformers?"

# Load the tokenizer and question-answering model for the chosen checkpoint.
model_checkpoint = "distilbert-base-cased-distilled-squad"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)

# Encode the (question, context) pair as PyTorch tensors.
inputs = tokenizer(question, context, return_tensors="pt")
# Inference only: disable autograd so no computation graph is built or kept.
with torch.no_grad():
    outputs = model(**inputs)

sequence_ids = inputs.sequence_ids()
# Mask everything apart from the tokens of the context
mask = [i != 1 for i in sequence_ids]
# Unmask the [CLS] token
mask[0] = False
# Convert to a tensor and add a leading axis of size 1.
mask = torch.tensor(mask)[None]

Downloading tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

In [20]:
# Preview of the raw mask (True wherever the sequence id is not 1), with a
# leading axis of size 1; position 0 is not unmasked in this expression.
torch.tensor([seq_id != 1 for seq_id in sequence_ids]).unsqueeze(0)

tensor([[ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False,  True]])

## 수치 미분

$$\frac {df(x)} {dx} =\lim_{h \to 0} \frac{f(x+h)-f(x-h)}{2h}$$

In [None]:
def numerical_diff(f, x):
    """Approximate df/dx at ``x`` with the central difference.

    Args:
        f: Function of one variable.
        x: Point at which to differentiate.

    Returns:
        (f(x + h) - f(x - h)) / (2h) with h = 1e-4.
    """
    # Step size: small enough for accuracy, large enough to avoid the
    # rounding error that a much smaller step would introduce.
    h = 1e-4
    return (f(x + h) - f(x - h)) / (2 * h)

In [None]:
def function_1(x):
    """Evaluate the quadratic 0.01*x**2 + 0.1*x at ``x``."""
    quadratic_term = 0.01 * x**2
    linear_term = 0.1 * x
    return quadratic_term + linear_term

# Tolerance-based comparison instead of float `==`; 1e-9 still rejects a
# forward-difference implementation, whose error here is about 1e-6.
assert np.isclose(numerical_diff(function_1, 5), 0.1999999999990898, rtol=0, atol=1e-9), '미분 함수를 다시 확인하세요.'
assert np.isclose(numerical_diff(function_1, 10), 0.2999999999986347, rtol=0, atol=1e-9), '미분 함수를 다시 확인하세요.'

## 기울기

In [None]:
def numerical_gradient(f, x:np.ndarray):
    """Approximate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by +h and -h, ``f(x)`` is
    evaluated for both, and the element is then restored, so ``x`` holds its
    original values when the function returns.

    Args:
        f: Function taking an array shaped like ``x`` and returning a scalar.
        x: Point at which to evaluate the gradient (any shape).

    Returns:
        Float array with the same shape as ``x`` holding the partial derivatives.
    """
    h = 1e-4
    x = np.asarray(x)
    # Integer arrays cannot hold x +/- h; work on a float copy in that case.
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)

    grad = np.zeros_like(x)
    # `.flat` gives element-wise access that works for any number of dimensions.
    for idx in range(x.size):
        original = x.flat[idx]
        try:
            x.flat[idx] = original + h
            f_plus = f(x)
            x.flat[idx] = original - h
            f_minus = f(x)
        finally:
            # Always restore the element, even if f raises.
            x.flat[idx] = original
        grad.flat[idx] = (f_plus - f_minus) / (2 * h)
    return grad

function_2
$$f(x_{0}, x_{1})=x^{2}_{0} + x^{2}_{1}$$

In [None]:
def function_2(x):
    """Return x[0]**2 + x[1]**2 for an indexable ``x``."""
    first, second = x[0], x[1]
    return first**2 + second**2

# Each case pairs an evaluation point with the gradient value it must produce.
gradient_cases = [
    ([3.0, 4.0], [6., 8.]),
    ([0.0, 2.0], [0., 4.]),
    ([3.0, 0.0], [6., 0.]),
]
for point, expected in gradient_cases:
    y = numerical_gradient(function_2, np.array(point))
    assert np.allclose(y, np.array(expected)), '오답입니다.'