In [1]:
import numpy as np
import torch

# 常见 loss 与激活函数（PyTorch 与 NumPy 实现）

## Sigmoid

### numpy 实现

In [2]:
def Sigmoid_np(x):
    """Numerically stable logistic sigmoid, 1 / (1 + exp(-x)), for NumPy inputs.

    The textbook form evaluates ``np.exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here the exponential is only
    ever taken of a non-positive number, so it cannot overflow.

    Args:
        x: Real-valued scalar or array-like.

    Returns:
        Element-wise sigmoid of ``x``, with the same shape as ``x`` (a NumPy
        scalar when ``x`` is a scalar).
    """
    x = np.asarray(x)
    nonneg = x >= 0
    # z = exp(-|x|) always lies in [0, 1].
    z = np.exp(np.where(nonneg, -x, x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x). Algebraically identical.
    out = np.where(nonneg, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a scalar; arrays pass through.
    return out[()]

### torch 实现

In [3]:
def Sigmoid_ts(x):
    """Numerically stable logistic sigmoid, 1 / (1 + exp(-x)), for torch tensors.

    The textbook form produces ``exp(-x) = inf`` for large negative ``x``.
    The forward value is still 0, but back-propagating through it multiplies
    0 by inf and yields NaN gradients. This version only exponentiates
    non-positive numbers, so both the value and the gradient stay finite.

    Args:
        x: Input tensor.

    Returns:
        Tensor of the same shape as ``x`` holding the element-wise sigmoid.
    """
    nonneg = x >= 0
    # Equivalent to exp(-|x|), but built with torch.where instead of abs() so
    # that the gradient at exactly x == 0 is the correct 0.25 rather than 0.
    z = torch.exp(torch.where(nonneg, -x, x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x). Algebraically identical.
    return torch.where(nonneg, 1 / (1 + z), z / (1 + z))

### 代码测试

In [4]:
# Smoke test for Sigmoid_ts: draw 8 random samples, apply the sigmoid, then
# show the input followed by the output.
x_tensor = torch.rand(8)
x_tensor_out = Sigmoid_ts(x_tensor)
print("x_tensor", x_tensor)
print("x_tensor_out", x_tensor_out)


x_tensor tensor([0.0012, 0.4065, 0.5952, 0.8800, 0.3590, 0.5262, 0.9075, 0.7036])
x_tensor_out tensor([0.5003, 0.6003, 0.6446, 0.7068, 0.5888, 0.6286, 0.7125, 0.6690])


In [5]:

# Smoke test for Sigmoid_np: draw 8 random samples, apply the sigmoid, then
# show the input followed by the output.
x_ndarray = np.random.rand(8)
x_ndarray_out = Sigmoid_np(x_ndarray)
print("x_ndarray:", x_ndarray)
print("x_ndarray_out:", x_ndarray_out)



x_ndarray: [0.35454213 0.93152769 0.52653562 0.40545007 0.40002454 0.29607753
 0.09014221 0.28949042]
x_ndarray_out: [0.5877186  0.71738512 0.62867474 0.59999639 0.59869356 0.57348336
 0.5225203  0.57187137]


## ReLU

### numpy 实现

In [6]:
def ReLU_np(x):
    """Rectified linear unit for NumPy inputs: element-wise ``max(0, x)``.

    Args:
        x: Scalar or array.

    Returns:
        ``x`` with every negative entry replaced by 0.
    """
    lower_bound = 0
    rectified = np.maximum(lower_bound, x)
    return rectified

### torch 实现

In [7]:
def ReLU_ts(x):
    """Rectified linear unit for torch tensors: element-wise ``max(0, x)``.

    Args:
        x: Input tensor.

    Returns:
        Tensor of the same shape and dtype as ``x`` with every negative entry
        replaced by 0.
    """
    # Must be zeros_like: empty_like returns *uninitialised* memory, so the
    # comparison baseline would be arbitrary values rather than 0.
    zero = torch.zeros_like(x)
    return torch.max(zero, x)

### 代码测试

In [8]:
# Smoke test for ReLU_np. Reuses `x_ndarray` from the sigmoid test cell, so
# that cell must have been run first.
print("x_ndarray:", x_ndarray)
x_ndarray_out = ReLU_np(x_ndarray)
print("x_ndarray_out:", x_ndarray_out)

x_ndarray: [0.35454213 0.93152769 0.52653562 0.40545007 0.40002454 0.29607753
 0.09014221 0.28949042]
x_ndarray_out: [0.35454213 0.93152769 0.52653562 0.40545007 0.40002454 0.29607753
 0.09014221 0.28949042]


In [9]:
# Smoke test for ReLU_ts. Reuses `x_tensor` from the sigmoid test cell, so
# that cell must have been run first.
print("x_tensor", x_tensor)
x_tensor_out = ReLU_ts(x_tensor)
print("x_tensor_out", x_tensor_out)

x_tensor tensor([0.0012, 0.4065, 0.5952, 0.8800, 0.3590, 0.5262, 0.9075, 0.7036])
x_tensor_out tensor([0.0012, 0.4065, 0.5952, 0.8800, 0.3590, 0.5262, 0.9075, 0.7036])


## Swish

### numpy 实现

In [14]:
def swish_np(x, beta=1.0):
    """Swish activation for NumPy inputs: ``x * Sigmoid_np(beta * x)``.

    Args:
        x: Scalar or array.
        beta: Multiplier applied to ``x`` before the sigmoid gate (default 1.0).

    Returns:
        Element-wise product of ``x`` and its sigmoid gate.
    """
    scaled = beta * x
    gate = Sigmoid_np(scaled)
    return x * gate

### torch 实现

In [11]:
def swish_ts(x, beta=1.0):
    """Swish activation for torch tensors: ``x * Sigmoid_ts(beta * x)``.

    Args:
        x: Input tensor.
        beta: Multiplier applied to ``x`` before the sigmoid gate (default 1.0).

    Returns:
        Element-wise product of ``x`` and its sigmoid gate.
    """
    scaled = beta * x
    gate = Sigmoid_ts(scaled)
    return x * gate

### 代码测试

In [12]:
# Smoke test for swish_ts with the default beta. Reuses `x_tensor` from the
# sigmoid test cell, so that cell must have been run first.
print("x_tensor", x_tensor)
x_tensor_out = swish_ts(x_tensor)
print("x_tensor_out", x_tensor_out)

x_tensor tensor([0.0012, 0.4065, 0.5952, 0.8800, 0.3590, 0.5262, 0.9075, 0.7036])
x_tensor_out tensor([6.1150e-04, 2.4402e-01, 3.8366e-01, 6.2199e-01, 2.1136e-01, 3.3073e-01,
        6.4662e-01, 4.7074e-01])


In [15]:
# Smoke test for swish_np with the default beta. Reuses `x_ndarray` from the
# sigmoid test cell, so that cell must have been run first.
print("x_ndarray:", x_ndarray)
x_ndarray_out = swish_np(x_ndarray)
print("x_ndarray_out:", x_ndarray_out)

x_ndarray: [0.35454213 0.93152769 0.52653562 0.40545007 0.40002454 0.29607753
 0.09014221 0.28949042]
x_ndarray_out: [0.208371   0.66826411 0.33101964 0.24326858 0.23949211 0.16979554
 0.04710113 0.16555128]


## SwiGLU（虽然既不是 loss 也不是激活函数，但这里还是给出 NumPy 的实现）

### numpy 实现

In [32]:
def SwiGLU_np(w1, w2, w3, b1, b2, b3, x):
    """SwiGLU feed-forward block in NumPy.

    Computes ``(swish(x @ w1 + b1) * (x @ w3 + b3)) @ w2.T + b2`` after
    flattening every leading axis of ``x`` into one.

    Args:
        w1: Gate projection weight, shape (dim, hidden).
        w2: Output projection weight, shape (dim_out, hidden). It is applied
            transposed, unlike ``w1`` and ``w3``.
        w3: Up (value) projection weight, shape (dim, hidden).
        b1: Gate bias, broadcastable to (N, hidden).
        b2: Output bias, broadcastable to (N, dim_out).
        b3: Up-projection bias, broadcastable to (N, hidden).
        x: Input array with at least one axis; its last axis has size ``dim``.
            Any number of leading axes is accepted.

    Returns:
        2-D array of shape (N, dim_out), where N is the product of the leading
        axes of ``x``. Callers reshape it back themselves.
    """
    # Only the feature axis matters, so collapse all leading axes.
    x = x.reshape(-1, x.shape[-1])
    gate_score = np.dot(x, w1) + b1
    up_prj = np.dot(x, w3) + b3
    return np.dot(swish_np(gate_score) * up_prj, w2.T) + b2


### 代码测试

In [33]:
# Shape smoke test for SwiGLU_np: all-ones weights and zero biases, with the
# hidden width set to twice the model dimension (64 -> 128).
bs, seq, dim = 2, 4, 64
a = np.random.rand(bs, seq, dim)  # (bs, seq, dim)
a_orishape = a.shape
w1 = np.ones((dim, 2 * dim))
w2 = np.ones((dim, 2 * dim))
w3 = np.ones((dim, 2 * dim))
b1 = np.zeros(2 * dim)
b2 = np.zeros(dim)
b3 = np.zeros(2 * dim)
# SwiGLU_np returns a flattened (bs * seq, dim) array; restore the input shape.
a_out = SwiGLU_np(w1, w2, w3, b1, b2, b3, a).reshape(a_orishape)
a_out.shape

(2, 4, 64)

## Softmax

### numpy 实现

In [51]:
def softmax_np(x):
    """Naive softmax over the last axis of a NumPy array.

    No max-subtraction is performed, so large inputs can overflow in
    ``np.exp``; ``softmax_np_s`` is the stabilised variant.

    Args:
        x: Array with at least one axis.

    Returns:
        Array of the same shape as ``x`` whose last axis sums to 1.
    """
    shape_in = x.shape
    rows = x.reshape(-1, shape_in[-1])
    numer = np.exp(rows)
    denom = np.sum(numer, axis=-1, keepdims=True)
    probs = numer / denom
    return probs.reshape(shape_in)

### 减去最大值的标准实现

In [49]:
def softmax_np_s(x):
    """Numerically stable softmax over the last axis of a NumPy array.

    Each row has its maximum subtracted before exponentiation, so the largest
    exponent is 0. The result is mathematically unchanged.

    Args:
        x: Array with at least one axis.

    Returns:
        Array of the same shape as ``x`` whose last axis sums to 1.
    """
    shape_in = x.shape
    rows = x.reshape(-1, shape_in[-1])
    shifted = rows - np.max(rows, axis=-1, keepdims=True)
    numer = np.exp(shifted)
    probs = numer / np.sum(numer, axis=-1, keepdims=True)
    return probs.reshape(shape_in)

### torch 实现

In [55]:
def softmax_ts(x):
    """Numerically stable softmax over the last dimension of a torch tensor.

    The per-row maximum is subtracted before exponentiation so the largest
    exponent is 0. The result is mathematically unchanged.

    Args:
        x: Input tensor of any rank; contiguous or not.

    Returns:
        Tensor of the same shape as ``x`` whose last dimension sums to 1.
    """
    # No flattening needed: reductions along dim=-1 work for any rank. The
    # previous `x.view(-1, n)` discarded its result and only served to raise
    # on non-contiguous inputs.
    x_max = x.max(dim=-1, keepdim=True).values
    exp_x = torch.exp(x - x_max)
    return exp_x / exp_x.sum(dim=-1, keepdim=True)

### 代码测试

In [56]:
# Two rows that differ only by a constant offset, so their softmax
# outputs should be identical.
x = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
x_out = softmax_np_s(x)
x_out

array([[0.09003057, 0.24472847, 0.66524096],
       [0.09003057, 0.24472847, 0.66524096]])

In [57]:
# Same check for the torch implementation: rows that differ only by a
# constant offset should give identical softmax outputs.
x = torch.Tensor(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
x_out = softmax_ts(x)
x_out

tensor([[0.0900, 0.2447, 0.6652],
        [0.0900, 0.2447, 0.6652]])

## Cross_Entropy

### numpy 实现

### torch 实现

## KL散度

### numpy 实现

### torch 实现

### 代码测试

## NLL

### numpy 实现

### torch 实现

### 代码测试