module DL
using Printf
using LinearAlgebra
import Base: *, +
export ReLU, VariableTensor, MSE, ConstantTensor, backward, zero_grad, SGD, Linear, Tensor
# Tensor
abstract type Tensor end
abstract type LeafTensor <: Tensor end
abstract type FunctionalTensor <: Tensor end
#= Multiplication: t = t1 * t2 =#
mutable struct MulTensor <: FunctionalTensor
    x
    tensors::Array{Tensor, 1}
end
# forward
(*)(t3::Tensor, t4::Tensor) = abstract_operator(MulTensor, (x1, x2) -> x1 * x2, [t3, t4])
# backward t2 <- t1 <- t3 * t4
function grad_fn(t1::MulTensor, grad_up)
    t3, t4 = t1.tensors
    # dt2/dt1 * dt1/dt3 = grad_up * t4.x
    # dt2/dt1 * dt1/dt4 = grad_up * t3.x
    return [t4.x * grad_up, grad_up * t3.x]
end
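# Note on the convention used throughout (an explanatory sketch, not part of
# the original API): gradients propagate in transposed (row-vector) form, so
# the values returned by grad_fn are vector-Jacobian products; the transpose
# back to column form happens in backward(::VariableTensor, ...).
# Illustrative shapes for t1 = t3 * t4:
#   t3.x :: m×n,  t4.x :: n×1,  grad_up :: 1×m
#   t4.x * grad_up :: n×m   (transposed gradient w.r.t. t3)
#   grad_up * t3.x :: 1×n   (transposed gradient w.r.t. t4)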
#= Addition =#
mutable struct AddTensor <: FunctionalTensor
    x
    tensors::Array{Tensor, 1}
end
# forward
(+)(t3::Tensor, t4::Tensor) = abstract_operator(AddTensor, (x1, x2) -> x1 .+ x2, [t3, t4])
# backward t2 <- t1 <- t3 + t4
function grad_fn(t1::AddTensor, grad_up)
    t3, t4 = t1.tensors
    # dt2/dt1 * dt1/dt3 = grad_up * I
    # dt2/dt1 * dt1/dt4 = grad_up * I
    # (assumes t3 and t4 have the same shape; broadcasting is not handled here)
    return [grad_up, grad_up]
end
#= ReLU function t = ReLU(t1) =#
mutable struct ReLUTensor <: FunctionalTensor
    x
    tensors::Array{Tensor, 1}
end
# forward
function ReLU(t::Tensor)
    return abstract_operator(ReLUTensor, x -> x .* (x .> 0), [t])
end
# backward t2 <- t1 <- ReLU(t3)
function grad_fn(t1::ReLUTensor, grad_up)
    t3 = t1.tensors[1]
    # dt2/dt1 * dt1/dt3 = grad_up * d ReLU(x)/dx
    return [grad_up .* (t3.x' .> 0)]
end
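# Example (illustrative values): ReLU zeroes negatives in the forward pass and
# masks the incoming gradient with the same pattern in the backward pass:
#   t = ReLU(VariableTensor([-1.0, 2.0]))    # t.x == [0.0, 2.0]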
#= Mean Squared Error t = MSE(t1, t2) =#
mutable struct MSETensor <: FunctionalTensor
    x
    tensors::Array{Tensor, 1}
end
# forward
function MSE(t1::Tensor, t2::Tensor)
    mse = (x1, x2) -> sum((x1 - x2).^2) / length(x1)
    return abstract_operator(MSETensor, mse, [t1, t2])
end
# backward t2 <- t1 <- MSE(t3, t4)
function grad_fn(t1::MSETensor, grad_up)
    t3, t4 = t1.tensors
    # dt2/dt1 * dt1/dt3 = grad_up * d/dx sum((x - y).^2) / length(x)
    N = length(t3.x)
    dt1dt3 = 2*(t3.x - t4.x)' / N
    dt1dt4 = 2*(t4.x - t3.x)' / N
    return [grad_up * dt1dt3, grad_up * dt1dt4]
end
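# Derivation sketch: with f(x, y) = sum((x - y).^2) / N,
#   df/dx_i = 2 (x_i - y_i) / N   and   df/dy_i = 2 (y_i - x_i) / N,
# which, transposed into row form, are dt1dt3 and dt1dt4 above.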
#= Leaf at the end of the t2 <- t1 chain; carries a gradient =#
mutable struct VariableTensor <: LeafTensor
    x
    grad
    VariableTensor(x) = new(x, zero(x))
end
#= Plain constant; no gradient =#
mutable struct ConstantTensor <: LeafTensor
    x
end
# backward computation
# entry point: seed the recursion with gradient 1
function backward(t::Tensor)
    backward(t, 1)
end
# recursive case
function backward(t::Tensor, grad_up)
    # There is one gradient per input tensor of t, so grad_fn returns a list.
    vgrad = grad_fn(t, grad_up)
    # Pair each gradient with its corresponding input tensor
    pair = zip(t.tensors, vgrad)
    # Recurse into the graph
    map(x -> backward(x[1], x[2]), pair)
end
# Leaves without gradients, such as ConstantTensor: stop the recursion
function backward(t::LeafTensor, grad_up)
end
# VariableTensor accumulates its gradient (transposed back to column form)
function backward(t::VariableTensor, grad_up)
    t.grad += grad_up'
end
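# Usage sketch (hypothetical values, for illustration only):
#   w = VariableTensor(randn(2, 2))
#   loss = MSE(w * ConstantTensor(randn(2, 1)), ConstantTensor(zeros(2, 1)))
#   backward(loss)    # w.grad now holds d(loss)/dw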
# Abstracted operator constructor (Enzansi = 演算子, "operator")
# When every input is a ConstantTensor, the output is also a ConstantTensor
# The signature Array{T, 1} where T <: Tensor matches arrays whose element
# type is any subtype of Tensor
function abstract_operator(EnzansiTensor::DataType, f::Function, tensors::Array{T, 1} where T <: Tensor)
    # Unpack each tensor's value x and splat into f
    y = f(map(tensor -> tensor.x, tensors)...)
    return EnzansiTensor(y, tensors)
end
function abstract_operator(EnzansiTensor::DataType, f::Function, tensors::Array{ConstantTensor, 1})
    y = f(map(tensor -> tensor.x, tensors)...)
    return ConstantTensor(y)
end
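# Example of the constant-folding dispatch (illustrative values): an array of
# ConstantTensors selects the second method, so no graph node is built:
#   ConstantTensor([1.0]) + ConstantTensor([2.0]) isa ConstantTensor  # true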
function tensor_map(f::Function, t::Tensor)
    #= Apply an arbitrary function to every VariableTensor in the graph.
       Walking the whole graph on every call is wasteful, so there is room
       to reconsider this design.
    =#
    # Recurse into the inputs
    map(t -> tensor_map(f, t), t.tensors)
end
function tensor_map(f::Function, t::LeafTensor)
    # Leaves other than VariableTensor hold no parameters, so do nothing
    nothing
end
function tensor_map(f::Function, t::VariableTensor)
    # Apply f only to VariableTensors
    f(t)
end
function zero_grad(tensor::Tensor)
    f = t -> t.grad = zero(t.grad)
    tensor_map(f, tensor)
end
function SGD(lr=1e-3)
    f = (t::Tensor) -> t.x = t.x - lr * t.grad
    optimizer = (t::Tensor) -> tensor_map(f, t)
    return optimizer
end
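# Usage sketch: SGD returns a closure; calling it on the loss tensor walks the
# graph and applies one gradient step to every VariableTensor, e.g.
#   optimizer = SGD(1e-2)
#   optimizer(loss)    # after backward(loss)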
function Linear(in_features::Int64, out_features::Int64)
    W = VariableTensor(randn(out_features, in_features))
    b = VariableTensor(zeros(out_features, 1))
    return x -> W*x + b
end
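# Usage sketch: Linear returns a closure over its parameters, so a layer is
# called like a function (the sizes below are made up for illustration):
#   layer = Linear(3, 2)
#   y = layer(ConstantTensor(randn(3, 1)))    # y.x is a 2×1 output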
function printarray(x)
    show(stdout, "text/plain", x)
    @printf "\n"
end
end
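
# A minimal end-to-end sketch, assuming this file is run directly as a script.
# The target value, layer sizes, learning rate, and step count below are made
# up for illustration.
if abspath(PROGRAM_FILE) == @__FILE__
    using .DL
    layer = Linear(1, 1)          # fit y ≈ 3 for the input x = 1
    optimizer = SGD(1e-1)
    x = ConstantTensor(reshape([1.0], 1, 1))
    y = ConstantTensor(reshape([3.0], 1, 1))
    for step in 1:100
        loss = MSE(layer(x), y)
        zero_grad(loss)           # clear gradients accumulated so far
        backward(loss)            # populate .grad on W and b
        optimizer(loss)           # one SGD step on every VariableTensor
    end
end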