<a href="https://colab.research.google.com/github/Sasaki0Kojiro/hello-world/blob/main/pytorch1_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

# Two-layer fully connected network (linear -> ReLU -> linear) trained with
# plain NumPy and a hand-derived backward pass.
# N: batch size, D_in: input width, H: hidden width, D_out: output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialised weight matrices; both are updated in place below.
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
  # Forward pass.
  h = np.dot(x, w1)
  h_relu = np.maximum(h, 0)
  y_pred = np.dot(h_relu, w2)

  # Sum of squared errors; reported on every 100th iteration.
  residual = y_pred - y
  loss = np.sum(np.square(residual))
  if (t + 1) % 100 == 0:
    print(t,loss)

  # Backward pass: chain rule from the loss back to each weight matrix.
  grad_y_pred = 2.0 * residual
  grad_w2 = np.dot(h_relu.T, grad_y_pred)
  grad_h_relu = np.dot(grad_y_pred, w2.T)
  # ReLU passes gradient only where its input was not negative.
  grad_h = np.where(h < 0, 0.0, grad_h_relu)
  grad_w1 = np.dot(x.T, grad_h)

  # Gradient-descent step.
  w1 -= learning_rate * grad_w1
  w2 -= learning_rate * grad_w2

99 614.9866900565057
199 4.015607218946838
299 0.04346431851052691
399 0.0005854752562892208
499 8.897009498990837e-06


In [2]:
# Compare the network's prediction for the first sample before and after
# training. The trained w1/w2 were updated in place, so freshly drawn random
# weights stand in for the untrained network.
w1_unlearned = np.random.randn(D_in,H)
w2_unlearned = np.random.randn(H,D_out)

# Forward pass with the untrained weights.
h = x.dot(w1_unlearned)
h_relu = np.maximum(h,0)
y_pred = h_relu.dot(w2_unlearned)

# Label reads "output before training" to pair with the "output after
# training" line below (it previously read "learning ability").
print(f"学習前出力:{np.round(y_pred[0],decimals=2)}")

# Forward pass with the trained weights.
h = x.dot(w1)
h_relu = np.maximum(h,0)
y_pred = h_relu.dot(w2)
print(f"学習後出力:{np.round(y_pred[0],decimals=2)}")

# Target values for the same sample ("target output").
print(f"目的の出力:{np.round(y[0],decimals=2)}")

学習能力:[ 183.93   77.46  285.47  -65.16  187.69   31.24  249.5  -486.58   74.73
   34.53]
学習後出力:[-2.14 -0.59  0.5   0.97 -1.37  0.84  1.82  0.05  1.94 -1.03]
目的の主力:[-2.14 -0.59  0.5   0.97 -1.37  0.84  1.82  0.05  1.94 -1.03]


In [3]:
import torch

# Shared tensor settings: single-precision floats on the CPU.
device = torch.device("cpu")
dtype = torch.float

# N: batch size, D_in: input width, H: hidden width, D_out: output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets.
x = torch.randn((N, D_in), dtype=dtype, device=device)
y = torch.randn((N, D_out), dtype=dtype, device=device)

# Randomly initialised weight matrices for the two linear layers.
w1 = torch.randn((D_in, H), dtype=dtype, device=device)
w2 = torch.randn((H, D_out), dtype=dtype, device=device)

In [4]:
# Gradient descent on the two-layer network using plain tensor operations:
# the backward pass is written out by hand, autograd is not involved.
learning_rate = 1e-6
for t in range(500):
  # Forward pass: linear -> ReLU -> linear.
  h = torch.mm(x, w1)
  h_relu = torch.clamp(h, min=0)
  y_pred = torch.mm(h_relu, w2)

  # Sum of squared errors as a Python float; reported every 100th iteration.
  loss = torch.sum((y_pred - y) ** 2).item()
  if (t + 1) % 100 == 0:
    print(t,loss)

  # Backward pass: chain rule from the loss back to each weight matrix.
  grad_y_pred = 2.0 * (y_pred - y)
  grad_w2 = torch.mm(h_relu.t(), grad_y_pred)
  grad_h_relu = torch.mm(grad_y_pred, w2.t())
  # ReLU passes gradient only where its input was not negative.
  grad_h = grad_h_relu.masked_fill(h < 0, 0)
  grad_w1 = torch.mm(x.t(), grad_h)

  # Gradient-descent step, updating the weights in place.
  w1 -= learning_rate * grad_w1
  w2 -= learning_rate * grad_w2

99 390.0259094238281
199 0.9371556639671326
299 0.0035687668714672327
399 0.00010009153629653156
499 2.154137473553419e-05


In [9]:
import torch

# Two-layer network trained with autograd: the forward pass builds the graph
# and loss.backward() fills in w1.grad / w2.grad.
dtype = torch.float
device = torch.device("cpu")

# N: batch size, D_in: input width, H: hidden width, D_out: output width.
N,D_in,H,D_out = 64,1000,100,10

# Inputs and targets are fixed data, not parameters, so requires_grad is left
# at its default (False). With requires_grad=True, every backward() would also
# compute gradients for x and y and accumulate them in x.grad / y.grad, which
# are never used or zeroed.
x = torch.randn(N,D_in, device=device,dtype=dtype)
y = torch.randn(N,D_out, device=device,dtype=dtype)

# Only the weights are trainable, so only they track gradients.
w1 = torch.randn(D_in, H , device = device,dtype = dtype,requires_grad=True)
w2 = torch.randn(H,D_out,device = device,dtype = dtype,requires_grad=True)

learning_rate = 1e-6
for t in range(500):
  # Forward pass
  y_pred = x.mm(w1).clamp(min=0).mm(w2)

  # Compute the loss and report it every 100th iteration
  loss = (y_pred - y).pow(2).sum()
  if t % 100 == 99:
    print(t,loss.item())
  # Backward pass
  loss.backward()
  # Gradient-descent step; no_grad keeps the in-place updates out of the graph
  with torch.no_grad():
    w1 -= learning_rate * w1.grad
    w2 -= learning_rate * w2.grad
    # Manually reset the gradients, since backward() accumulates into .grad
    w1.grad.zero_()
    w2.grad.zero_()

99 359.0576171875
199 1.4028332233428955
299 0.011139712296426296
399 0.0003039742005057633
499 5.01127251482103e-05


In [10]:
import torch

In [11]:
class MyReLU(torch.autograd.Function):
  """Custom autograd function implementing ReLU with an explicit backward."""

  @staticmethod
  def forward(ctx,input):
    """Return `input` clamped below at zero and save `input` for backward."""
    ctx.save_for_backward(input)
    return torch.clamp(input, min=0)

  @staticmethod
  def backward(ctx,grad_output):
    """Return `grad_output` with entries zeroed where the saved input was negative."""
    (saved_input,) = ctx.saved_tensors
    # masked_fill returns a new tensor, so grad_output itself is not modified.
    return grad_output.masked_fill(saved_input < 0, 0)

In [12]:
# Two-layer network trained with autograd, using the custom MyReLU function
# as the hidden activation.
dtype = torch.float
device = torch.device("cpu")

# N: batch size, D_in: input width, H: hidden width, D_out: output width.
N,D_in,H,D_out = 64,1000,100,10
# Create random tensors holding the input data and the target output data
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Create random weight tensors; these are the trainable parameters
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6

# A custom autograd Function is invoked through its apply method. The alias
# does not change between iterations, so it is bound once before the loop.
relu = MyReLU.apply

for t in range(500):
  # Forward pass
  y_pred = relu(x.mm(w1)).mm(w2)

  # Loss: sum of squared errors, reported every 100th iteration
  loss = (y_pred - y).pow(2).sum()
  if t % 100 == 99:
    print(t,loss.item())

  # Backward pass
  loss.backward()

  # Gradient-descent step; no_grad keeps the in-place updates out of the graph
  with torch.no_grad():
    w1 -= learning_rate * w1.grad
    w2 -= learning_rate * w2.grad

    # Reset the gradients, since backward() accumulates into .grad
    w1.grad.zero_()
    w2.grad.zero_()

99 615.6991577148438
199 2.9851415157318115
299 0.025481903925538063
399 0.0004939058562740684
499 6.01283936703112e-05


In [14]:
import torch

# Two-layer network assembled from torch.nn modules and trained with a
# hand-written gradient-descent update over model.parameters().
# N: batch size, D_in: input width, H: hidden width, D_out: output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets.
x = torch.randn((N, D_in))
y = torch.randn((N, D_out))

# linear -> ReLU -> linear, applied in sequence.
layers = [
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
]
model = torch.nn.Sequential(*layers)

# Summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction="sum")

learning_rate = 1e-4
for t in range(500):
  # Forward pass and loss; reported every 100th iteration.
  y_pred = model(x)
  loss = loss_fn(y_pred, y)
  if (t + 1) % 100 == 0:
    print(t,loss.item())

  # Clear the previous gradients, then backpropagate the current loss.
  model.zero_grad()
  loss.backward()

  # In-place gradient-descent step, kept out of the autograd graph.
  with torch.no_grad():
    for param in model.parameters():
      param.sub_(learning_rate * param.grad)

99 2.1372592449188232
199 0.027679575607180595
299 0.0006906291819177568
399 2.116992800438311e-05
499 7.147733640522347e-07


In [15]:
import torch

# Two-layer network assembled from torch.nn modules and trained with the
# Adam optimizer from torch.optim.
# N: batch size, D_in: input width, H: hidden width, D_out: output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets.
x = torch.randn((N, D_in))
y = torch.randn((N, D_out))

# linear -> ReLU -> linear, applied in sequence.
layers = [
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
]
model = torch.nn.Sequential(*layers)

# Summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction="sum")
learning_rate = 1e-4
# The optimizer owns the parameter update for every tensor in the model.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(500):
  # Forward pass and loss; reported every 100th iteration.
  y_pred = model(x)
  loss = loss_fn(y_pred, y)
  if (t + 1) % 100 == 0:
    print(t,loss.item())

  # Clear the previous gradients, backpropagate, then let Adam update.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

99 47.25872039794922
199 1.0742422342300415
299 0.019621217623353004
399 0.00024726358242332935
499 1.3597938277598587e-06


In [19]:
class TwoLayerNet(torch.nn.Module):
  """Two linear layers with a ReLU (clamp at zero) between them."""

  def __init__(self,D_in,H,D_out):
    """Create the two linear layers: D_in -> H and H -> D_out."""
    super().__init__()
    self.linear1 = torch.nn.Linear(D_in, H)
    self.linear2 = torch.nn.Linear(H, D_out)

  def forward(self,x):
    """Return linear2 applied to the zero-clamped output of linear1(x)."""
    hidden = torch.clamp(self.linear1(x), min=0)
    return self.linear2(hidden)

# Train the TwoLayerNet module on random data with torch.optim.SGD.
# N: batch size, D_in: input width, H: hidden width, D_out: output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets.
x = torch.randn((N, D_in))
y = torch.randn((N, D_out))

model = TwoLayerNet(D_in, H, D_out)

# Summed (not averaged) squared error, minimised by SGD.
criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
for t in range(500):
  # Forward pass and loss; reported every 100th iteration.
  y_pred = model(x)
  loss = criterion(y_pred, y)
  if (t + 1) % 100 == 0:
    print(t,loss.item())

  # Clear the previous gradients, backpropagate, then take the SGD step.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

99 3.038952112197876
199 0.06008504331111908
299 0.0028584206011146307
399 0.00020964082796126604
499 1.8087050193571486e-05
