In [80]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing #カリフォルニア
import pandas as pd
import matplotlib.pyplot as plt

import nnabla as nn

import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S
import random
from nnabla.utils.data_iterator import data_iterator_simple
from sklearn.preprocessing import StandardScaler

%matplotlib inline

データの確認

In [81]:
# Fetch the California housing dataset and wrap it in pandas containers for inspection.
california_housing = fetch_california_housing()

train_y = pd.Series(data=california_housing.target)
train_x = pd.DataFrame(
    data=california_housing.data,
    columns=california_housing.feature_names,
)

# Leave the first rows of the feature table as the cell's displayed result.
train_x.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [82]:
# Preview the first rows of the target series.
train_y.head()

0    4.526
1    3.585
2    3.521
3    3.413
4    3.422
dtype: float64

データの導入と正規化

In [83]:
# Load the raw feature matrix and targets, then hold out 10% of the rows for testing.
dataset = fetch_california_housing()
Y = dataset.target
X = dataset.data
# A fixed random_state keeps the partition identical across kernel restarts;
# without it every run trains and evaluates on a different split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=42
)
len(X_train)

18576

In [84]:
# Standardise the features using statistics estimated on the training split only.
scaler = StandardScaler()
X_n_train = scaler.fit_transform(X_train)
# Re-use the training mean/variance for the test split. Calling fit_transform here
# would refit the scaler on the test data, so train and test would be scaled
# differently and test statistics would leak into the preprocessing.
X_n_test = scaler.transform(X_test)

データイテレーターを作成する

In [85]:
def batch_iterator(train, test, batch_size):
    """Yield shuffled mini-batches of feature rows and their matching targets.

    Despite the parameter names, the two arrays are paired row by row: the
    same shuffled indices are used to select from both of them.

    Args:
        train: Array of features with one sample per row.
        test: Array holding one target value per row of ``train``.
        batch_size: Maximum number of samples per batch.

    Yields:
        A ``(features, targets)`` pair. ``targets`` is reshaped to a column
        of shape ``(n, 1)``. ``n`` equals ``batch_size`` for every batch
        except possibly the last one, which holds the remaining samples.
    """
    n_samples = train.shape[0]
    indices = np.arange(n_samples)
    # One pass over the data in a fresh random order.
    np.random.shuffle(indices)
    for start in range(0, n_samples, batch_size):
        # Slicing clamps at the end of the array, so the last batch may be short.
        batch_idx = indices[start:start + batch_size]
        # reshape(-1, 1) sizes the column from the batch actually selected;
        # a hard-coded (batch_size, 1) raises on a short final batch.
        yield train[batch_idx], test[batch_idx].reshape(-1, 1)

線形回帰

In [86]:
# Drop any parameters registered by an earlier run before building the graph.
nn.clear_parameters()
batch_size = 16
# Input placeholder: batch_size rows, one column per scaled feature.
x = nn.Variable((batch_size, X_n_train.shape[1]))
# A single affine layer with one output unit, registered under the "affine1" scope.
with nn.parameter_scope("affine1"):
    y = PF.affine(x, n_outmaps=1)

In [87]:
# Target placeholder holding one value per sample of the batch.
t = nn.Variable((batch_size, 1))
# Mean over the batch of the element-wise squared error between y and t.
loss = F.mean(F.squared_error(y, t))

In [88]:
# Smoke-test the graph using only the first training sample and its target.
# NOTE(review): x and t were declared with a leading batch_size dimension, so these
# single-sample assignments presumably get broadcast to every row of the batch — confirm.
x.d = X_n_train[0]
t.d = Y_train[0]

# Run the forward pass up to the loss (which also computes the prediction y).
loss.forward()

print("Prediction score of 0-th data: {}".format(y.d[0]))
print("Loss: {}".format(loss.d))

Prediction score of 0-th data: [-0.38314193]
Loss: 8.51555347442627


In [89]:
# Reset the gradient buffer of every registered parameter before training starts.
for parameter in nn.get_parameters().values():
    parameter.grad.zero()

In [90]:
# Plain SGD optimiser managing every parameter registered so far.
learning_rate = 1e-3
solver = S.Sgd(lr=learning_rate)
solver.set_parameters(nn.get_parameters())

In [91]:
# Create the mini-batch generator over the scaled training features and their targets.
batches = batch_iterator(X_n_train, Y_train, batch_size)

In [92]:
# Run 1000 SGD steps, logging the mini-batch loss every 10 steps.
for i in range(1000):
    try:
        x.d, t.d = next(batches)
    except StopIteration:
        # The generator covers the data exactly once. Start a new, reshuffled
        # epoch instead of crashing when it runs out (e.g. when this cell is
        # re-run or the step count is raised).
        batches = batch_iterator(X_n_train, Y_train, batch_size)
        x.d, t.d = next(batches)
    loss.forward()
    # Clear gradients left over from the previous step before backpropagating.
    solver.zero_grad()
    loss.backward()
    # Apply weight decay to the gradients before the parameter update.
    solver.weight_decay(1e-5)
    solver.update()
    if i % 10 == 0:
        print(i, loss.d)

0 7.353587
10 6.311363
20 6.0606356
30 5.085209
40 5.3466043
50 4.92111
60 3.2472134
70 4.272411
80 7.4172106
90 4.8684754
100 3.8393502
110 5.6408
120 4.829241
130 2.1075602
140 4.7666416
150 2.2091126
160 2.8622274
170 2.3666496
180 1.3563931
190 0.92049026
200 4.550437
210 3.8182964
220 2.9058757
230 1.4384882
240 4.5377345
250 2.517311
260 1.7787886
270 1.8366418
280 1.8494825
290 3.0461195
300 1.5604349
310 2.05832
320 2.1480205
330 2.027561
340 2.1365733
350 3.3752072
360 1.4623072
370 1.2753295
380 2.4431272
390 0.89798605
400 1.7679424
410 0.95020115
420 2.5570846
430 0.8592206
440 0.96230966
450 0.6623578
460 1.5494459
470 1.8948207
480 1.1944307
490 1.7212318
500 0.72403634
510 0.903383
520 1.6106346
530 1.2284075
540 1.1429598
550 1.28247
560 1.2268145
570 0.83441204
580 1.3517497
590 1.1948903
600 0.8139111
610 1.6572385
620 0.8233595
630 0.25674552
640 1.3770931
650 0.94984776
660 0.95223516
670 0.27280295
680 1.0717738
690 1.4469321
700 1.5464377
710 0.9460999
720 0.50387

In [93]:
# Create the mini-batch generator over the scaled test features and their targets.
batches_test = batch_iterator(X_n_test, Y_test, batch_size)

In [94]:
# Load the first test mini-batch into the graph inputs.
x.d, t.d = next(batches_test)
# Only the prediction is needed here, so run the forward pass up to y.
y.forward()
# Show the true targets, then the model's predictions for the same rows.
print(t.d)
print(y.d)

[[5.00001]
 [2.181  ]
 [2.292  ]
 [2.122  ]
 [3.01   ]
 [3.75   ]
 [4.348  ]
 [1.293  ]
 [2.298  ]
 [2.293  ]
 [0.713  ]
 [2.865  ]
 [2.4    ]
 [0.675  ]
 [4.75   ]
 [2.588  ]]
[[5.1498938]
 [1.818877 ]
 [1.9236218]
 [1.6991767]
 [2.3873115]
 [1.7600789]
 [2.086279 ]
 [1.1377101]
 [1.9228973]
 [1.8751578]
 [1.6852793]
 [2.014149 ]
 [2.577976 ]
 [1.5737858]
 [1.0198708]
 [2.2828207]]


2層のニューラルネットワーク

In [None]:
# Start from an empty parameter registry so the new network does not pick up
# parameters left over from the previous model.
nn.clear_parameters()


def Create_double_network(n_batch=None, n_inputs=None, n_hidden=32):
    """Build a two-layer regression network: affine -> tanh -> affine.

    nnabla variables are created with concrete integer dimensions, so the
    former ``None`` batch placeholder is replaced by an explicit batch size.

    Args:
        n_batch: Number of rows of the input variable. Defaults to the
            notebook-level ``batch_size``.
        n_inputs: Number of input features. Defaults to ``X_train.shape[1]``.
        n_hidden: Number of units in the hidden layer.

    Returns:
        ``(x, y)``: the input variable of shape ``(n_batch, n_inputs)`` and
        the output variable produced by the final one-unit affine layer.
    """
    if n_batch is None:
        n_batch = batch_size
    if n_inputs is None:
        n_inputs = X_train.shape[1]
    x = nn.Variable([n_batch, n_inputs])
    # NOTE: the scope name "midle" (sic) is part of the registered parameter
    # names, so it is kept unchanged.
    with nn.parameter_scope("midle"):
        h = F.tanh(PF.affine(x, n_hidden))
    with nn.parameter_scope("out"):
        y = PF.affine(h, 1)
    return x, y

# Example usage (left disabled, as in the original notebook):
#x, y = Create_double_network()
#print("Shapes:", x.shape, y.shape)

In [None]:
# Display the parameters currently registered with nnabla.
nn.get_parameters()

In [None]:
# Forward-pass check on the first rows of the training set.
# x has a fixed leading (batch) dimension, so assigning the whole of X_train
# is a shape mismatch; feed exactly as many rows as x holds.
# NOTE(review): these are the raw, unscaled features; the scaled X_n_train is
# probably what the model should see — confirm.
x.d = X_train[:x.shape[0]]
y.forward()
print(y.d)