In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing #カリフォルニア
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error

import pandas as pd
import matplotlib.pyplot as plt

import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S

%matplotlib inline

データの確認

In [None]:
# Load the California housing dataset and wrap it in pandas objects for inspection.
california_housing = fetch_california_housing()

# Feature matrix as a DataFrame with named columns.
train_x = pd.DataFrame(
    data=california_housing.data,
    columns=california_housing.feature_names,
)
# Regression target as a Series.
train_y = pd.Series(data=california_housing.target)

train_x.head()

In [None]:
# Preview the first few target values.
train_y.head()

データの読み込みと標準化

In [None]:
# Load the raw feature matrix and regression target as NumPy arrays.
dataset = fetch_california_housing()
Y = dataset.target
X = dataset.data
# Hold out 10% of the samples for testing. A fixed random_state makes the split
# (and therefore every metric reported below) reproducible across kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=42
)
len(X_train)

In [None]:
# Standardize the features. The scaler is fitted on the training split only and the
# same fitted statistics are then applied to the test split; re-fitting on the test
# split would scale the two splits differently and leak test-set statistics.
scaler = StandardScaler()
X_n_train = scaler.fit_transform(X_train)
X_n_test = scaler.transform(X_test)

データイテレーターを作成する

In [None]:
def batch_iterator(data, label, batch_size):
    """Yield shuffled mini-batches that together cover the data exactly once.

    Args:
        data: Array whose first axis indexes samples; it must support indexing
            with an integer index array.
        label: Array of labels aligned with ``data`` along the first axis.
        batch_size: Maximum number of samples per batch.

    Yields:
        Tuples ``(data_batch, label_batch)`` where ``label_batch`` is reshaped
        into a column of shape ``(n, 1)``. ``n`` equals ``batch_size`` except
        for the final batch, which holds the remaining samples when the number
        of samples is not a multiple of ``batch_size``.
    """
    n_samples = data.shape[0]
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    for start in range(0, n_samples, batch_size):
        end = min(start + batch_size, n_samples)
        batch_idx = indices[start:end]
        # Use -1 so the final, possibly smaller, batch reshapes correctly;
        # a hard-coded batch_size here raises ValueError on the remainder.
        label_2d = label[batch_idx].reshape((-1, 1))
        yield data[batch_idx], label_2d

線形回帰

In [None]:
# Clear previously registered nnabla parameters before building the model.
nn.clear_parameters()
batch_size = 64
# Input placeholder: one row per sample in the batch, one column per feature.
x = nn.Variable([batch_size, X_n_train.shape[1]])
# A single affine layer with one output unit, i.e. a linear model.
with nn.parameter_scope("affine"):
    y = PF.affine(x, 1)

In [None]:
# Target placeholder: one value per sample in the batch.
t = nn.Variable([batch_size, 1])
# Mean of the element-wise squared error between predictions and targets.
loss = F.mean(F.squared_error(y, t))

In [None]:
# Smoke test: feed the first training sample and its target, then run a forward pass.
# NOTE(review): x and t were declared with a leading batch_size dimension, so this
# single sample is presumably broadcast across the whole batch -- confirm this is intended.
x.d = X_n_train[0]
t.d = Y_train[0]

loss.forward()

In [None]:
# Reset the gradient buffer of every registered parameter to zero.
for param in nn.get_parameters().values():
    param.grad.zero()

In [None]:
# Create an SGD solver and hand it all currently registered parameters to update.
learning_rate = 1e-3
solver = S.Sgd(learning_rate)
solver.set_parameters(nn.get_parameters())

In [None]:
# Create the mini-batch generator over the standardized training split.
batches = batch_iterator(X_n_train, Y_train, batch_size)

In [None]:
# Run 250 SGD steps, drawing one mini-batch from the generator per step.
for i in range(250):
    # Load the next mini-batch into the input and target variables.
    x.d, t.d = next(batches)
    loss.forward()
    # Clear stale gradients before backpropagating the current loss.
    solver.zero_grad()
    loss.backward()
    # Weight decay acts as a regularizer and is applied before the update.
    solver.weight_decay(1e-5)
    solver.update()
    # Only report the loss on every 10th step.
    if i % 10 != 0:
        continue
    print(i, loss.d)

In [None]:
# Evaluate on the first mini-batch drawn from the shuffled test split.
batches_test = batch_iterator(X_n_test, Y_test, batch_size)
x.d, t.d = next(batches_test)
# A forward pass up to the prediction is enough; the loss is not needed here.
y.forward()
print(mean_absolute_error(y_true=t.d, y_pred=y.d))

In [None]:
# Flatten the (batch, 1) prediction and target arrays into columns of one table.
result = pd.DataFrame({
    "predict": y.d.reshape(-1),
    "actual": t.d.reshape(-1),
})
# Both axes share the range [0, largest actual value] so the diagonal marks a perfect fit.
limit = t.d.max()

result.plot.scatter(
    x="actual",
    y="predict",
    xlim=(0, limit),
    ylim=(0, limit),
)
plt.show()

2層のニューラルネットワーク

In [None]:
# Clear the parameters of the previous model before defining the new network.
nn.clear_parameters()
def Create_double_network(n_hidden=32):
    """Build a two-layer network: affine -> tanh -> affine with one output.

    The input shape is taken from the notebook-level ``batch_size`` and the
    number of columns of ``X_n_train``.

    Args:
        n_hidden: Number of units in the hidden layer. Defaults to 32, the
            width that was previously hard-coded.

    Returns:
        Tuple ``(x, y)`` of the input variable and the output variable.
    """
    x = nn.Variable([batch_size, X_n_train.shape[1]])
    # Hidden layer: affine projection followed by tanh.
    with nn.parameter_scope("tanh"):
        h = F.tanh(PF.affine(x, n_hidden))
    # Output layer: a single regression value per sample.
    with nn.parameter_scope("affine1"):
        y = PF.affine(h, 1)
    return x, y

In [None]:
# Rebind x, y and loss to the new network; t is the target variable created in an earlier cell.
x, y = Create_double_network()
loss = F.mean(F.squared_error(y, t))

In [None]:
# Create a fresh mini-batch generator over the standardized training split.
batches = batch_iterator(X_n_train, Y_train, batch_size)

In [None]:
def training(steps, learning_rate):
    """Run SGD on the notebook-level ``loss`` for a fixed number of steps.

    Reads the notebook-level ``x``, ``t``, ``loss`` and ``batches``; each step
    consumes one mini-batch from ``batches``.

    Args:
        steps: Number of update steps to run.
        learning_rate: Learning rate passed to the SGD solver.
    """
    solver = S.Sgd(learning_rate)
    solver.set_parameters(nn.get_parameters())
    for step in range(steps):
        # Load the next mini-batch into the input and target variables.
        x.d, t.d = next(batches)
        loss.forward()
        # Gradients are reset before every backward pass.
        solver.zero_grad()
        loss.backward()
        # Weight decay serves as regularization and precedes the update.
        solver.weight_decay(1e-5)
        solver.update()
        # Log the loss only on every 10th step.
        if step % 10 != 0:
            continue
        print(step, loss.d)


# Train the two-layer network for 250 steps with a learning rate of 1e-2.
training(250, 1e-2)

In [None]:
# Evaluate the two-layer network on the first mini-batch drawn from the shuffled test split.
batches_test = batch_iterator(X_n_test, Y_test, batch_size)
x.d, t.d = next(batches_test)
# Only the prediction is needed, so run the forward pass up to y.
y.forward()
print(mean_absolute_error(y_true=t.d, y_pred=y.d))

In [None]:
# Flatten the (batch, 1) prediction and target arrays into columns of one table.
result = pd.DataFrame({
    "predict": y.d.reshape(-1),
    "actual": t.d.reshape(-1),
})
# Both axes share the range [0, largest actual value] so the diagonal marks a perfect fit.
limit = t.d.max()

result.plot.scatter(
    x="actual",
    y="predict",
    xlim=(0, limit),
    ylim=(0, limit),
)
plt.show()