In [1]:
import pandas as pd
import numpy as np
import torch as pt
from scipy.linalg import qr
import matplotlib.pyplot as plt
%matplotlib inline

# Allocate new tensors on the GPU when CUDA is usable and fall back to CPU float
# tensors otherwise, so the notebook still runs on machines without a GPU instead
# of failing on this line.
# NOTE(review): `set_default_tensor_type` is deprecated in recent PyTorch releases;
# it is kept here for compatibility with the PyTorch version this notebook targets.
DEFAULT_TENSOR_TYPE = 'torch.cuda.FloatTensor' if pt.cuda.is_available() else 'torch.FloatTensor'
pt.set_default_tensor_type(DEFAULT_TENSOR_TYPE)

In [2]:
from ml_lib.Controller import Controller as Control
#from ml_lib.clusters.root_cluster.RootCluster import RootCluster as Root
from ml_lib.clusters.data_cluster.DataCluster import DataCluster as Data
from ml_lib.clusters.learn_cluster.LearnCluster import LearnCluster as Learn

In [3]:
# strptime pattern used to parse the `date` column.
DATE_FORMAT = '%Y%m%dT%H%M%S'

# Load the raw table keyed by `id`; keep it under its own name so this cell can be
# re-run without losing the unmodified frame.
dataset_raw = pd.read_csv('data_files/kc_house_data.csv', index_col = 'id')

# Parse the dates after loading rather than through `date_parser`: `pd.datetime`
# was removed in pandas 2.0 and `date_parser` is deprecated, whereas
# `pd.to_datetime(..., format=...)` works across pandas versions.
dataset_parsed = dataset_raw.assign(
    date = pd.to_datetime(dataset_raw['date'], format = DATE_FORMAT)
)

# Drop every column whose name ends in '15'.
kept_cols = [col for col in dataset_parsed.columns if not col.endswith('15')]
dataset = dataset_parsed[kept_cols]

# Columns fed to the model and the column it is trained to predict.
feature_cols = ['sqft_living', 'bedrooms', 'bathrooms']
target_cols = ['price']

In [4]:
# Single switch for logging; it is forwarded to every component configured below.
verbose = True

control = Control('regression_controller')

# --- Data cluster -----------------------------------------------------------
# Holds the feature and target columns; each sub-component receives its own
# kwargs dict, and the batcher additionally gets a `proportion` of 0.1.
data_cluster = Data(
    'data_cluster',
    dataset[feature_cols + target_cols],
    splitter_kwargs = dict(verbose = verbose),
    batcher_kwargs = dict(verbose = verbose, proportion = 0.1),
    loss_kwargs = dict(verbose = verbose),
    loss_combiner_kwargs = dict(verbose = verbose),
    verbose = verbose
)
control.add_cluster(data_cluster)

# --- Learn cluster ----------------------------------------------------------
# One output node per target column; every nested component of the module gets a
# separate {'verbose': ...} dict.
module_kwargs = {'nodes': len(target_cols), 'verbose': verbose}
module_kwargs.update(
    (component_key, {'verbose': verbose})
    for component_key in (
        'bias_init_kwargs',
        'weight_init_kwargs',
        'combiner_kwargs',
        'activator_kwargs',
        'learner_kwargs',
    )
)
learn_cluster = Learn('learn_cluster', module_kwargs = module_kwargs, verbose = verbose)

# Register the learn cluster and link it to the data cluster on the feature
# columns (the recorded log shows learn_cluster added to data_cluster's output
# links and data_cluster added to learn_cluster's input links).
control.link_add(learn_cluster, 'data_cluster', 'input', data_cols = feature_cols)

# Link in the other direction on the target columns (the recorded log shows
# data_cluster added to learn_cluster's output links and learn_cluster added to
# data_cluster's input links).
control.link_clusters('learn_cluster', 'data_cluster', data_cols = target_cols)

2018-12-18 21:56:00.030369 | N/A:data_cluster (DataCluster) - Data frame added, overwrite False.
2018-12-18 21:56:00.031041 | N/A:data_cluster:splitter (BaseSplit) - Splits generated: train 17290 obs, holdout 4323 obs. 21613 total obs
2018-12-18 21:56:00.031573 | regression_controller:data_cluster (DataCluster) - Cluster learn_cluster added to output links.
2018-12-18 21:56:00.031628 | regression_controller:learn_cluster (LearnCluster) - Cluster data_cluster added to input links.
2018-12-18 21:56:00.031738 | regression_controller:learn_cluster (LearnCluster) - Cluster data_cluster added to output links.
2018-12-18 21:56:00.031793 | regression_controller:data_cluster (DataCluster) - Cluster learn_cluster added to input links.


In [5]:
# Enable the linked clusters and then build the batch splits.
# Per the log recorded under this cell: both clusters report "Cluster enabled",
# the dense module creates a (1, 1) flat-zero tensor and a (3, 1) normal tensor
# and reports a (4, 1) coefficient tensor, and the batcher reports a batch of
# 1729 train / 432 holdout observations (presumably the 0.1 `proportion` of the
# 17290 / 4323 split — TODO confirm against the batcher).
control.enable_network()
control.build_batch_splits()

2018-12-18 21:56:00.038345 | regression_controller:data_cluster (DataCluster) - Cluster enabled.
2018-12-18 21:56:00.038404 | regression_controller:learn_cluster (LearnCluster) - Cluster enabled.
2018-12-18 21:56:00.038705 | N/A:learn_cluster:dense_module:initialiser (FlatInit) - Tensor shape (1, 1) created with flat value 0.
2018-12-18 21:56:00.040028 | N/A:learn_cluster:dense_module:initialiser (NormalInit) - Tensor shape (3, 1) created with normal mean 0 & stdev 1.
2018-12-18 21:56:00.041930 | N/A:learn_cluster:module (DenseModule) - Generated coefficient tensor of shape (4, 1)
2018-12-18 21:56:00.042495 | N/A:data_cluster:batcher (FlatBatch) - Batch generated: train 1729 obs, holdout 432 obs.


In [6]:
# Read the data cluster's `loss`. Per the log recorded below, this access sends a
# (2161, 3) tensor out of the data cluster and through the learn cluster's
# combiner and activator, producing a (2161, 1) tensor.
# NOTE(review): the recorded run then fails with
#   RuntimeError: index 17280 is out of bounds for dimension 0 with size 2161
# 2161 matches the batch size (1729 train + 432 holdout) while 17280 is below the
# 17290-row train split, so presumably the loss indexes the batch-sized output
# with split-level positions — confirm inside ml_lib before relying on this cell.
control.clusters['data_cluster'].loss

2018-12-18 21:56:00.049136 | regression_controller:data_cluster (DataCluster) - Output tensor shape (2161, 3) provided.
2018-12-18 21:56:00.263947 | N/A:learn_cluster:dense_module:combiner (SimpleCombine) - Combined to produce (2161, 1) shape tensor
2018-12-18 21:56:00.264076 | N/A:learn_cluster:dense_module:activator (LinearActivate) - Linear Activation on (2161, 1) shape tensor


RuntimeError: index 17280 is out of bounds for dimension 0 with size 2161