<h1> Engine Dataset</h1>

In [1]:
from engine_dataset import generate_engine_dataset, generate_engine_datasets

from oSLRAU_run import get_data
from spn.structure.leaves.parametric.Parametric import Gaussian, Bernoulli, Categorical,  In_Latent
from spn.structure.Base import Context
from spn.algorithms.LearningWrappers import learn_parametric
from spn.io.Graphics import plot_spn
import numpy as np
from spn.algorithms.Inference import log_likelihood
from sklearn.model_selection import train_test_split
from spn.algorithms.oSLRAU import oSLRAU, oSLRAUParams
from spn.algorithms.RSPN import RSPN
from spn.algorithms.TransformStructure import Prune ,Prune_oSLRAU

from hmmlearn import hmm
from spn.algorithms.Inference import log_likelihood



In [2]:
a = generate_engine_dataset(num_samples=1000, seq_length=12)

In [3]:
np.array(a.inputs_raw).shape

(1000, 12)

In [38]:
np.array(a.observations_raw_discrete).shape

(1000, 12)

In [41]:
np.array(a.observations_raw_continuous).shape

(1000, 12, 2)

<h1> oSLRAU training funcs</h1>

In [52]:
def train_rspn_mix(train_data, test_data, rspn=None, os_args=None):

    if len(train_data.shape) == 3: train_data = np.squeeze(train_data)
    if len(test_data.shape) == 3: test_data = np.squeeze(test_data)

    if rspn is None:
    
        rspn = RSPN(num_variables=os_args["num_variables"], num_latent_variables=os_args["num_latent_variables"], num_latent_values=os_args["num_latent_values"])
        first_mini_batch = train_data[0:os_args["mini_batch_size"]]
        n = first_mini_batch.shape[1]
        # print(f"n: {n}") # = num_time_steps_mix * n_dim
        
        context = [Gaussian, Gaussian, Categorical]*(int(n/3)) # Bernoulli Categorical
        ds_context = Context(parametric_types=context).add_domains(first_mini_batch[:, :os_args["num_variables"]])
        spn, initial_template_spn, top_spn = rspn.build_initial_template(first_mini_batch, ds_context, os_args["len_sequence_varies"])

    
    no_of_minibatches = int(train_data.shape[0] / os_args["mini_batch_size"])

    print(f"no of minibatches: {no_of_minibatches}")

    
    for i in range(1, no_of_minibatches):
        mini_batch = train_data[i * os_args["mini_batch_size"]: (i+1) * os_args["mini_batch_size"]]
    
        update_template = False
        
        if i % os_args["update_after_no_min_batches"] == 0:
            print(i)
            update_template = True
    
        template_spn = rspn.learn_rspn(mini_batch, update_template, os_args["os_params"], os_args["unroll"],
                                        os_args["full_update"], os_args["update_leaves"],
                                        os_args["len_sequence_varies"])
           
            
    test_ll = np.sum(rspn.log_likelihood(test_data, os_args["unroll"], os_args["len_sequence_varies"]))


    return test_ll, rspn, template_spn

In [54]:
def final_ll_mix(data, num_epochs=1, do_plot_spn=True, os_args=None, split_data=True):
    ll = -np.inf
    rspn=None
    if split_data:
        train_data, val_data= train_test_split(data, test_size=0.1, random_state=42)
    
    else:
        train_data, val_data = data, data
    for i in range(num_epochs):
        print(train_data.shape)
        
        
        epoch_ll, rspn, template_spn = train_rspn_mix(train_data, val_data, rspn, os_args)        
        print(f"epoch_ll: {epoch_ll}")
        
        ll = epoch_ll

    
    print(f" \n\n\n final ll: {ll}\n\n\n")

    if do_plot_spn:
        plot_spn(template_spn, 'rspn_final_template.pdf')
    
    return rspn

<h1> Prepare data for oSLRAU (not including input variable)</h1>

In [46]:
train_ds_raw, test_ds_raw = generate_engine_datasets(seeds=range(42, 43))

In [55]:
def osLRAU_pipeline(ds):
    assert len(ds) == 1
    if len(ds) == 1:
        ds = ds[0]
    
    ds_raw_disc = np.array(ds.observations_raw_discrete)
    ds_raw_cont = np.array(ds.observations_raw_continuous)

    first_dim, second_dim = ds_raw_disc.shape[:2]
    print(f"==>> type(ds_raw_cont[0, 0,:]): {type(ds_raw_cont[0, 0,:])}")
    print(f"==>> type(ds_raw_disc[0, 0]): {type(ds_raw_disc[0, 0])}")
    print(ds_raw_disc[0, 0])
    print(np.append(ds_raw_cont[0, 0,:], float(ds_raw_disc[0, 0])))

    ds_comb = []

    for i in range(first_dim):
        curr_ds_comb = []
        for j in range(second_dim):
            curr_ds_comb.append(np.append(ds_raw_cont[i, j,:], float(ds_raw_disc[i, j])))
            
        
        ds_comb.append(curr_ds_comb)



    ds_comb = np.array(ds_comb)
    ds_comb = ds_comb.reshape(ds_comb.shape[0], -1)

    return ds_comb

In [56]:
train_ds = osLRAU_pipeline(train_ds_raw)
test_ds = osLRAU_pipeline(test_ds_raw)

==>> type(ds_raw_cont[0, 0,:]): <class 'numpy.ndarray'>
==>> type(ds_raw_disc[0, 0]): <class 'numpy.int64'>
1
[74.47765188 76.53556304  1.        ]
==>> type(ds_raw_cont[0, 0,:]): <class 'numpy.ndarray'>
==>> type(ds_raw_disc[0, 0]): <class 'numpy.int64'>
3
[51.01685571 63.50408668  3.        ]


In [57]:
assert len(train_ds.shape) == 2

<h2> Setup oSLRAU </h2>

In [58]:
n_states = 2
n_dim = 3

num_time_steps = 12

oSLRAU_params = oSLRAUParams(mergebatch_threshold=10, corrthresh=0.7, mvmaxscope=1, equalweight=True, currVals=True)

os_args = {
    "num_variables": num_time_steps * n_dim,
    "num_latent_variables" : n_states,
    "num_latent_values" : n_dim,
    "unroll" : "backward",
    "full_update" : False,
    "update_leaves" : True,
    "len_sequence_varies" : False,
    "mini_batch_size" : 5,
    "update_after_no_min_batches" : 5,
    "os_params": oSLRAU_params
    }


<h2> Train rspn and test on test dataset</h2>

In [60]:
rspn_engine = final_ll_mix(train_ds, num_epochs=6, do_plot_spn=False, os_args=os_args, split_data=False)

(1000, 36)
==>> data.shape: (5, 36)
len(self.meta_types): 36
self.meta_types.values(): dict_values([<MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>])
len(self.meta_types.values()): 36
data[:, col]: [74.47765188 57.535537   54.65603964 50.77333836 54.86426868]
feature_meta_type: Me

  w_children_log_probs[:, i] = lls_per_node[parent_result, c.id] + np.log(node.weights[i])


type of node: <class 'spn.structure.Base.Product'>
isinstance(node, Leaf): False
type of node: <class 'spn.structure.leaves.parametric.Parametric.Categorical'>
isinstance(node, Leaf): True
x: [[1.]
 [2.]
 [0.]
 [0.]
 [0.]]
type of node: <class 'spn.structure.leaves.parametric.Parametric.Gaussian'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Gaussian'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Categorical'>
isinstance(node, Leaf): True
x: [[0.]
 [0.]
 [3.]
 [1.]
 [2.]]
type of node: <class 'spn.structure.leaves.parametric.Parametric.Gaussian'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Gaussian'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Categorical'>
isinstance(node, Leaf): True
x: [[1.]
 [1.]
 [3.]
 [3.]
 [0.]]
type of node: <class 'spn.structure.leaves.parametric.Parametric.Gaussian

In [61]:
rspn_test_ll_engine = np.sum(rspn_engine.log_likelihood(test_ds, os_args["unroll"], os_args["len_sequence_varies"]))
rspn_test_ll_engine

Length of the sequence in mini_batch: 1
==>> data.shape: (1000, 36)
==>> self.num_variables: 36
==>> self.len_sequence: 1
Evaluating rspn bottom up


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


-110162.59110700403

<h1> Prepare data for oSLRAU (including input variable) </h1>

In [34]:
train_ds_raw, test_ds_raw = generate_engine_datasets(seeds=range(42, 43))

In [21]:
train_ds_raw[0].inputs_raw

[[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
 [1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1],
 [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0],
 [0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0],
 [0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0],
 [1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0],
 [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
 [0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0],
 [0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
 [1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1],
 [1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1],
 [1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
 [1, 0, 0, 0, 1, 1, 0, 0,

In [35]:
def osLRAU_pipeline(ds):
    assert len(ds) == 1
    if len(ds) == 1:
        ds = ds[0]
    
    ds_raw_disc = np.array(ds.observations_raw_discrete)
    ds_raw_cont = np.array(ds.observations_raw_continuous)
    ds_input_raw = np.array(ds.inputs_raw)

    first_dim, second_dim = ds_raw_disc.shape[:2]
    print(f"==>> type(ds_raw_cont[0, 0,:]): {type(ds_raw_cont[0, 0,:])}")
    print(f"==>> type(ds_raw_disc[0, 0]): {type(ds_raw_disc[0, 0])}")
    print(ds_raw_disc[0, 0])

    ds_comb = []

    for i in range(first_dim):
        curr_seq = []
        for j in range(second_dim):
            curr_ds_comb = [*ds_raw_cont[i, j,:], float(ds_raw_disc[i, j]), float(ds_input_raw[i, j])] 
            curr_seq.append(curr_ds_comb)

        ds_comb.append(curr_seq)

    ds_comb = np.array(ds_comb)

    print(ds_comb.shape)
    ds_comb = ds_comb.reshape(ds_comb.shape[0], -1)

    print(ds_comb.shape)
    return ds_comb

In [36]:
train_ds = osLRAU_pipeline(train_ds_raw)
test_ds = osLRAU_pipeline(test_ds_raw)

==>> type(ds_raw_cont[0, 0,:]): <class 'numpy.ndarray'>
==>> type(ds_raw_disc[0, 0]): <class 'numpy.int64'>
1
(1000, 12, 4)
(1000, 48)
==>> type(ds_raw_cont[0, 0,:]): <class 'numpy.ndarray'>
==>> type(ds_raw_disc[0, 0]): <class 'numpy.int64'>
3
(1000, 12, 4)
(1000, 48)


In [12]:
assert len(train_ds.shape) == 2

In [40]:
n_states = 3
n_dim = 4

num_time_steps = 12

oSLRAU_params = oSLRAUParams(mergebatch_threshold=10, corrthresh=0.7, mvmaxscope=1, equalweight=True, currVals=True)

os_args = {
    "num_variables": num_time_steps * n_dim,
    "num_latent_variables" : n_states,
    "num_latent_values" : n_dim,
    "unroll" : "backward",
    "full_update" : False,
    "update_leaves" : True,
    "len_sequence_varies" : False,
    "mini_batch_size" : 5,
    "update_after_no_min_batches" : 5,
    "os_params": oSLRAU_params
    }


In [41]:

rspn_engine = final_ll_mix(train_ds, num_epochs=3, do_plot_spn=False, os_args=os_args)

(900, 48)
==>> data.shape: (5, 48)
len(self.meta_types): 48
self.meta_types.values(): dict_values([<MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <MetaType.DISCRETE: 3>, <MetaType.BINARY: 2>, <MetaType.REAL: 1>, <MetaType.REAL: 1>, <

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


type of node: <class 'spn.structure.Base.Product'>
isinstance(node, Leaf): False
type of node: <class 'spn.structure.Base.Product'>
isinstance(node, Leaf): False
type of node: <class 'spn.structure.leaves.parametric.Parametric.Bernoulli'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Bernoulli'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Bernoulli'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Gaussian'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Gaussian'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Categorical'>
isinstance(node, Leaf): True
x: [[0.]
 [0.]
 [3.]
 [3.]]
type of node: <class 'spn.structure.leaves.parametric.Parametric.Bernoulli'>
isinstance(node, Leaf): True
type of node: <class 'spn.structure.leaves.parametric.Parametric.Gau

  x = np.asarray((x - loc)/scale, dtype=dtyp)
  x = np.asarray((x - loc)/scale, dtype=dtyp)


AssertionError: 

In [25]:
rspn_test_ll_engine = np.sum(rspn_engine.log_likelihood(test_ds, os_args["unroll"], os_args["len_sequence_varies"]))
rspn_test_ll_engine

NameError: name 'rspn_engine' is not defined