In [1]:
import cgpm.utils.general as general
import json
import itertools
import pandas as pd
import yaml
import numpy as np
import copy
from cgpm.crosscat.state import State

In [2]:
import matplotlib.pyplot as plt
import scipy

In [3]:
# Load the two serialized CGPM state metadata files compared in this notebook.
# NOTE(review): judging by the paths, "complete" is the unrefined sample and
# "refined" is the already-refined one — confirm against the pipeline.
with open('data/cgpm/complete/sample.0.json') as unrefined_metadata_file:
    unrefined_metadata = json.load(unrefined_metadata_file)

with open('data/cgpm/refined/sample.0.json') as refined_metadata_file:
    refined_metadata = json.load(refined_metadata_file)

# Load the numericalized data table; in the cells shown here only its column
# count is used (to build `columns_transition`).
with open('data/numericalized.csv') as data_file:
    df = pd.read_csv(data_file)

In [4]:
# Index of every column in `df`; passed as `cols` to the refine/check/transition calls below.
columns_transition = list(range(df.shape[1]))

In [5]:
def none_to_nan(x):
    """Return ``float('nan')`` when ``x`` is ``None``; otherwise return ``x`` unchanged."""
    return float('nan') if x is None else x


def list_fmap(f):
    """Lift ``f`` to operate on lists.

    Returns a one-argument function that applies ``f`` to every element of an
    iterable ``xs`` and returns the results as a new list. Nesting calls
    (``list_fmap(list_fmap(f))``) maps ``f`` over a list of lists.
    """
    def apply_to_each(xs):
        return [f(x) for x in xs]
    return apply_to_each

In [6]:
def refine_crp_hyper_grids(state, n=30):
    """Replace the state-level CRP ``alpha`` grid with a narrow one around its current value.

    The new grid has ``n`` evenly spaced points running from 80% to 120% of
    ``state.crp.hypers['alpha']`` and is written to
    ``state.crp.hyper_grids['alpha']``. The state is modified in place and
    nothing is returned.
    """
    crp = state.crp
    alpha = crp.hypers['alpha']
    lower, upper = 0.80 * alpha, 1.2 * alpha
    crp.hyper_grids['alpha'] = np.linspace(lower, upper, n)

def refine_view_hyper_grids(state, cols, n=30):
    """Narrow the CRP ``alpha`` grid of every view in ``state.views``.

    For each view, the grid becomes ``n`` evenly spaced points from 80% to
    120% of that view's current ``crp.hypers['alpha']``. Views are modified in
    place and nothing is returned.

    ``cols`` is accepted but not used by this function.
    """
    for view in state.views.values():
        alpha = view.crp.hypers['alpha']
        view.crp.hyper_grids['alpha'] = np.linspace(0.80 * alpha, 1.2 * alpha, n)

def refine_dim_hyper_grids(state, cols, n=30):
    """Narrow every hyper-parameter grid of the dims belonging to ``cols``.

    For each column, the dim returned by ``state.dim_for(col)`` gets, for each
    entry in its ``hypers``, a grid of ``n`` evenly spaced points from 80% to
    120% of the current value, stored under the same name in ``hyper_grids``.
    Dims are modified in place and nothing is returned.
    """
    for col in cols:
        dim = state.dim_for(col)
        for hyper_name, hyper_value in dim.hypers.items():
            lower, upper = 0.80 * hyper_value, 1.2 * hyper_value
            dim.hyper_grids[hyper_name] = np.linspace(lower, upper, n)

In [7]:
def have_same_hyper_grids(x, y):
    """Return True when ``x`` and ``y`` hold identical hyper-parameter grids.

    Both arguments must expose a ``hyper_grids`` mapping from parameter name
    to a grid of values. Grids are compared with ``np.array_equal``, so grids
    of different lengths or shapes simply compare unequal; they neither raise
    nor get broadcast against each other, and plain-list grids work too.

    Raises:
        AssertionError: if the two objects do not define grids for the same
            set of parameter names.
    """
    assert x.hyper_grids.keys() == y.hyper_grids.keys(), \
        'objects define hyper grids for different parameters'
    return all(
        np.array_equal(x.hyper_grids[param], y.hyper_grids[param])
        for param in x.hyper_grids
    )

In [8]:
def have_same_crp_hyper_grids(s1, s2):
    """Return whether the ``crp`` attributes of ``s1`` and ``s2`` have equal hyper grids.

    Works for any pair of objects exposing ``crp``; in this notebook it is
    called with both whole states and individual views.
    """
    first_crp, second_crp = s1.crp, s2.crp
    return have_same_hyper_grids(first_crp, second_crp)

def have_same_view_hyper_grids(s1, s2):
    """Return whether every view of ``s1`` has the same CRP hyper grids as its counterpart in ``s2``.

    Views are matched by key in ``views``; an ``AssertionError`` is raised if
    the two states do not have the same set of view keys.
    """
    view_ids = s1.views.keys()
    assert view_ids == s2.views.keys()
    for view_id in view_ids:
        if not have_same_crp_hyper_grids(s1.views[view_id], s2.views[view_id]):
            return False
    return True

def have_same_dim_hyper_grids(s1, s2, cols):
    """Return whether, for every column in ``cols``, both states' dims have equal hyper grids.

    The dim for a column is obtained with ``dim_for(col)`` on each state.
    """
    for col in cols:
        if not have_same_hyper_grids(s1.dim_for(col), s2.dim_for(col)):
            return False
    return True

In [9]:
def have_same_overall_hyper_grids(s1, s2, cols):
    """Return True only if the state CRP, all view CRPs and all dims in ``cols`` have equal hyper grids.

    All three comparisons are always evaluated (no short-circuiting), so an
    assertion failure in any of them surfaces even when an earlier comparison
    is already False.
    """
    crp_match = have_same_crp_hyper_grids(s1, s2)
    view_match = have_same_view_hyper_grids(s1, s2)
    dim_match = have_same_dim_hyper_grids(s1, s2, cols)
    return all((crp_match, view_match, dim_match))

In [10]:
# `json.load` turns JSON nulls into None; replace every None in the nested-list
# data matrix 'X' with float('nan') before the metadata is turned into a State.
# NOTE(review): presumably State.from_metadata expects NaN for missing cells — confirm.
for _metadata in (unrefined_metadata, refined_metadata):
    _metadata['X'] = [[none_to_nan(cell) for cell in row] for row in _metadata['X']]

In [11]:
# Rebuild one CGPM State per metadata file.
# NOTE(review): no rng/seed is passed here, so the transitions run later are
# presumably not reproducible across kernel restarts — confirm if that matters.
unrefined_model = State.from_metadata(unrefined_metadata)
refined_model = State.from_metadata(refined_metadata)

In [12]:
# Baseline: compare every hyper grid of the two freshly loaded models, before any refinement.
have_same_overall_hyper_grids(unrefined_model, refined_model, columns_transition)

True

The cell above evaluates to `True` because, when CGPM creates a state from a metadata file, it sets the hyper-parameter grids to pre-defined default values. The cells below check that, once we refine these grids, the parameter-update moves (transitions) do not reset them to those default values.

In [13]:
# unrefined_model.crp.hyper_grids

In [14]:
# refine_crp_hyper_grids(unrefined_model)
# unrefined_model.crp.hyper_grids

In [15]:
# refined_model.crp.hypers, unrefined_model.crp.hypers

In [16]:
# refined_model.views[2].crp.hypers, unrefined_model.views[2].crp.hypers

In [17]:
# unrefined_model.dim_for(0).hyper_grids

In [18]:
# [(refined_model.dim_for(c).hypers, unrefined_model.dim_for(c).hypers) for c in columns_transition]

In [19]:
# Narrow all three families of grids (state CRP, view CRPs, dims) on refined_model only;
# unrefined_model keeps the grids it was loaded with and serves as the reference.
refine_crp_hyper_grids(refined_model)   
refine_view_hyper_grids(refined_model, columns_transition)
refine_dim_hyper_grids(refined_model, columns_transition)

In [20]:
# Expected False: refined_model's grids were just replaced, unrefined_model's were not.
have_same_overall_hyper_grids(unrefined_model, refined_model, columns_transition)

False

In [21]:
# Run the state-level CRP alpha transition; the next cells check it did not reset the refined grid.
refined_model.transition_crp_alpha()

In [22]:
# Expected False: the state CRP grid should still be the refined one, not the default.
have_same_crp_hyper_grids(refined_model, unrefined_model)

False

In [23]:
# Overall check after the CRP alpha transition; expected to stay False.
have_same_overall_hyper_grids(unrefined_model, refined_model, columns_transition)

False

In [24]:
# refined_model.crp.hyper_grids['alpha'], unrefined_model.crp.hyper_grids['alpha']

In [25]:
# Run the view-level alpha transition (views=None is passed as-is; presumably "all views" — confirm).
refined_model.transition_view_alphas(views=None, cols=columns_transition)

In [26]:
# Expected False: the per-view CRP grids should still be the refined ones.
have_same_view_hyper_grids(unrefined_model, refined_model)

False

In [27]:
# Overall check after the view alpha transition; expected to stay False.
have_same_overall_hyper_grids(unrefined_model, refined_model, columns_transition)

False

In [28]:
# Run the dim hyper-parameter transition for every column.
refined_model.transition_dim_hypers(cols=columns_transition)

In [29]:
# Expected False: the dim grids should still be the refined ones.
have_same_dim_hyper_grids(unrefined_model, refined_model, columns_transition)

False

In [30]:
# Overall check after the dim hyper-parameter transition; expected to stay False.
have_same_overall_hyper_grids(unrefined_model, refined_model, columns_transition)

False

In [31]:
# Run the view-rows transition (views=None / rows=None passed as-is; presumably "all" — confirm).
refined_model.transition_view_rows(views=None, cols=columns_transition, rows=None)

In [32]:
# Overall check after the view-rows transition; expected to stay False.
have_same_overall_hyper_grids(unrefined_model, refined_model, columns_transition)

False

In [33]:
# Run the dims transition for every column.
# NOTE(review): presumably this can move columns between views — confirm against the CGPM docs.
refined_model.transition_dims(cols=columns_transition)

In [34]:
# Expected False: the dim grids should still be the refined ones after the dims transition.
have_same_dim_hyper_grids(unrefined_model, refined_model, columns_transition)

False

In [35]:
# Final overall check after all transitions above; expected to stay False.
have_same_overall_hyper_grids(unrefined_model, refined_model, columns_transition)

False