In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.insert(0, '..')
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat

from instance_neg_log_likelyhood import *
import helper
import solution


# Part 1
## Stochastic Gradient Descent

In [3]:
# Load the Part 1 training arrays from MATLAB .mat files. loadmat returns a
# dict keyed by the stored variable name, so the array is indexed out by name.
X = loadmat("./data/Train1X.mat")['Train1X']
# squeeze() drops any singleton axes from the loaded target array.
y = loadmat("./data/Train1Y.mat")['Train1Y'].squeeze()

In [4]:
# Fit the Part 1 model parameters on the training arrays.
# NOTE(review): the third argument (0) is presumably the regularisation
# strength lambda — confirm against helper.lr_train.
theta = helper.lr_train(X, y, 0)

In [5]:
# Predict on the same X that theta was fitted on (in-sample predictions).
pred = helper.lr_predict(X, theta)

In [6]:
# Accuracy of the in-sample predictions against the training targets;
# the bare expression is displayed as the cell output.
helper.lr_accuracy(y, pred)

0.96

### Test set

In [7]:
# Load the Part 1 test arrays; same file layout as the training files
# (one .mat file per array, keyed by the stored variable name).
X_test = loadmat("./data/Test1X.mat")['Test1X']
# squeeze() drops any singleton axes from the loaded target array.
y_test = loadmat("./data/Test1Y.mat")['Test1Y'].squeeze()

In [8]:
# Apply the theta fitted on the training arrays to the test inputs ...
pred_test = helper.lr_predict(X_test, theta)
# ... and display the resulting accuracy against the test targets.
helper.lr_accuracy(y_test, pred_test)

0.9125

### LR Search Lambdas

In [9]:
# Validation arrays used to score each candidate lambda.
X_val = loadmat("./data/Validation1X.mat")['Validation1X']
y_val = loadmat("./data/Validation1Y.mat")['Validation1Y'].squeeze()
# Candidate lambda values to search over.
lambdas = loadmat("./data/Part1Lambdas.mat")['Part1Lambdas'].squeeze()
# Reference validation accuracies; the search result is compared against these.
expected_out = loadmat("./data/ValidationAccuracy.mat")['ValidationAccuracy'].squeeze()

In [10]:
# Run the lambda search with the training and validation arrays.
# NOTE(review): presumably returns one validation accuracy per entry of
# `lambdas` — confirm against solution.lr_search_lambda_sgd.
acc = solution.lr_search_lambda_sgd(X, y, X_val, y_val, lambdas)

In [11]:
# True when the computed accuracies match the reference values element-wise
# within np.allclose's default tolerances.
np.allclose(acc, expected_out)

True

# Part 2

In [12]:
# Load the Part 2 sample fixtures. simplify_cells=True makes loadmat return
# MATLAB structs/cell arrays as plain dicts/arrays that can be indexed by field.
data = loadmat("./data/Part2Sample.mat", simplify_cells=True)
calibrated_struct = data['sampleCalibratedTree']
uncalibrated_struct = data['sampleUncalibratedTree']

# Convert every MATLAB clique into a Factor (calibrated tree first, then uncalibrated).
sample_calibrated = list(map(Factor.from_matlab, calibrated_struct['cliqueList']))
sample_uncalibrated = list(map(Factor.from_matlab, uncalibrated_struct['cliqueList']))
sample_logZ = data['sampleLogZ']

# The edge matrix of the uncalibrated tree defines the adjacency of both trees:
# an entry (i, j) equal to 1 marks j as a neighbour of i.
edges = uncalibrated_struct['edges']
num_cliques = len(edges)
neighbours = {
    i: [j for j in range(num_cliques) if edges[i, j] == 1]
    for i in range(num_cliques)
}

# Each tree gets its own dict and its own set objects, so changing one tree's
# adjacency list can never leak into the other.
sample_calibrated_tree = {
    'clique_list': sample_calibrated,
    'adj_list': {i: set(adjacent) for i, adjacent in neighbours.items()},
}
sample_uncalibrated_tree = {
    'clique_list': sample_uncalibrated,
    'adj_list': {i: set(adjacent) for i, adjacent in neighbours.items()},
}

# The full dataset is not loaded here; uncomment if it is needed:
#all_data = loadmat("./data/Part2FullDataset.mat", simplify_cells=True)

## Check LogZ (Clique Tree Calibrate: Implement it in solution.py)

In [13]:
# Calibrate the uncalibrated sample tree; the call yields the calibrated
# result together with a logZ value, both checked against the fixtures next.
# NOTE(review): the meaning of the second argument (True) is not visible
# here — confirm against solution.clique_tree_calibrate.
out_calib, out_logZ = solution.clique_tree_calibrate(sample_uncalibrated_tree, True)

In [14]:
# Compare the calibration output with the reference cliques, and logZ with the
# reference value to within 1e-10; display both results as a tuple.
cliques_match = out_calib == sample_calibrated
logZ_match = abs(out_logZ - sample_logZ) < 1e-10
cliques_match, logZ_match

(True, True)

## Instance Negative Log-Likelihood (implement it in `instance_neg_log_likelyhood.py`)

In [15]:
# NOTE(review): the "- 1" presumably shifts MATLAB's 1-based values to
# 0-based ones — confirm against the data format.
sample_x = data['sampleX'] - 1
sample_y = data['sampleY'] - 1
sample_theta = data['sampleTheta']
sample_params = data['sampleModelParams']

# Re-key the fields of the MATLAB parameter struct to the snake_case names
# of the dict that is passed on to instance_neg_log_likelyhood.
field_names = {
    'num_hidden_states': 'numHiddenStates',
    'num_observed_states': 'numObservedStates',
    'lambda': 'lambda',
}
model_param = {key: sample_params[field] for key, field in field_names.items()}
model_param

{'num_hidden_states': 26, 'num_observed_states': 2, 'lambda': 0.003}

In [16]:
# Evaluate the sample instance; the two returned values are compared with the
# fixture's sampleNLL and sampleGrad in the following cells.
nll, grad = instance_neg_log_likelyhood(sample_x, sample_y, sample_theta, model_param)

In [17]:
# Display the computed value next to the reference value, plus whether the two
# agree to within an absolute tolerance of 1e-8.
expected_nll = data['sampleNLL']
nll, expected_nll, abs(nll - expected_nll) < 1e-8

(14.126845092942453, 14.126845092942455, True)

In [18]:
# True when the computed gradient matches the fixture's sampleGrad
# element-wise within np.allclose's default tolerances.
np.allclose(grad, data['sampleGrad'])

True