In [13]:
%load_ext autoreload

%autoreload 2

import numpy as np
%matplotlib widget
import matplotlib.pyplot as plt
import matplotlib.tri as mtri
from math import sqrt, log
import scipy as sc
from scipy.stats import chi2

from ruspy.data.data_reading import data_reading
from ruspy.data.data_processing import data_processing

from ruspy.estimation.estimation import estimate
from ruspy.estimation.estimation_transitions import estimate_transitions

from ruspy.simulation.robust_sim import draw_trans_probs_mulitvar
from ruspy.simulation.robust_sim import calc_cov_multinomial

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Run ruspy's data_reading step; it is called only for its side effects (the
# return value is discarded).
# NOTE(review): presumably prepares the raw data consumed by data_processing
# in the cells below - confirm against ruspy.data.data_reading.
data_reading()

In [3]:
# Initialize a dictionary with the values needed along the way.
# The global NumPy random state is seeded first so that the random draws
# further down are reproducible.
np.random.seed(123)
beta = 0.9999
init_dict = {
    'groups': 'group_4',
    'beta': beta,
    'maint_func': 'linear',
    'binsize': 5000,
    'states': 90,
}

In [4]:
# Build the data set used for estimation from the settings in init_dict.
repl_data = data_processing(init_dict)

In [6]:
# estimate returns two objects: trans_results, a dict with the keys 'x', 'fun'
# and 'trans_count' (see the printed output), and cost_results, an
# optimizer-result object (fun, jac, x, ...).
trans_results, cost_results = estimate(init_dict, repl_data)
print(trans_results, cost_results)

{'x': array([0.39189189, 0.59529357, 0.01281454]), 'fun': 3140.5705570938244, 'trans_count': [1682, 2555, 55]}       fun: 163.585839925182
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([-9.94759830e-05,  3.41060513e-05])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 42
      nit: 12
   status: 0
  success: True
        x: array([10.07770591,  2.29413601])


In [7]:
# The 'x' entry of the transition results is used below as the reference
# probability vector (center of the Kullback-Leibler set).
p_ml = trans_results['x']

In [8]:
# Size of the Kullback-Leibler set: the 95% quantile of a chi-squared
# distribution with (number of probabilities - 1) degrees of freedom, divided
# by twice the number of observed transitions. The sample size is taken from
# the estimation result (sum of 'trans_count', 4292 in the run shown) instead
# of being hard-coded, so it stays correct if the data selection changes.
roh = chi2.ppf(0.95, len(trans_results['x']) - 1) / (2 * sum(trans_results['trans_count']))
roh

0.0006979804924403517

In [21]:
# Monte Carlo check: share of drawn probability vectors whose Kullback-Leibler
# value relative to p_ml does not exceed roh.
# NOTE(review): Kullback_Leibler is defined in a later cell of this notebook;
# on a fresh kernel that cell has to be executed before this one.
size = 1000   # passed as `size` to every call of the draw function
monte = 1000  # number of repetitions of the draw
# Sample size derived from the estimation result instead of a hard-coded
# constant (the transition counts sum to 4292 in the run shown).
n = sum(trans_results['trans_count'])
count = 0
for i in range(monte):
    # `probs` of the final repetition is reused by a later cell for plotting.
    probs = draw_trans_probs_mulitvar(n, p_ml, size=size)
    for p in probs:
        if roh - Kullback_Leibler(p, p_ml) >= 0:
            count += 1
count / (size * monte)

0.949335

In [22]:
# Covariance matrix returned by calc_cov_multinomial for n and p_ml; the cell
# displays the square root of its first diagonal entry.
# NOTE(review): presumably the standard deviation of the first estimated
# probability - confirm against calc_cov_multinomial.
cov = calc_cov_multinomial(n, p_ml)
np.sqrt(cov[0, 0])

0.0074514995482480514

In [8]:
# Specify the simplex: l_simp is the base length of the triangle used to draw
# it. The plotting cells use a height of 1, for which sqrt(4 / 3) = 2 / sqrt(3)
# makes the triangle equilateral.
l_simp = sqrt(4 / 3)

In [9]:
def get_x(p, l_simp):
    """Return the horizontal plot coordinate of a probability vector.

    Parameters
    ----------
    p : sequence
        Probability vector; only ``p[0]`` and ``p[1]`` are read.
    l_simp : float
        Base length of the triangle representing the simplex.

    Returns
    -------
    float
        ``l_simp - p[0] * sqrt(1 + l_simp ** 2 / 4) - 0.5 * l_simp * p[1]``.

    Notes
    -----
    ``sqrt(1 + l_simp ** 2 / 4)`` is the length of a slanted side of a
    triangle with base ``l_simp`` and height 1. ``p = (1, 0, 0)`` is mapped to
    0 only when that side length equals ``l_simp``, i.e. for
    ``l_simp = sqrt(4 / 3)``.
    """
    slanted_side = sqrt(1 + l_simp ** 2 / 4)
    shift_from_p0 = p[0] * slanted_side
    shift_from_p1 = 0.5 * l_simp * p[1]
    return l_simp - shift_from_p0 - shift_from_p1

In [10]:
def create_set(p_ml, roh, l_simp, step, min_grid_p_0, min_grid_p_1, max_grid_p_0, max_grid_p_1, set_method='Kullback'):
    """Collect the grid points of the probability simplex that lie in the set.

    A rectangular grid over the first two probabilities is scanned; the third
    probability is implied by ``1 - p_0 - p_1``. A grid point is kept when all
    three probabilities lie strictly between 0 and 1 and its distance measure
    to ``p_ml`` does not exceed ``roh``.

    Parameters
    ----------
    p_ml : array-like
        Reference probability vector (center of the set).
    roh : float
        Upper bound on the distance measure.
    l_simp : float
        Base length of the plotted simplex, forwarded to ``get_x``.
    step : float
        Grid spacing in both dimensions.
    min_grid_p_0, min_grid_p_1 : float
        Lower grid bounds (inclusive) for the first and second probability.
    max_grid_p_0, max_grid_p_1 : float
        Upper grid bounds (exclusive, as in ``np.arange``).
    set_method : str, optional
        Distance measure; only ``'Kullback'`` is supported.

    Returns
    -------
    set_coordinates_x : list
        Horizontal plot coordinate of every accepted point.
    set_coordinates_y : list
        Vertical plot coordinate (the second probability) of every accepted
        point.
    set_coordinates : numpy.ndarray
        The same coordinates as an array of shape ``(k, 2)``.
    set_probs : numpy.ndarray
        Accepted probability vectors, shape ``(k, len(p_ml))``.

    Raises
    ------
    ValueError
        If ``set_method`` is not supported.
    """
    if set_method == 'Kullback':
        set_cond = Kullback_Leibler
    else:
        raise ValueError('Set measure not supported.')
    set_coordinates_x = []
    set_coordinates_y = []
    accepted_probs = []
    for p_0 in np.arange(min_grid_p_0, max_grid_p_0, step):
        for p_1 in np.arange(min_grid_p_1, max_grid_p_1, step):
            p = np.array([p_0, p_1, 1 - p_0 - p_1])
            # Only interior points of the simplex are admissible; this also
            # keeps zero or negative entries away from the logarithm.
            if not np.all((p > 0) & (p < 1)):
                continue
            # Use the selected measure rather than a hard-wired function.
            if roh - set_cond(p, p_ml) >= 0:
                set_coordinates_x.append(get_x(p, l_simp))
                set_coordinates_y.append(p_1)
                accepted_probs.append(p)
    # Build the arrays once at the end instead of re-allocating them with
    # np.append for every accepted point. Both constructions keep the
    # two-dimensional shape when nothing was accepted.
    set_coordinates = np.column_stack((set_coordinates_x, set_coordinates_y))
    set_probs = np.array(accepted_probs, dtype=float).reshape(
        len(accepted_probs), len(p_ml))
    return set_coordinates_x, set_coordinates_y, set_coordinates, set_probs

In [10]:
def Kullback_Leibler(p, p_ml):
    """Return the Kullback-Leibler divergence of ``p`` from ``p_ml``.

    Computes ``sum_i p[i] * log(p[i] / p_ml[i])`` with the natural logarithm.

    Parameters
    ----------
    p : sequence of float
        Probability vector whose divergence is measured.
    p_ml : sequence of float
        Reference probability vector; must have at least ``len(p)`` entries.

    Returns
    -------
    float
        The divergence (``0`` if every entry of ``p`` is zero).

    Raises
    ------
    ValueError
        If an entry of ``p`` is negative (logarithm of a negative number).
    """
    val = 0
    for i, q in enumerate(p):
        # Convention 0 * log(0) = 0: a component with zero probability adds
        # nothing, whereas evaluating log(0) would raise a ValueError.
        if q == 0:
            continue
        val += q * log(q / p_ml[i])
    return val

In [12]:
# Plot coordinates of the drawn probability vectors in `probs`:
# horizontal position via get_x, vertical position is the second probability.
draws_x, draws_y = [], []
for p in probs:
    draws_x.append(get_x(p, l_simp))
    draws_y.append(p[1])

In [13]:
# First pass: scan the whole range [0, 1) of both probabilities on a coarse
# grid to locate the set roughly.
min_grid_p_0 = min_grid_p_1 = 0
max_grid_p_0 = max_grid_p_1 = 1
step = 0.001
x_set_pre, y_set_pre, set_coord_pre, set_probs_pre = create_set(
    p_ml, roh, l_simp, step, min_grid_p_0, min_grid_p_1, max_grid_p_0, max_grid_p_1, set_method='Kullback')

In [14]:
# Bounding box of the coarse result, widened by one coarse step on every side;
# it limits the grid of the second, finer pass.
min_grid_p_0 = set_probs_pre[:, 0].min() - step
max_grid_p_0 = set_probs_pre[:, 0].max() + step
min_grid_p_1 = set_probs_pre[:, 1].min() - step
max_grid_p_1 = set_probs_pre[:, 1].max() + step

In [15]:
# Second pass: repeat the scan with a ten times finer step inside the bounding
# box found above.
step = 0.0001
x_set, y_set, set_coord, set_probs = create_set(
    p_ml, roh, l_simp, step, min_grid_p_0, min_grid_p_1, max_grid_p_0, max_grid_p_1, set_method='Kullback')

In [16]:
# Convex hull of the accepted plot coordinates; its edges are drawn as the
# boundary of the set in the first figure.
hull = sc.spatial.ConvexHull(set_coord)

In [17]:
# Create the triangulation: the three corners of the simplex, drawn as a
# triangle with base l_simp and height 1.
x = np.array([0, 0.5 * l_simp, l_simp])
y = np.array([0, 1, 0])
triang = mtri.Triangulation(x, y)

# Set up the figure
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
ax.text(-0.05, -0.03, "Increase by 0")
ax.text(l_simp - 0.06, -0.03, "Increase by 2")
ax.text(0.5 * l_simp - 0.05, 1.02, "Increase by 1")
# ax.plot(x_set, y_set, color='grey')
# Boundary of the set: one line segment per edge of the convex hull. Drawn
# through the explicit Axes object, like every other artist in this cell,
# instead of the implicit pyplot "current axes".
for simplex in hull.simplices:
    ax.plot(set_coord[simplex, 0], set_coord[simplex, 1], 'k-')
# Reference point p_ml in red.
ax.plot([get_x(p_ml, l_simp)], [p_ml[1]], marker='o', markersize=2, color="red")
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

# Drawn probability vectors in green, then the outline of the simplex.
ax.scatter(draws_x, draws_y, s=10, marker='o', color="green")
ax.triplot(triang)

FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x7f31ee4734e0>,
 <matplotlib.lines.Line2D at 0x7f31ee1ea3c8>]

In [18]:
# Create the triangulation: the three corners of the simplex, drawn as a
# triangle with base l_simp and height 1.
x = np.array([0, 0.5 * l_simp, l_simp])
y = np.array([0, 1, 0])
triang = mtri.Triangulation(x, y)

# Set up the figure
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
ax.text(-0.05, -0.03, "Increase by 0")
ax.text(l_simp - 0.06, -0.03, "Increase by 2")
ax.text(0.5 * l_simp - 0.05, 1.02, "Increase by 1")
# Reference point p_ml in red.
ax.plot([get_x(p_ml, l_simp)], [p_ml[1]], marker='o', markersize=1, color="red")
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

# Plot the triangulation.
#axs.tricontourf(triang, z)

# ax.set_title('Probability simplex')
# Plot heatmap: two-dimensional histogram of the drawn points over the
# bounding rectangle [0, l_simp] x [0, 1] of the simplex, 2000 bins per axis.
# ax2 = ax.twinx()
ax.hist2d(np.array(draws_x), np.array(draws_y), bins=2000, range=[[0, l_simp], [0, 1]])
ax.triplot(triang)

FigureCanvasNbAgg()

[<matplotlib.lines.Line2D at 0x7f31ee25d390>,
 <matplotlib.lines.Line2D at 0x7f31ee0c7ac8>]