# Морфологічний аналіз

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('whitegrid')
# Jupyter rendering stuff
from IPython.display import display, Math, Latex, HTML, clear_output

In [2]:
import itertools

## Input data generation

In [3]:
def generate_probabilities(*sizes):
    """Draw one random probability vector per requested size.

    Each vector is sampled with ``np.random.random`` and then divided in
    place by its own sum, so its entries add up to one.

    Args:
        *sizes: Size of each vector, passed straight to ``np.random.random``.

    Returns:
        A list with one normalized array per entry of ``sizes``, in order.
    """
    drawn = list(map(np.random.random, sizes))
    for weights in drawn:
        # in-place division rescales the array already stored in ``drawn``
        weights /= np.sum(weights)
    return drawn

In [4]:
def generate_cross_relation(size1, size2):
    """Return a random ``size1`` x ``size2`` matrix with entries in [-1, 1).

    Args:
        size1: Number of rows.
        size2: Number of columns.

    Returns:
        A 2-D array of uniform samples from [0, 1) rescaled to [-1, 1).
    """
    shape = (size1, size2)
    unit_samples = np.random.random(size=shape)
    return 2 * unit_samples - 1

In [5]:
# Number of alternatives of every outer factor, followed by one random
# probability vector per factor (each vector sums to one).
outer_f_s = (4, 2, 4, 3, 1, 2, 3)
outer_f_p = generate_probabilities(*outer_f_s)

# The same two objects for the inner factors.
inner_f_s = (5, 4)
inner_f_p = generate_probabilities(*inner_f_s)

In [6]:
# Random relation matrices with entries in [-1, 1), keyed by a pair of factor
# indices: one per unordered pair of outer factors, one per unordered pair of
# inner factors, and one per (outer factor, inner factor) pair.
outer_cross_rel = {
    (first, second): generate_cross_relation(rows, cols)
    for (first, rows), (second, cols)
    in itertools.combinations(enumerate(outer_f_s), 2)
}
inner_cross_rel = {
    (first, second): generate_cross_relation(rows, cols)
    for (first, rows), (second, cols)
    in itertools.combinations(enumerate(inner_f_s), 2)
}
out_in_cross_rel = {
    (outer, inner): generate_cross_relation(rows, cols)
    for (outer, rows), (inner, cols)
    in itertools.product(enumerate(outer_f_s), enumerate(inner_f_s))
}

In [7]:
# presentation example
# outer_f_s = (3, 4, 2)
# outer_f_p = [np.array([0.3, 0.5, 0.2]), np.array([0.4, 0.3, 0.1, 0.2]), np.array([0.3, 0.7])]
# outer_cross_rel = {(i, j):np.zeros((s1, s2)) for (i,s1), (j,s2) in itertools.combinations(enumerate(outer_f_s), 2)}
# outer_cross_rel[(0,1)][0,0] = 0.5
# outer_cross_rel[(0,1)][2,1] = -0.5
# outer_cross_rel[(0,2)][0,0] = 0.2
# outer_cross_rel[(0,2)][1,0] = 0.3
# outer_cross_rel[(1,2)][0,0] = 0.5
# outer_cross_rel[(1,2)][2,1] = -1

## Calculating table for outer factors

In [8]:
# One row per combination of outer-factor alternatives; the index levels are
# named '1', '2', ... in factor order and the frame starts without columns.
df_outer = pd.DataFrame(
    index=pd.MultiIndex.from_product(
        tuple(map(range, outer_f_s)),
        names=[str(number) for number in range(1, len(outer_f_s) + 1)],
    )
)

In [9]:
def calculate_correlation(df, cross_rel):
    """Add a column ``'C'`` with the product of pairwise relation terms per row.

    Every row of ``df`` is one combination of factor alternatives.  For each
    unordered pair of factors ``(f1, f2)`` with ``f1 < f2`` the term
    ``cross_rel[f1, f2][a1, a2] + 1`` is looked up, where ``a1`` and ``a2`` are
    the row's integer level codes for those two factors.  The product of the
    terms over all pairs is stored in ``df['C']``.

    Args:
        df: DataFrame indexed by a ``pandas.MultiIndex`` with one level per
            factor.  Modified in place.
        cross_rel: Mapping from ``(f1, f2)`` factor-index pairs to 2-D arrays
            that are indexed by the level codes of ``f1`` and ``f2``.

    Returns:
        None.  With fewer than two factors there is no pair and ``'C'`` is 1.
    """
    # ``MultiIndex.labels`` was renamed to ``codes`` in pandas 0.24 and later
    # removed; fall back to the old attribute only on old installations.
    codes = getattr(df.index, 'codes', None)
    if codes is None:
        codes = df.index.labels
    result = 1
    for (f1, a1), (f2, a2) in itertools.combinations(enumerate(codes), 2):
        # fancy indexing picks one matrix entry per row of ``df``
        result *= cross_rel[f1, f2][a1, a2] + 1
    df['C'] = result

In [10]:
def calculate_cond_probs(df, factor_p):
    """Add unnormalized columns ``'P1_'``, ``'P2_'``, ... to ``df``.

    For factor ``i`` (0-based) the column ``'P%d_' % (i + 1)`` holds, for each
    row, the product of the probabilities of the alternatives chosen for all
    *other* factors, multiplied by the row's ``'C'`` value.

    Args:
        df: DataFrame indexed by a ``pandas.MultiIndex`` with one level per
            factor and an existing ``'C'`` column.  Modified in place.
        factor_p: Sequence with one probability array per factor, indexed by
            that factor's level codes.

    Returns:
        None.
    """
    # ``MultiIndex.labels`` was renamed to ``codes`` in pandas 0.24 and later
    # removed; fall back to the old attribute only on old installations.
    codes = getattr(df.index, 'codes', None)
    if codes is None:
        codes = df.index.labels
    # probs[i, r]: probability of the alternative row r selects for factor i
    probs = np.array([factor_p[i][factor_codes]
                      for i, factor_codes in enumerate(codes)])
    # dividing the full product by one factor's own term leaves the product
    # of the remaining factors (a zero probability therefore yields NaN)
    probs = np.prod(probs, axis=0) / probs
    for i, others in enumerate(probs):
        # write to the frame that was passed in, not to a module-level global
        df['P%d_' % (i + 1)] = others * df['C']

In [11]:
def normalize_cond_probs(df, factor_s):
    """Add normalized columns ``'P1'``, ``'P2'``, ... to ``df``.

    For factor ``i`` (0-based) the column ``'P%d_' % (i + 1)`` is rescaled so
    that, within every group of rows sharing the same alternative of factor
    ``i``, the values of ``'P%d' % (i + 1)`` sum to one.

    Args:
        df: DataFrame indexed by a ``pandas.MultiIndex`` with one level per
            factor and the unnormalized ``'P<k>_'`` columns.  Modified in
            place.
        factor_s: Sequence with the number of alternatives of every factor;
            only its length (the number of factors) is needed here because
            the groups are taken from the index itself.

    Returns:
        None.  A group whose values sum to zero produces NaN entries.
    """
    for i in range(len(factor_s)):
        raw = df['P%d_' % (i + 1)]
        # per-row total of the group the row belongs to on index level i;
        # replaces chained ``.loc`` assignment and the removed ``as_matrix()``
        group_totals = raw.groupby(level=i).transform('sum')
        df['P%d' % (i + 1)] = raw / group_totals

In [12]:
def generate_prob_matrices(df, factor_s):
    """Build one matrix per factor, pairing it with its cyclic successor.

    For factor ``left`` and ``right = (left + 1) % len(factor_s)`` the entry
    ``[i, j]`` is the sum of column ``'P%d' % (right + 1)`` over all rows whose
    index selects alternative ``i`` for ``left`` and ``j`` for ``right``.

    Args:
        df: DataFrame indexed by a ``pandas.MultiIndex`` with one level per
            factor and the normalized ``'P<k>'`` columns.
        factor_s: Sequence with the number of alternatives of every factor.

    Returns:
        A list of arrays; element ``left`` has shape
        ``(factor_s[left], factor_s[right])``.
    """
    n_factors = len(factor_s)
    matrices = []
    for left, left_size in enumerate(factor_s):
        right = (left + 1) % n_factors
        right_size = factor_s[right]
        column = 'P%d' % (right + 1)
        table = np.zeros((left_size, right_size))
        for cell in np.ndindex(left_size, right_size):
            # select every row, then pin the two factors of this cell
            selector = [slice(None)] * n_factors
            selector[left], selector[right] = cell
            table[cell] = df.loc[tuple(selector), :][column].sum()
        matrices.append(table)
    return matrices

In [13]:
def create_matrix_set(df, factor_s, factor_p, cross_rel):
    """Run the whole table pipeline on ``df`` and return its matrices.

    The steps run in a fixed order because each one reads columns written by
    the previous one: ``calculate_correlation``, ``calculate_cond_probs``,
    ``normalize_cond_probs`` and finally ``generate_prob_matrices``.

    Args:
        df: DataFrame to fill; it is modified by the first three steps.
        factor_s: Number of alternatives of every factor.
        factor_p: One probability array per factor.
        cross_rel: Mapping from factor-index pairs to relation matrices.

    Returns:
        The list produced by ``generate_prob_matrices(df, factor_s)``.
    """
    calculate_correlation(df, cross_rel)
    calculate_cond_probs(df, factor_p)
    normalize_cond_probs(df, factor_s)
    matrices = generate_prob_matrices(df, factor_s)
    return matrices

In [14]:
def generate_cycle_product(matrices):
    """Multiply the matrices around the cycle once from every starting point.

    Element ``k`` of the result is
    ``matrices[k] @ matrices[k + 1] @ ... @ matrices[k - 1]`` with indices
    taken modulo ``len(matrices)``, evaluated left to right.

    Args:
        matrices: Sequence of arrays whose shapes chain around the cycle.

    Returns:
        A list of new arrays, one per starting matrix; the inputs are not
        modified.
    """
    count = len(matrices)
    products = []
    for start, first in enumerate(matrices):
        product = first.copy()
        for offset in range(1, count):
            product = product @ matrices[(start + offset) % count]
        products.append(product)
    return products

In [15]:
def find_eigenvec_near_one(matrices):
    """Placeholder: ignores ``matrices`` and returns an empty list.

    NOTE(review): the name suggests this should extract, for each matrix, the
    eigenvector whose eigenvalue is closest to one -- not implemented yet;
    confirm the intended behaviour before relying on the result.
    """
    return list()

In [16]:
# Fill the outer-factor table and derive its matrices, then multiply them
# around the cycle from every starting factor.
outer_matrices = create_matrix_set(
    df=df_outer,
    factor_s=outer_f_s,
    factor_p=outer_f_p,
    cross_rel=outer_cross_rel,
)
outer_matrices_cycled = generate_cycle_product(matrices=outer_matrices)
# find_eigenvec_near_one is still a stub, so this is currently an empty list
outer_real_probs = find_eigenvec_near_one(matrices=outer_matrices_cycled)

In [17]:
# Eigenvalues and eigenvectors of every cyclic product (shown as cell output).
list(map(np.linalg.eig, outer_matrices_cycled))

[(array([  1.11022302e-16,   1.00000000e+00,  -4.91283771e-17,
           4.10976243e-17]),
  array([[-0.8351615 , -0.06487572,  0.01313498,  0.00387257],
         [ 0.15086533, -0.27372036,  0.44447465,  0.48472132],
         [ 0.49316845, -0.89477315,  0.36177391, -0.81248188],
         [ 0.19112771, -0.34676984, -0.81938354,  0.32388799]])),
 (array([  1.00000000e+00,  -1.38777878e-17]),
  array([[ 0.99622879, -0.70710678],
         [ 0.08676523,  0.70710678]])),
 (array([  1.00000000e+00,   2.77555756e-17,   1.21824897e-17,
          -3.58542388e-18]),
  array([[ 0.98952467, -0.82064189, -0.454816  , -0.47344045],
         [ 0.05615796,  0.22229289, -0.42220971,  0.57272484],
         [ 0.01962366,  0.07767732,  0.09917944, -0.52023315],
         [ 0.13153754,  0.52067168,  0.77784626,  0.42094876]])),
 (array([  1.00000000e+00,  -1.09323606e-16,   1.58875511e-17]),
  array([[-0.10455724, -0.09507559, -0.61994387],
         [-0.83608982,  0.74983437, -0.15019347],
         [-0.5385