In [None]:
import pandas as pd
import numpy as np

# STATE TRANSITION MATRIX FOR ENGAGEMENT
#
# Builds two 5x5 tables indexed by discretized engagement level (1..5):
#   transition_counts[i, j] -> number of observed moves from level i to level j
#   transition_prob[i, j]   -> row-normalized probabilities, with entries below
#                              `threshold` removed and rows renormalized

# NOTE(review): absolute, machine-specific path; the notebook only runs where
# this file exists. Consider a path relative to the project root.
df = pd.read_csv("/Users/joshchaidez/Desktop/Globant_Motivation_Prediction/data/data_globant.csv")

# Discretize engagement: edges 0,1,...,5 give the intervals
# [0,1], (1,2], (2,3], (3,4], (4,5], labelled 1.0 .. 5.0.
# Values outside [0, 5] fall in no bin and become NaN.
bins = np.arange(0, 5.25, 1)
labels = np.arange(1, 5.25, 1)

df["Engagement_bin"] = pd.cut(df["Engagement"], bins=bins, labels=labels, include_lowest=True)

# Chronological order within each person.
# NOTE(review): the sort key has no year column; presumably the data covers a
# single year. Confirm, otherwise transitions are ordered incorrectly.
df = df.sort_values(by=["Name", "Month", "Day"])

# Count consecutive-state transitions per person. The categorical codes are the
# positions of each bin label in `labels`, so they index the count matrix
# directly; np.add.at accumulates repeated (from, to) pairs correctly and avoids
# one slow DataFrame.loc write per transition.
n_states = len(labels)
counts = np.zeros((n_states, n_states), dtype=int)

for name, group in df.groupby("Name"):
    # Rows without a valid bin are dropped *before* pairing, so a transition may
    # bridge a gap left by missing observations.
    codes = group["Engagement_bin"].dropna().cat.codes.to_numpy()
    np.add.at(counts, (codes[:-1], codes[1:]), 1)

transition_counts = pd.DataFrame(counts, index=labels, columns=labels)

# Row-normalize. Rows with no outgoing transitions become NaN instead of
# triggering a division by zero.
transition_prob = transition_counts.div(transition_counts.sum(axis=1).replace(0, np.nan), axis=0)

# Zero out counts below a threshold in the count matrix.
# With threshold2 = 0 every count is kept. Note this runs after transition_prob
# was computed, so it never affects the probabilities.
threshold2 = 0
transition_counts = transition_counts.where(transition_counts >= threshold2, 0)

# Zero out probabilities below a threshold in the probability matrix
# (NaN entries also become 0 here).
threshold = 0.05
transition_prob = transition_prob.where(transition_prob >= threshold, 0)

# Renormalize so each remaining row sums to 1 again (all-zero rows become NaN).
transition_prob = transition_prob.div(transition_prob.sum(axis=1).replace(0, np.nan), axis=0)

transition_counts, transition_prob

(     1.0  2.0   3.0   4.0   5.0
 1.0  262   33   200   131    28
 2.0   34  314   125    19     5
 3.0  190  137  2949   927    30
 4.0  146   15   951  2999   340
 5.0   32    5    23   359  1018,
           1.0       2.0       3.0       4.0       5.0
 1.0  0.418530  0.052716  0.319489  0.209265  0.000000
 2.0  0.071882  0.663848  0.264271  0.000000  0.000000
 3.0  0.000000  0.000000  0.760836  0.239164  0.000000
 4.0  0.000000  0.000000  0.221678  0.699068  0.079254
 5.0  0.000000  0.000000  0.000000  0.260712  0.739288)

In [4]:
# Check whether the transition matrix is diagonalizable: it is when its
# eigenvectors are linearly independent, i.e. the eigenvector matrix has full
# rank. This is a numerical rank test using matrix_rank's default tolerance.
# `eigenvalues` / `eigenvectors` are reused by the spectral decomposition below.
eigenvalues, eigenvectors = np.linalg.eig(transition_prob.fillna(0).to_numpy())
n_states = len(transition_prob.index)
is_diagonalizable = np.linalg.matrix_rank(eigenvectors) == n_states
is_diagonalizable

np.True_

In [16]:
# Spectral decomposition P = V D V^-1, hence the n-step matrix is P^n = V D^n V^-1.
# Uses `eigenvalues` / `eigenvectors` computed in the diagonalizability cell.
n = 1  # number of steps
D = np.diag(eigenvalues)
D_n = np.linalg.matrix_power(D, n)
P_inv = np.linalg.inv(eigenvectors)
# np.linalg.eig can return complex eigenpairs for a non-symmetric real matrix.
# P^n of a real matrix is real, so any imaginary part of the product is rounding
# noise; keep the real part so the thresholding and rounding below operate on
# real numbers instead of producing a complex-valued table.
transition_prob_n = np.real(eigenvectors @ D_n @ P_inv)
transition_prob_n = pd.DataFrame(transition_prob_n, index=transition_prob.index, columns=transition_prob.columns)
# Zero out probabilities below 0.001 (also clears tiny negative rounding residue)
transition_prob_n[transition_prob_n < 0.001] = 0
# Round to 3 decimals (one decimal place once expressed as a percentage)
transition_prob_n = transition_prob_n.round(3)
# Multiply by 100 to express as percentages
transition_prob_n = transition_prob_n * 100

transition_prob_n

Unnamed: 0,1.0,2.0,3.0,4.0,5.0
1.0,41.9,5.3,31.9,20.9,0.0
2.0,7.2,66.4,26.4,0.0,0.0
3.0,0.0,0.0,76.1,23.9,0.0
4.0,0.0,0.0,22.2,69.9,7.9
5.0,0.0,0.0,0.0,26.1,73.9


In [17]:
# Stationary distribution: the left eigenvector of the transition matrix for
# eigenvalue 1 (a right eigenvector of its transpose), scaled to sum to 1.
eigvals, eigvecs = np.linalg.eig(transition_prob.fillna(0).values.T)

# Exactly one eigenvalue must equal 1 for the result to be well defined. With
# zero or several matches the selection below would have the wrong shape and
# fail later with an unrelated length-mismatch error, so fail early and clearly.
unit_eigval_mask = np.isclose(eigvals, 1)
n_unit_eigvals = int(unit_eigval_mask.sum())
if n_unit_eigvals != 1:
    raise ValueError(
        f"Expected exactly one eigenvalue equal to 1, found {n_unit_eigvals}; "
        "the stationary distribution is missing or not unique."
    )

stationary_dist = np.real(eigvecs[:, unit_eigval_mask])
# Dividing by the sum also fixes the arbitrary sign/scale of the eigenvector.
stationary_dist = stationary_dist / stationary_dist.sum()
stationary_dist = pd.Series(stationary_dist.flatten(), index=transition_prob.index)

# Zero out probabilities below 0.001
stationary_dist[stationary_dist < 0.001] = 0
# Round to 5 decimals (three decimal places once expressed as a percentage)
stationary_dist = stationary_dist.round(5)
# Multiply by 100 to express as percentages
stationary_dist = stationary_dist * 100

stationary_dist

1.0     0.000
2.0     0.000
3.0    41.548
4.0    44.825
5.0    13.627
dtype: float64