In [None]:
from activity_tracker.pipeline import subject, visit, measurement

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings
# Suppress all warnings for the rest of the session so they do not clutter cell output.
warnings.filterwarnings("ignore")

# Allow up to 200 columns and 200 rows before pandas truncates a displayed frame.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, 200)


def _fetch_flat(table):
    """Fetch `table` as a DataFrame and move its index levels into ordinary columns."""
    return table.fetch(format="frame").reset_index()


# One flat DataFrame per pipeline table used by the cells below.
df_subject = _fetch_flat(subject.Subject)
df_visit = _fetch_flat(visit.Visit)
df_measurement = _fetch_flat(measurement.DailyMeasurement)
df_frailty = _fetch_flat(measurement.Frailty)

In [None]:
# Count visits per subject (non-null visit_id rows for each subject_id).
visits_per_subject = df_visit.groupby("subject_id")["visit_id"].count()

# Build unit-width bins [k, k+1) from the data instead of hard-coding 1-5.
# With fixed edges range(1, 7) the last bin is closed on the right, so a subject
# with 6 visits would be drawn under the "5" bar and anything above 6 would be
# dropped silently. The axis never shrinks below the original 1-5 range.
max_visits = 5
if len(visits_per_subject):
    max_visits = max(max_visits, int(visits_per_subject.max()))
bin_edges = np.arange(1, max_visits + 2)

# Get histograms
fig, ax = plt.subplots(figsize=(4, 3))
counts, bins = np.histogram(visits_per_subject, bins=bin_edges)
ax.hist(visits_per_subject, bins=bin_edges, edgecolor="black", alpha=1)
ax.set_xlabel("# of Visits")
ax.set_ylabel("# of Subjects")

# Add count labels, centred above each non-empty bar.
for left_edge, count in zip(bins[:-1], counts):
    if count > 0:
        ax.text(left_edge + 0.5, count + 1, str(int(count)), ha="center", va="bottom")

# One tick per bar, placed at the bar centre and labelled with the visit count.
ax.set_xticks(bins[:-1] + 0.5)
ax.set_xticklabels([str(int(left_edge)) for left_edge in bins[:-1]])

# Keep the original 0-70 range, but grow it when a bar (plus its label) would not fit.
tallest_bar = int(counts.max()) if counts.size else 0
ax.set_ylim(0, max(70, tallest_bar + 5))
ax.grid(False)
sns.despine(ax=ax)
plt.show()

In [None]:
# Get the first visit data for each subject.
# Rows are ordered by visit_id and the first *row* per subject is kept.
# GroupBy.first() is deliberately not used: it returns the first non-null value of
# each column and depends on the incoming row order, so a subject whose first visit
# has no recorded status would be counted with the status of a later visit.
# NOTE(review): assumes visit_id sorts a subject's visits chronologically — confirm.
first_visit_data = (
    df_frailty
    .sort_values(['subject_id', 'visit_id'], kind='stable')
    .drop_duplicates(subset='subject_id', keep='first')
    .reset_index(drop=True)
)

# Count frail vs non-frail at first visit
first_visit_status = first_visit_data['ffp_status_binary']
frail_at_start = (first_visit_status == 'frail').sum()
non_frail_at_start = (first_visit_status == 'no_frail').sum()
total_subjects = len(first_visit_data)

# Subjects whose first visit carries neither label (e.g. a missing status); without
# this line the two percentages below would silently fail to add up to 100%.
unknown_at_start = total_subjects - frail_at_start - non_frail_at_start

print(f"Total patients: {total_subjects}")
print(f"Patients who started as FRAIL: {frail_at_start} ({frail_at_start/total_subjects*100:.1f}%)")
print(f"Patients who started as NON-FRAIL: {non_frail_at_start} ({non_frail_at_start/total_subjects*100:.1f}%)")
if unknown_at_start:
    print(f"Patients with no frail/no_frail status at first visit: {unknown_at_start} ({unknown_at_start/total_subjects*100:.1f}%)")

In [None]:
# Transition probability analysis
# Note: Limited to 1-5 visits per subject, which may affect transition estimates
# Uses the 'subject_id', 'visit_id' and 'ffp_status_binary' columns of df_frailty.
# Visits without a recorded status are dropped first, so a counted transition may
# span a skipped visit.
# NOTE(review): assumes visit_id sorts a subject's visits chronologically — confirm.
df = df_frailty[['subject_id', 'visit_id', 'ffp_status_binary']].dropna(subset=['ffp_status_binary'])
df = df.sort_values(['subject_id', 'visit_id'])

# Counts of (status at one visit, status at the subject's next visit).
transitions = {
    ('frail', 'frail'): 0,
    ('frail', 'no_frail'): 0,
    ('no_frail', 'frail'): 0,
    ('no_frail', 'no_frail'): 0
}

for _, group in df.groupby('subject_id'):
    states = group['ffp_status_binary'].tolist()
    # Missing statuses were removed by dropna above, so every consecutive pair is valid.
    # A status other than 'frail'/'no_frail' raises KeyError here rather than being ignored.
    for current_state, next_state in zip(states, states[1:]):
        transitions[(current_state, next_state)] += 1

# Rows are the "from" state, columns the "to" state.
transition_matrix = pd.DataFrame(
    [[transitions[('frail', 'frail')], transitions[('frail', 'no_frail')]],
     [transitions[('no_frail', 'frail')], transitions[('no_frail', 'no_frail')]]],
    index=['frail', 'no_frail'],
    columns=['frail', 'no_frail']
)
print("\n\nTransition matrix\n")
print(transition_matrix)
print("\n\n\nProbability matrix\n")
# Row-normalise the counts; a state with no observed outgoing transition yields a NaN row.
prob_matrix = transition_matrix.div(transition_matrix.sum(axis=1), axis=0)
print(prob_matrix)

if prob_matrix.isna().any().any():
    # np.linalg.eig raises on NaN input, so report the situation instead of crashing.
    stationary = np.full((len(prob_matrix), 1), np.nan)
    print("\n\nStationary distribution: undefined (a state has no observed outgoing transitions)")
else:
    # The stationary distribution is the left eigenvector of the probability matrix
    # for eigenvalue 1, i.e. a right eigenvector of its transpose.
    eigvals, eigvecs = np.linalg.eig(prob_matrix.to_numpy().T)
    # Select exactly one eigenvector: a boolean isclose mask can match no column
    # (round-off) or two (both states absorbing), either of which breaks the normalisation.
    unit_idx = int(np.argmin(np.abs(eigvals - 1)))
    stationary = np.real(eigvecs[:, [unit_idx]])
    stationary = stationary / stationary.sum()
    print("\n\nStationary distribution:", stationary.flatten())

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, FancyArrowPatch

# Transition probabilities for the diagram, keyed by (from, to) with F = frail and
# N = non-frail. They are read from `prob_matrix` (computed in the transition
# analysis cell) instead of being retyped by hand, so the figure always matches
# the analysis it illustrates.
state_labels = {'F': 'frail', 'N': 'no_frail'}
probs = {
    (src, dst): float(prob_matrix.loc[state_labels[src], state_labels[dst]])
    for src in state_labels
    for dst in state_labels
}

# Node positions
node_pos = {'F': (0, 0), 'N': (3, 0)}

fig, ax = plt.subplots(figsize=(9, 5))
ax.set_xlim(-2, 5)
ax.set_ylim(-2, 2)
ax.axis('off')

# One circle per state, labelled with the state letter.
ellipse_F = Ellipse(xy=node_pos['F'], width=1.2, height=1.2,
                    edgecolor='orange', facecolor='#ffe5cc', lw=3)
ellipse_N = Ellipse(xy=node_pos['N'], width=1.2, height=1.2,
                    edgecolor='deepskyblue', facecolor='#e0f7ff', lw=3)
ax.add_patch(ellipse_F)
ax.add_patch(ellipse_N)
ax.text(*node_pos['F'], 'F', fontsize=28, ha='center', va='center')
ax.text(*node_pos['N'], 'N', fontsize=28, ha='center', va='center')

# Arrow style
arrowprops = dict(arrowstyle='-|>', color='black', lw=2, mutation_scale=20)

f_x, f_y = node_pos['F']
n_x, n_y = node_pos['N']

# Each entry: (transition key, arrow start, arrow end, arc curvature,
#              label position, extra text kwargs).
# Self-loops curve away from their node; the two straight arrows are offset
# above/below the centre line so they do not overlap.
edge_specs = [
    # Self-loop F
    (('F', 'F'), (f_x - 0.2, f_y + 0.6), (f_x - 0.6, f_y + 0.2), 0.8,
     (f_x - 0.9, f_y + 0.85), {}),
    # Self-loop N
    (('N', 'N'), (n_x + 0.2, n_y + 0.6), (n_x + 0.6, n_y + 0.2), -0.7,
     (n_x + 0.4, n_y + 0.85), {}),
    # F → N
    (('F', 'N'), (f_x + 0.75, f_y + 0.15), (n_x - 0.75, n_y + 0.15), 0.0,
     (1.5, 0.4), {'ha': 'center'}),
    # N → F
    (('N', 'F'), (n_x - 0.75, n_y - 0.15), (f_x + 0.75, f_y - 0.15), 0.0,
     (1.5, -0.5), {'ha': 'center'}),
]

for transition_key, arrow_start, arrow_end, curvature, label_xy, label_kwargs in edge_specs:
    arrow = FancyArrowPatch(arrow_start, arrow_end,
                            connectionstyle=f"arc3,rad={curvature}", **arrowprops)
    ax.add_patch(arrow)
    ax.text(*label_xy, f"{probs[transition_key]:.2f}", fontsize=18, **label_kwargs)

plt.tight_layout()
plt.show()