## Prep

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Do not truncate rows when pandas objects are displayed (None = no limit).
pd.set_option('display.max_rows', None)

In [None]:
# Collapse the 13 raw income codes into 4 ordered brackets:
# codes 1-3 -> 1, 4-6 -> 2, 7-11 -> 3, 12-13 -> 4.
income_mapping = {
    raw_code: bracket
    for bracket, raw_codes in enumerate(
        [(1, 2, 3), (4, 5, 6), (7, 8, 9, 10, 11), (12, 13)], start=1
    )
    for raw_code in raw_codes
}

# Human-readable labels for the 0/1 gender codes.
gender_mapping = {
    0: 'Female',
    1: 'Male',
}

# Load the MESA table, recode income and gender, and expose gender under the
# column name `sex`.
mesa = (
    pd.read_csv('../mesa/data_processed/Y_BaselineX_raw_full.csv')
    .assign(
        income=lambda raw: raw['income'].replace(income_mapping),
        gender=lambda raw: raw['gender'].replace(gender_mapping),
    )
    .rename(columns={'gender': 'sex'})
)

# Subset of MESA rows with race code 3.
# NOTE(review): "bla" presumably denotes the Black/African-American
# participants -- confirm against the MESA codebook.
mesa_bla = mesa.loc[mesa['race'] == 3]

In [None]:
# Read the two JHS tables: the Cox-model base table (outcomes) and the raw
# full table (covariates).
jhs_outcome, jhs_covar = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../jhs/data/processed/jhs_cox_base.csv',
        '../jhs/data/processed/jhs_raw_full.csv',
    )
)

## Outcome

In [None]:
# Reduce each cohort to its two outcome columns: the event indicator and the
# time column (MESA: cvda / cvdatt, JHS: event / time).
mesa_outcome = mesa.loc[:, ['cvda', 'cvdatt']]
mesa_bla_outcome = mesa_bla.loc[:, ['cvda', 'cvdatt']]
jhs_outcome = jhs_outcome.loc[:, ['event', 'time']]

In [None]:
# Overlaid histograms of the time column, restricted to rows whose event
# indicator equals 1. Each cohort gets its own 10 automatically chosen bins.
fig, ax = plt.subplots()
for cohort, event_col, time_col, legend_label in (
    (jhs_outcome, 'event', 'time', 'JHS Event Time'),
    (mesa_outcome, 'cvda', 'cvdatt', 'MESA Event Time'),
    (mesa_bla_outcome, 'cvda', 'cvdatt', 'MESA_bla Event Time'),
):
    event_times = cohort.loc[cohort[event_col] == 1, time_col]
    ax.hist(event_times, bins=10, alpha=0.5, label=legend_label)

# Axis labels and legend
ax.set_xlabel('Time')
ax.set_ylabel('Frequency of Events')
ax.legend()

# Title and render
ax.set_title('Distribution of Event Times for JHS and MESA')
plt.show()

In [None]:
# Kernel density estimates of the time column, restricted to rows whose event
# indicator equals 1, one curve per cohort.
fig, ax = plt.subplots()
for cohort, event_col, time_col, curve_label, curve_color in (
    (jhs_outcome, 'event', 'time', 'JHS', 'blue'),
    (mesa_outcome, 'cvda', 'cvdatt', 'MESA', 'red'),
    (mesa_bla_outcome, 'cvda', 'cvdatt', 'MESA_bla', 'green'),
):
    event_times = cohort.loc[cohort[event_col] == 1, time_col]
    sns.kdeplot(event_times, label=curve_label, color=curve_color, ax=ax)

# Axis labels and legend
ax.set_xlabel('Value')
ax.set_ylabel('Density')
ax.legend()

# Title and render
ax.set_title('Density Plot of Event Time')
plt.show()


## nb features

In [None]:
# Density of the `G_bla_rk` column in JHS, all of MESA, and the MESA race==3
# subset, drawn on a shared axis.
fig, ax = plt.subplots()
for cohort, curve_label, curve_color in (
    (jhs_covar, 'JHS', 'blue'),
    (mesa, 'MESA', 'red'),
    (mesa_bla, 'MESA_bla', 'green'),
):
    sns.kdeplot(cohort['G_bla_rk'], label=curve_label, color=curve_color, ax=ax)

# Axis labels and legend
ax.set_xlabel('Value')
ax.set_ylabel('Density')
ax.legend()

# Title and render
ax.set_title('Density Plot of G_bla_rk')
plt.show()


## ind exposures

In [None]:
# Side-by-side bars of the physical-activity category per cohort
# (JHS: PA3cat, MESA: chphysact), using unit-wide bins with edges 0..3.
fig, ax = plt.subplots()
ax.hist(
    [jhs_covar['PA3cat'], mesa['chphysact'], mesa_bla['chphysact']],
    bins=[0, 1, 2, 3],
    alpha=0.5,
    label=['JHS', 'MESA', 'MESA Bla'],
)

# Axis labels and legend
ax.set_xlabel('Categorization')
ax.set_ylabel('Frequency')
ax.legend()

# Put one named tick at the centre of each unit-wide bin (left edge + 0.5).
tick_positions = [0, 1, 2]
tick_labels = ['poor', 'intermediate', 'ideal']
ax.set_xticks([left_edge + 0.5 for left_edge in tick_positions])
ax.set_xticklabels(tick_labels)

# Title and render
ax.set_title('physical Activity')
plt.show()

In [None]:
# Income: side-by-side bars of the 4-level income bracket per cohort
# (JHS: fmlyinc, MESA: the recoded income column), unit-wide bins, edges 1..5.
fig, ax = plt.subplots()
ax.hist(
    [jhs_covar['fmlyinc'], mesa['income'], mesa_bla['income']],
    bins=[1, 2, 3, 4, 5],
    alpha=0.5,
    label=['JHS', 'MESA', 'MESA Bla'],
)

# Axis label and legend
ax.set_ylabel('Frequency')
ax.legend()

# Put one dollar-range tick at the centre of each unit-wide bin (left edge + 0.5).
tick_positions = [1, 2, 3, 4]
tick_labels = ['$0-11,999', '$12,000-24,999', '$25,000-74,999','$75,000+']
ax.set_xticks([left_edge + 0.5 for left_edge in tick_positions])
ax.set_xticklabels(tick_labels)

# Title and render
ax.set_title('Family Income')
plt.show()

## Covariates

In [None]:
# Continuous covariates: density of the `sbp` column for each cohort on a
# shared axis.
fig, ax = plt.subplots()
for cohort, curve_label, curve_color in (
    (jhs_covar, 'JHS', 'blue'),
    (mesa, 'MESA', 'red'),
    (mesa_bla, 'MESA bla', 'green'),
):
    sns.kdeplot(cohort['sbp'], label=curve_label, color=curve_color, ax=ax)

# Axis labels and legend
ax.set_xlabel('Value')
ax.set_ylabel('Density')
ax.legend()

# Title and render
ax.set_title('Density Plot of Baseline sbp')
plt.show()


In [None]:
# race
# Tabulate how many MESA rows carry each `race` code; the result is displayed
# as the cell output. Code 3 is the value used to build `mesa_bla` in the
# prep cell.

mesa['race'].value_counts()

In [None]:
# gender
#
# The prep cell recodes MESA's gender column to the strings 'Female'/'Male'
# and renames it to `sex`. Passing strings to `plt.hist` puts them on a
# categorical axis whose positions follow first-appearance order, so the
# numeric bin edges and the fixed 'female'/'male' ticks below could label
# the wrong bars. Every cohort is therefore converted to explicit numeric
# codes (0 = female, 1 = male) before plotting.
sex_codes = {'Female': 0, 'Male': 1}
sex_by_cohort = [
    # Values that are not keys of `sex_codes` pass through unchanged;
    # `pd.to_numeric` then raises if anything non-numeric is left, instead
    # of silently producing a mislabeled plot.
    # NOTE(review): assumes jhs_covar['sex'] holds either 'Female'/'Male' or
    # numeric codes with 0 = female, 1 = male -- confirm against the JHS data.
    pd.to_numeric(frame['sex'].map(lambda value: sex_codes.get(value, value)))
    for frame in (jhs_covar, mesa, mesa_bla)
]

plt.hist(sex_by_cohort, bins=[0, 1, 2, 3], alpha=0.5, label=['JHS', 'MESA', 'MESA Bla'])

# Set axis labels and legend
plt.xlabel('Sex')
plt.ylabel('Frequency')
plt.legend()

# Centre the tick labels on their bins: code 0 falls in [0, 1) and code 1
# in [1, 2), so the bin centres are 0.5 and 1.5.
tick_positions = [0.5, 1.5]
tick_labels = ['female', 'male']
plt.xticks(tick_positions, tick_labels)

# Show the plot
plt.title('Histogram of Sex')
plt.show()

In [None]:
# binary covariates
#
# Side-by-side bars of the diabetes indicator per cohort
# (JHS: Diabetes, MESA: diabet), using unit-wide bins with edges 0..3.
plt.hist([jhs_covar['Diabetes'], mesa['diabet'], mesa_bla['diabet']], bins=[0, 1, 2, 3], alpha=0.5,
         label=['JHS', 'MESA', 'MESA Bla'])

# Set axis labels and legend
plt.xlabel('Diabetes status')
plt.ylabel('Frequency')
plt.legend()

# Centre the tick labels on their bins. Value 0 falls in bin [0, 1)
# (centre 0.5) and value 1 in bin [1, 2) (centre 1.5), so the labels must
# read '0' then '1'; the previous ['1', '0'] order labeled each bar with
# the opposite status.
tick_positions = [0.5, 1.5]
tick_labels = ['0', '1']
plt.xticks(tick_positions, tick_labels)

# Show the plot
plt.title('Histogram of Diabetes Status')
plt.show()