In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
from scipy.stats import permutation_test

from scripts.preprocess_utils import closest_value,find_centile,find_exact_percentile_return_number

# Population annotation table; it is filtered by 'Dataset', 'ID', 'Gender' and 'Age' further down.
input_annotation_file = 'data/pop_norms.csv'
df = pd.read_csv(input_annotation_file, header=0)

# Cap ages: every row older than 30 is rewritten to exactly 30.
over_thirty = df['Age'] > 30
df.loc[over_thirty, 'Age'] = 30

# Per-sex percentile charts handed to find_exact_percentile_return_number below.
df_centile_boys = pd.read_csv('data/percentiles_chart_boys.csv', header=0)
df_centile_girls = pd.read_csv('data/percentiles_chart_girls.csv', header=0)

## Longitudinal analysis: 28andMe dataset

In [None]:
# Per-scan predictions for the single longitudinal participant.
csv_path = 'data/t1_mris/long_analysis/csa_population_28.csv'
df_individual_healthy = pd.read_csv(csv_path, delimiter=",", header=0)

# Percentile chart used to score the CSA measurements.
df_csa = pd.read_csv("data/percentiles_chart_girls_csa.csv", header=0)

# Participant/session metadata.
# NOTE(review): absolute, machine-specific path - this cell only runs where that disk is mounted.
csv_path = '/media/sda/Anna/28andme/ds002674-download/participants.tsv'
df_individual_healthy_tsv = pd.read_csv(csv_path, delimiter="\t", header=0)

# 'session_id' is split on "-" and the second piece is kept as the integer session number.
session_tokens = df_individual_healthy_tsv['session_id'].str.split("-")
df_individual_healthy_tsv['Session'] = session_tokens.str[1].astype(int)

In [None]:
# Score every scan of the longitudinal participant against the percentile charts
# and attach session number and calorie intake to each row.
#
# Fixes relative to the previous version of this cell:
#   * 'Centile CSA' used to receive only the LAST loop value (a scalar broadcast to
#     every row); the CSA centiles are now collected per scan.
#   * 'Daily cal intake' used to receive a filtered Series per row; it now holds the
#     scalar intake of the matching session, or NaN when no session matches.

# All scans are scored at one fixed age.
age = 23

cohort_centiles = []
csa_centiles = []
timestamps = []
kkals = []
for i in range(0, len(df_individual_healthy)):
    centile_healthy = find_exact_percentile_return_number(
        df_individual_healthy['TMT PRED AVG filtered'].iloc[i], age, df_centile_girls)
    centile_csa_healthy = find_exact_percentile_return_number(
        df_individual_healthy['CSA PRED AVG filtered'].iloc[i], age, df_csa)

    # The session number is the piece after "-" in the second "_"-separated field of 'ID'.
    timestamp = int(df_individual_healthy['ID'].iloc[i].split("_")[1].split("-")[1])

    # Look up the calorie intake logged for that session; take the first match.
    intake = df_individual_healthy_tsv.loc[
        df_individual_healthy_tsv['Session'] == timestamp, 'total_calorie_intake']
    kkals.append(intake.iloc[0] if len(intake) > 0 else np.nan)

    cohort_centiles.append(centile_healthy)
    csa_centiles.append(centile_csa_healthy)
    timestamps.append(timestamp)

df_individual_healthy['Centile'] = cohort_centiles
df_individual_healthy['Centile CSA'] = csa_centiles
df_individual_healthy['Session'] = timestamps
df_individual_healthy['Daily cal intake'] = kkals
# Order the scans by session so positional plots below follow session order.
df_individual_healthy = df_individual_healthy.sort_values(by=['Session'])


In [None]:
# Two-axis line plot: iTMT on the left axis, its centile on the right axis.
plt.rcParams.update({"figure.figsize": [10, 5], "figure.autolayout": True})

# `speed` holds the iTMT values and `acceleration` the centiles.
speed = df_individual_healthy['TMT PRED AVG filtered'].to_numpy()
acceleration = df_individual_healthy['Centile'].to_numpy()

ax1 = plt.subplot()
ax2 = ax1.twinx()

# Both series are drawn against row position (the frame is sorted by 'Session').
# NOTE(review): the x axis is the row position, not the 'Session' value itself -
# the label and the marker at x = 30 are exact only if sessions are contiguous; confirm.
l1, = ax1.plot(speed, color='red')
l2, = ax2.plot(acceleration, color='orange')
ax1.axvline(x=30, color='black')

ax1.set(ylabel='iTMT [MM]', xlabel='Session, day', ylim=[6, 21])
ax2.set_ylabel('Centile', rotation=270, labelpad=20)
ax2.set_ylim([0, 100])

plt.legend([l1, l2], ["iTMT", "Centile"])

plt.show()

In [None]:
# Two-axis line plot: CSA on the left axis, a centile curve on the right axis.
plt.rcParams.update({"figure.figsize": [10, 5], "figure.autolayout": True})

# `speed` holds the CSA values and `acceleration` the centiles.
# NOTE(review): the centile drawn here is the 'Centile' column (computed from iTMT),
# not 'Centile CSA' - confirm that this overlay is intended.
speed = df_individual_healthy['CSA PRED AVG filtered'].to_numpy()
acceleration = df_individual_healthy['Centile'].to_numpy()

ax1 = plt.subplot()
ax2 = ax1.twinx()

# Both series are drawn against row position (the frame is sorted by 'Session').
l1, = ax1.plot(speed, color='red')
l2, = ax2.plot(acceleration, color='orange')
ax1.axvline(x=30, color='black')

# NOTE(review): the label states the unit as MM; verify the unit of the CSA column.
ax1.set(ylabel='CSA [MM]', xlabel='Session, day', ylim=[100, 1000])
ax2.set_ylabel('Centile', rotation=270, labelpad=20)
ax2.set_ylim([0, 101])

plt.legend([l1, l2], ["CSA", "Centile"])

plt.show()

# ABCD longitudinal analysis

In [None]:
def magnitude_(x_values,y_values):
    """Pick a line colour and opacity for a two-point trajectory.

    Parameters
    ----------
    x_values, y_values : sequences with at least two elements
        Start and end coordinates; only indices 0 and 1 are read.

    Returns
    -------
    (str, float)
        ('k', 0.01) when the absolute change in y is below 25.
        ('r', 0.2) when it is at least 25 and x and y move the same way
        (both non-decreasing or both non-increasing).
        ('b', 0.2) when it is at least 25 and they move in opposite ways.
    """
    alpha = 25
    dx = x_values[1] - x_values[0]
    dy = y_values[1] - y_values[0]

    # Small changes are always drawn as a faint black line, whatever the direction.
    if not abs(dy) >= alpha:
        return 'k', 0.01

    same_direction = (dx >= 0 and dy >= 0) or (dy <= 0 and dx <= 0)
    return ('r' if same_direction else 'b'), 0.2

In [None]:
# Build one two-point iTMT centile trajectory per ABCD subject that has more than one scan.
#
# Fixes relative to the previous version of this cell:
#   * The subject list had one entry per scan, so every multi-scan subject was
#     appended once per scan (duplicate trajectories). It is now de-duplicated.
#   * The per-subject row selection is computed once instead of on every access,
#     and the ID is matched as a literal substring rather than as a regex.
#   * The boys/girls branches differed only in the chart, so the chart is chosen up front.
create_centile_track = []

# Start point of each trajectory: age and centile at the first listed scan.
X_age = []
Y_tmt = []

# End point of each trajectory: age and centile at the second listed scan.
# These are absolute values, not deltas.
U_age = []
V_tmt = []
ids = []

# Subject prefix = the part of 'ID' before the first "_", one per unique subject.
id_list = (
    df[df['Dataset'] == 'ABCD']['ID']
    .str.split("_", expand=True)[0]
    .drop_duplicates()
)
for i in range(0, len(id_list)):
    id_pat = id_list.iloc[i]
    subject_scans = df[df['ID'].str.contains(id_pat, regex=False)]
    if len(subject_scans) > 1:
        # Scans are taken in file order; they are not sorted by age here.
        first_scan = subject_scans.iloc[0]
        second_scan = subject_scans.iloc[1]

        gender = first_scan['Gender']
        # Gender == 1 is scored against the boys' chart, anything else against the girls'.
        centile_chart = df_centile_boys if gender == 1 else df_centile_girls

        first_age = first_scan['Age']
        second_age = second_scan['Age']
        first = find_exact_percentile_return_number(first_scan['TMT PRED AVG filtered'],
                                                    first_age,
                                                    centile_chart)
        second = find_exact_percentile_return_number(second_scan['TMT PRED AVG filtered'],
                                                     second_age,
                                                     centile_chart)
        ids.append(id_pat)
        X_age.append(first_age)
        Y_tmt.append(first)
        U_age.append(second_age)
        V_tmt.append(second)

X_age = np.asarray(X_age)
Y_tmt = np.asarray(Y_tmt)
U_age = np.asarray(U_age)
V_tmt = np.asarray(V_tmt)


In [None]:
# Draw every two-point centile trajectory, coloured by magnitude_, and count the large changes.
#
# Fix: `ax` was used for font styling without being created in this cell or any
# earlier one, so a fresh kernel failed with NameError. The axes are now created
# explicitly and all drawing goes through them.
sns.set(rc={'figure.figsize':(6.7,4.27),'axes.facecolor':'white', 'figure.facecolor':'white'},font_scale= 1.5)
fig, ax = plt.subplots()

count = 0
ids_extreme = []
for i in range(0,len(X_age)):
    x_values = [X_age[i], U_age[i]]
    y_values = [Y_tmt[i], V_tmt[i]]
    col,transparency = magnitude_(x_values,y_values)
    # magnitude_ returns opacity 0.2 only for large changes, so this picks those out.
    if transparency>0.1:
        count+=1
        # Stored as [id, centiles, ages].
        ids_extreme.append([ids[i],y_values,x_values])
    ax.plot(x_values, y_values, col, linestyle="-",alpha=transparency)

print(count)
ax.set_xlim(8, 13)
ax.set_ylim(0, 101)
ax.set_xlabel('Age [Years]')
ax.set_ylabel('Centile')

# tick_params keeps the size even when the tick labels are regenerated on draw.
ax.tick_params(axis='both', labelsize=16)
for item in ([ax.title, ax.xaxis.label, ax.yaxis.label]):
    item.set_fontsize(35)

plt.show()
# do some color encoding

In [None]:
def magnitude_aplha(x_values,y_values):
    """Classify a two-point trajectory and report the size of its change in y.

    Parameters
    ----------
    x_values, y_values : sequences with at least two elements
        Start and end coordinates; only indices 0 and 1 are read.

    Returns
    -------
    (str, float, number)
        Colour, opacity and abs(y_values[1] - y_values[0]).
        Colour/opacity are ('k', 0.01) when that magnitude is below 25,
        ('r', 0.2) when it is at least 25 and x and y move the same way
        (both non-decreasing or both non-increasing), and ('b', 0.2)
        when it is at least 25 and they move in opposite ways.
    """
    alpha = 25
    delta_y = y_values[1] - y_values[0]
    delta_x = x_values[1] - x_values[0]
    mag = abs(delta_y)

    if mag >= alpha:
        moves_together = (delta_x >= 0 and delta_y >= 0) or (delta_y <= 0 and delta_x <= 0)
        return_val = 'r' if moves_together else 'b'
        transparency = 0.2
    else:
        return_val, transparency = 'k', 0.01

    return return_val, transparency, mag

# Second pass over the trajectories: no drawing, only bookkeeping.
# Records the magnitude and starting age of every trajectory and splits the
# subjects into large-change ("extreme") and the rest ("norma").
sns.set(rc={'figure.figsize':(6.7,4.27),'axes.facecolor':'white', 'figure.facecolor':'white'},font_scale= 1.5)

count = 0
thickness_computation = []
ids_extreme, ids_norma = [], []
ages, mag = [], []

for i in range(len(X_age)):
    x_values = [X_age[i], U_age[i]]
    y_values = [Y_tmt[i], V_tmt[i]]
    col, transparency, m = magnitude_aplha(x_values, y_values)

    mag.append(m)
    ages.append(X_age[i])

    # Each record is [id, centiles, ages].
    record = [ids[i], y_values, x_values]
    if transparency > 0.1:
        # Opacity above 0.1 is only returned for large changes.
        count += 1
        ids_extreme.append(record)
    else:
        ids_norma.append(record)

# do some color encoding

In [None]:
# Distribution of the centile-change magnitude across trajectories.
df_std = pd.DataFrame([])
df_std['Age'] = ages
df_std['Magnitude'] = mag

# Number of rows in the population table that share each trajectory's starting age.
# Fix: this used to be built from df['Age'] and assigned by row index, which paired
# each trajectory with the age count of an unrelated population row.
age_counts = df['Age'].value_counts()
df_std['density'] = df_std['Age'].map(age_counts).astype(int)
df_std['Magnitude binned'] = df_std['Magnitude']//10
df_std['density binned'] = df_std['density']//100


sns.set({'axes.facecolor':'white', 'figure.facecolor':'white'})
sns.set_style("whitegrid")

# Histogram with the median marked by a dashed line.
# `ax` is the object returned by displot, not a matplotlib Axes.
ax = sns.displot(df_std, x="Magnitude", binwidth=1)
plt.axvline(x=df_std['Magnitude'].median(),color='gray',ls='--',
            lw=2.5)

# Kernel density estimate on a second figure, with the same median marker.
sns.displot(df_std, x="Magnitude", kind="kde")
plt.axvline(x=df_std['Magnitude'].median(),color='gray',ls='--',
            lw=2.5)

# NOTE(review): these labels are applied to the histogram figure and look like
# placeholders - replace with descriptive axis names.
ax.set(xlabel="x-axis", ylabel="y-axis")

# visualizing illustration
plt.show()

In [None]:
# BMI drops
# Among the large-centile-change subjects, list those whose BMI fell by more than 7
# between their two anthropometric records.
# In each record `x` holds the two centiles and `y` the two ages.
for id_pat, x, y in ids_extreme:
    subject = id_pat.split("-")[1]
    new_bmi = df_ant[df_ant['id'].str.contains(subject)].sort_values(by='Age')
    # Only subjects with exactly two records can give a single BMI delta.
    if len(new_bmi) != 2:
        continue
    delta_bmi = new_bmi['BMI'].iloc[1] - new_bmi['BMI'].iloc[0]
    if delta_bmi < -7:
        print(subject, delta_bmi, x, y)

In [None]:
# BMI drops, ITMT same
# Among the non-extreme subjects, list those whose BMI fell by more than 7 while
# their centile moved by less than 10.
# In each record `x` holds the two centiles and `y` the two ages.
for id_pat, x, y in ids_norma:
    subject = id_pat.split("-")[1]
    new_bmi = df_ant[df_ant['id'].str.contains(subject)].sort_values(by='Age')
    # Only subjects with exactly two records can give a single BMI delta.
    if len(new_bmi) != 2:
        continue
    delta_bmi = new_bmi['BMI'].iloc[1] - new_bmi['BMI'].iloc[0]
    centile_is_stable = abs(x[1] - x[0]) < 10
    if delta_bmi < -7 and centile_is_stable:
        print(subject, delta_bmi, x, y)

In [None]:
# BMI grows
# Among the non-extreme subjects, list those whose BMI rose by more than 7
# between their two anthropometric records.
# In each record `x` holds the two centiles and `y` the two ages.
for id_pat, x, y in ids_norma:
    subject = id_pat.split("-")[1]
    new_bmi = df_ant[df_ant['id'].str.contains(subject)].sort_values(by='Age')
    # Only subjects with exactly two records can give a single BMI delta.
    if len(new_bmi) != 2:
        continue
    delta_bmi = new_bmi['BMI'].iloc[1] - new_bmi['BMI'].iloc[0]
    if delta_bmi > 7:
        print(subject, delta_bmi, x, y)

In [None]:
# Spot-check one subject: print its scan rows, then display its anthropometric
# records ordered by age as the cell output.
subject_scan_rows = df[df['ID'].str.contains('sub-NDARINVLABWKL63')]
print(subject_scan_rows[['ID','Slice label']])
df_ant[df_ant['id'].str.contains('NDARINVLABWKL63')].sort_values(by='Age')

In [None]:
# Hand-entered example cases (two time points each); `i` selects the case to draw.
i = 4
age_arr=[[9,10],[9,11],[10,12],[10,12],[8,11]]
bmi_arr = [[19.7,20.1],[20,32.8],[23,32],[28.5,19.8],[35.6,20.9]]
centile_arr=[[63.9,66.7],[27.6,90],[47.2,45.6],[70,34.1],[41.7,39.4]]

plt.rcParams.update({"figure.figsize": [4, 2], "figure.autolayout": True})

age = np.array(age_arr[i])
bmi = np.array(bmi_arr[i])
centile = np.array(centile_arr[i])

print(age,bmi)

# BMI on the left axis, centile on the right axis, both against age.
ax1 = plt.subplot()
ax2 = ax1.twinx()
l1, = ax1.plot(age, bmi, color='red')
l2, = ax2.plot(age, centile, color='orange')

# NOTE(review): the BMI axis stops at 35, so the 35.6 start value of case 4 is cut off.
ax1.set(ylabel='BMI', ylim=[15, 35], xlim=[8, 12])
ax1.set_xticks(range(8,13,1))
ax2.set_ylabel('Centile',rotation=270, labelpad=20)
ax2.set_ylim([0, 100])

plt.show()