In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import platform
import pathlib

In [None]:
## Paths Input Here
# Machine-specific absolute locations of the inputs and the output folder.
#   conn_path   : CSV holding the ROI-to-ROI connectivity matrix
#   clin_path   : Excel workbook holding the clinical scores
#   out_dir     : folder that receives figures and coefficient tables
#   x_roi_names : CSV holding the ROI names for the connectivity matrix
# NOTE(review): these are hard-coded personal Dropbox paths; consider a single configurable base directory.
if platform.uname().system == 'Darwin': #------------------------------Mac OS X---------------------------------------------------------------
    conn_path = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/analyses/roi-roi_correl/matrix_corrMx_AvgR.csv'
    clin_path = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/patient_data/AD_Clinical_Data_CDR_ADAS_COG_13.xlsx'
    # clin_path = 'path to clinical values'
    out_dir = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/analyses/roi-roi_correl/figures/multidimensional_scatterplots'
    #out_dir = r'path to out dir here'
    x_roi_names = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/analyses/roi-roi_correl/matrix_corrMx_names.csv'
    #roi_names = '<path to roi name location>'
    print('I have set pathnames in the Mac style')
else: #----------------------------------------------------------------Windows----------------------------------------------------------------
    # Every platform other than macOS lands here and receives the Windows paths.
    conn_path = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\analyses\roi-roi_correl\matrix_corrMx_AvgR.csv'
    clin_path = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\patient_data\AD_Clinical_Data_CDR_ADAS_COG_13.xlsx'
    # clin_path = 'path to clinical values'
    # The previous debug print joined the path with an undefined name `base`
    # and raised NameError; echo the configured path directly instead.
    print(pathlib.Path(conn_path))
    out_dir = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\analyses\AD_to_memory_net'
    #out_dir = r'path to out dir here'
    x_roi_names = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\analyses\roi-roi_correl\matrix_corrMx_names.csv'
    #roi_names = '<path to roi name location>'
    print('I have set pathnames in the Windows style')

## Import Connectivity Values

In [None]:
def _short_roi_name(raw_name):
    """Return the first 8 characters of the base file name, taken before any '.nii' suffix."""
    return os.path.basename(raw_name).split('.nii')[0][0:8]


try:
    # Primary layout: ROI names come from their own one-column CSV and the
    # matrix CSV is read without a header row.
    name_df = pd.read_csv(x_roi_names, names=['arb'], header=None)
    colnames = name_df.arb.values.tolist()
    newname = [_short_roi_name(name) for name in colnames]

    x_df = pd.read_csv(conn_path, names=newname, header=None)
    x_df.index = newname
    # Keep rows from position 8 onward and the first 8 columns.
    # NOTE(review): presumably the first 8 ROIs are the networks and the rest are patients - confirm.
    x_df = x_df.iloc[8:,:8]
    x_df = x_df.reset_index(drop=True)
except Exception as err:
    # Fallback layout: the matrix CSV carries its own header row.
    # Only Exception is caught so KeyboardInterrupt still stops the cell,
    # and the cause is reported instead of being silently discarded.
    print('excepted')
    print('  cause: ', repr(err))
    x_df = pd.read_csv(conn_path)
    colnames = x_df.columns.values
    newname = [_short_roi_name(name) for name in colnames]
    # Direct assignment works on every pandas version; the old
    # set_axis(..., inplace=False) call is rejected by pandas 2.x.
    x_df.columns = newname
    try:
        # An unnamed index column ('Unnamed: 0') shortens to 'Unnamed:' above.
        x_df.pop('Unnamed:')
    except KeyError:
        print('no x_df.pop(<name>) column to pop')


corr_df = x_df
corr_df.tail(3)

In [None]:
# Summary statistics of the connectivity values, shown with one row per column of corr_df
corr_description = corr_df.describe().T
display(corr_description)

In [None]:
#Visualize fundamental relations of the connectivity values
# corrfig = sns.pairplot(corr_df)
# corrfig

## Import Clinical Outcomes

In [None]:
# Metrics of interest (per the original notes):
#   pt_id, randomization arm, age at DOS,
#   baseline ADAS-Cog 11, baseline CDR,
#   % change ADAS-Cog 11, change in CDR

# Read only the selected spreadsheet columns from the first 50 data rows.
sheet_name = 'AD_Clinical_Scores'
alphab_cols = 'C, D, E, F, G, J, V'
clin_df = pd.read_excel(
    clin_path,
    sheet_name=sheet_name,
    usecols=alphab_cols,
    nrows=50,
)
print('Num NaNs: ', clin_df.isna().sum().sum())
# Show every second row as a quick visual check.
display(clin_df.iloc[::2])

In [None]:
## Order the clinical rows by patient number (ascending) and renumber the index from 0
clin_df = clin_df.sort_values('Patient # CDR, ADAS', ignore_index=True)
clin_df.tail(10)

In [None]:
## Recode the randomization arm as a 0/1 flag | sham=0 stim=1
arm_col = 'Randomization Arm'
shams = clin_df[arm_col].eq('sham-stim')
clin_df.loc[shams, arm_col] = 0
stims = clin_df[arm_col].eq('stim-sham')
clin_df.loc[stims, arm_col] = 1
clin_df.tail(10)

In [None]:
## Develop Understanding of the Clinical Data
# Summary statistics of the clinical columns, shown with one row per column.
try:
    clin_description = clin_df.describe().transpose()
    display(clin_description)
except Exception as err:
    # Report the real cause instead of a generic "unknown" message.
    print(f'Failed to describe clinical dataframe: {err!r}')

In [None]:
## Visualize the Data Relationships
# clinfig = sns.pairplot(clin_df)
# clinfig

## Merge DFs

In [None]:
# Pair clinical and connectivity rows strictly by position.
# The original computed `corrd_df` but then concatenated `corr_df`, so the
# reset had no effect; both indexes are now dropped before concatenation so
# alignment never depends on leftover index labels.
# NOTE(review): assumes both frames list patients in the same order - confirm.
corrd_df = corr_df.reset_index(drop=True)
total_df = pd.concat([clin_df.reset_index(drop=True), corrd_df], axis=1)
display(total_df)

In [None]:
#Handle NaNs
# Forward-fill: each missing cell takes the value from the row above it.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
# NOTE(review): this copies values from the preceding row (a different patient) - confirm this imputation is intended.
total_df = total_df.ffill()
print('Num NaNs: ', total_df.isna().sum().sum())

In [None]:
#Generate metrics for understanding
# Summary statistics of the merged frame, shown with one row per column.
try:
    total_desc = total_df.describe().transpose()
    # A bare expression inside `try` is not rendered by the notebook,
    # so the table is displayed explicitly.
    display(total_desc)
except Exception as err:
    # Report the real cause instead of a generic "unknown" message.
    print(f'Failed to generate total metrics: {err!r}')

In [None]:
#Generate visual relationship of total metrics
# total_pairplot = sns.pairplot(total_df)
# total_pairplot

## Generate Subgrouping Metric

In [None]:
# Split the cohort into two subgroups by thresholding one column (here: age)
metric = 'Age at DOS'
metric_val = 65

# Boolean masks over total_df rows
index_one = total_df[metric].gt(metric_val)   # strictly above the threshold, e.g. everyone over 65
index_two = total_df[metric].le(metric_val)   # at or below the threshold, e.g. everyone 65 or younger

## How to use these masks to modify data:
#### clin_df.loc[index_one, '<names of relevant columns>'] = 0 ## iloc works as well.

## Decide What to Plot

In [None]:
## Pick the two predictor columns (x, y) and the outcome column (z).
x_name = '06_front'
y_name = '04_ventr'
z_name = '% Change from baseline (ADAS-Cog11)'  # outcome variable

# Pull the matching columns out of the merged frame; z_vals is the outcome.
x_vals, y_vals, z_vals = (total_df[col] for col in (x_name, y_name, z_name))


In [None]:
#2D Line of Best Fit Code
def lin_regression(x, y, z, degree):
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression
    from scipy.stats import pearsonr

    #----Generate Model
    #Prep data
    x = x
    y = y
    z = z
    input_matrix = np.stack([x, y]).T

    #Linear Regression
    poly = PolynomialFeatures(degree=degree)
    model = LinearRegression()
    in_features = poly.fit_transform(input_matrix)
    model.fit(in_features, z)
    predicted_z = model.predict(poly.transform(input_matrix))

    #----Performance Metrics
    #Pearson of Predicted Z to Actual Z
    r, p = pearsonr(predicted_z, z) #need to find the prediction points at the x/y for each z_actual
    print('r: ', r)
    print('p: ', p)
    #Coefficients of the Regression
    coefficients = dict(zip(poly.get_feature_names_out(), model.coef_.round(4)))
    print('coeff: ', coefficients)
    #Check Fit
    r_squared = model.score(poly.transform(input_matrix), z)
    print('r2: ', r_squared)

    #----Generate Planes for Plotting
    x_lin=np.linspace(np.min(x), np.max(x), 100)
    y_lin=np.linspace(np.min(y), np.max(y), 100)
    X_plane,Y_plane=np.meshgrid(x_lin,y_lin,copy=False)
    input_planes=np.stack([X_plane.flatten(),Y_plane.flatten()]).T
    assert(input_planes.shape==(100*100, 2)) #unsure what shape 400,2 refers to
    predicted_plane = model.predict(poly.transform(input_planes))

    return coefficients, r_squared, r, p, X_plane, Y_plane, predicted_plane.reshape(100,100)


## 3D Plots (No Subgroups)

In [None]:
#Plot
plt.style.use('default')

# Fit once: the regression does not depend on the viewing angle, so it is
# computed (and its metrics printed) a single time rather than per subplot.
coeffic1, r_sq1, r1, p1, x_plane1, y_plane1, z_plane1 = lin_regression(x_vals, y_vals, z_vals, degree=1)

#setup: three side-by-side 3D panels showing the same data from different azimuths
fig = plt.figure(figsize=(30,7))
axes = []
for panel, azim in enumerate((270, 135, 0), start=1):
    ax = fig.add_subplot(1, 3, panel, projection='3d')
    ax.view_init(elev=0, azim=azim)
    axes.append(ax)
ax1, ax2, ax3 = axes

# Only the first panel carries the z-axis label.
ax1.set_zlabel(z_name, fontsize='10')
for ax in axes:
    # Observations, coloured by their x value
    ax.scatter3D(x_vals, y_vals, z_vals, c=x_vals, cmap='Reds', zorder=15, marker='o')

    ax.locator_params(nbins=4, axis='x')
    ax.set_xlabel(x_name, fontsize='10')
    ax.set_ylabel(y_name, fontsize='10')

    # Fitted surface drawn as 100 contour levels
    ax.contour(x_plane1, y_plane1, z_plane1, 100, cmap='Reds', alpha=0.95)
fig.tight_layout()
fig.suptitle(f'{x_name} vs {y_name} R2:{r_sq1}|r:{r1}|p:{p1}')

In [None]:
# Build a table of the fitted coefficients next to human-readable names
names_df = pd.DataFrame({'Coefficient Name': ['intersept', x_name, y_name]})
# reset_index turns the coefficient keys into an ordinary column
coeff_df = pd.DataFrame({'Coefficients': coeffic1}).reset_index()
coeff_df = pd.concat([coeff_df, names_df], axis=1)
display(coeff_df)

In [None]:
##Save the figure and coefficients
#Save
figname = '3D_' + x_name + '_by_' + y_name + '_explain_' + z_name
# makedirs also creates missing parent folders and does nothing if the
# directory already exists (os.mkdir fails when a parent is missing).
os.makedirs(out_dir, exist_ok=True)
fig.savefig(os.path.join(out_dir, (figname+'_figure.png')))
coeff_df.to_csv(os.path.join(out_dir, (figname+'_coefficients.csv')))

print(f'{figname} saved to: \n {out_dir}')

## 4D Plots (Subgrouped)

In [None]:

#Plot
plt.style.use('default')

# Split the plotted variables by subgroup mask once.
x_one, y_one, z_one = x_vals.loc[index_one], y_vals.loc[index_one], z_vals.loc[index_one]
x_two, y_two, z_two = x_vals.loc[index_two], y_vals.loc[index_two], z_vals.loc[index_two]

# Fit each subgroup once: the regressions do not depend on the viewing
# angle, so they are not recomputed (and reprinted) for every subplot.
coeffic1, r_sq1, r1, p1, x_plane1, y_plane1, z_plane1 = lin_regression(x_one, y_one, z_one, degree=1)
coeffic2, r_sq2, r2, p2, x_plane2, y_plane2, z_plane2 = lin_regression(x_two, y_two, z_two, degree=1)

#setup: three side-by-side 3D panels showing the same data from different azimuths
fig = plt.figure(figsize=(30,7))
axes = []
for panel, azim in enumerate((270, 135, 0), start=1):
    ax = fig.add_subplot(1, 3, panel, projection='3d')
    ax.view_init(elev=0, azim=azim)
    axes.append(ax)
ax1, ax2, ax3 = axes

# Only the first panel carries the z-axis label.
ax1.set_zlabel(z_name, fontsize='10')
for ax in axes:
    # Subgroup one in reds, subgroup two in blues; points coloured by x value
    ax.scatter3D(x_one, y_one, z_one, c=x_one, cmap='Reds', zorder=15, marker='o')
    ax.scatter3D(x_two, y_two, z_two, c=x_two, cmap='Blues', zorder=15, marker='o')

    # The original issued this identical call twice; once is sufficient.
    ax.locator_params(nbins=4, axis='x')

    ax.set_xlabel(x_name, fontsize='10')
    ax.set_ylabel(y_name, fontsize='10')

    # Fitted surfaces drawn as 100 contour levels each
    ax.contour(x_plane1, y_plane1, z_plane1, 100, cmap='Reds', alpha=0.95)
    ax.contour(x_plane2, y_plane2, z_plane2, 100, cmap='Blues', alpha=0.95)
fig.tight_layout()
# The second subgroup is selected with <=, and its p-value needs the same 'p:' separator as the first.
fig.suptitle(f'{metric}>{metric_val} R2:{r_sq1}|r:{r1}|p:{p1} \n {metric}<={metric_val} R2:{r_sq2}|r:{r2}|p:{p2}')


In [None]:
#Store the associated coefficients and outputs in a CSV
# One column of coefficients per subgroup. The second subgroup is selected
# with <=, so its column label says so (it previously read '<').
coeff_df = pd.DataFrame({f'{metric}>{metric_val}': coeffic1, f'{metric}<={metric_val}': coeffic2})
name_df = pd.DataFrame({'Coeff Names': ['intersept', x_name, y_name]})
# reset_index turns the coefficient keys into an ordinary column
coeff_df = coeff_df.reset_index()
coeff_df = pd.concat([coeff_df, name_df], axis=1)
display(coeff_df)

In [None]:
#Save the subgrouped figure and its coefficient table
figname = '4D_' + x_name + '_by_' + y_name + '_explain_' + z_name
# makedirs also creates missing parent folders and does nothing if the
# directory already exists (os.mkdir fails when a parent is missing).
os.makedirs(out_dir, exist_ok=True)
fig.savefig(os.path.join(out_dir, (figname+'_figure.png')))
coeff_df.to_csv(os.path.join(out_dir, (figname+'_coefficients.csv')))
print(f'{figname} saved to: \n {out_dir}')