In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
## Paths Input Here
conn_path = 'path to connect values'
clin_path = r'C:\Users\Calvin Howard\Dropbox (Partners HealthCare)\memory\AD_Clinical_Data_CDR_ADAS_COG_13.xlsx'
# clin_path = 'path to clinical values'

out_dir = r'path to out dir here'

## Import Connectivity Values

In [None]:
corr_df = pd.read_csv(conn_path)
corr_df.tail(3)

In [None]:
#Assess connectivity values fundamentally
corr_description = corr_df.describe.T
display(corr_description)

In [None]:
#Visualize fundamental relations of the connectivity values
corrfig = sns.pairplot(corr_df)
corrfig

## Import Clinical Outcomes

In [None]:
#important metrics: 
# pt_id, randomization arm, age at DOS,
# baseline adas cog 11, baseline CDR
# %change adas cog 11, # change CDR


sheet_name = 'AD_Clinical_Scores'
alphab_cols = 'C, D, E, F, G, J, V'
clin_df = pd.read_excel(clin_path, sheet_name=sheet_name, usecols=alphab_cols, nrows=50)
print('Num NaNs: ', clin_df.isna().sum().sum())
# clin_df.tail(5)
display(clin_df[::2])

In [None]:
## Organize the clinical dataframe
clin_df = clin_df.sort_values(by=['Patient # CDR, ADAS'], kind='quicksort', axis=0, ascending=True, ignore_index=True)
clin_df.tail(10)

In [None]:
##One-hot-encode the dataframe | sham=0 stim=1 
shams = (clin_df['Randomization Arm'] == 'sham-stim')
clin_df.loc[shams, 'Randomization Arm'] = 0
stims = (clin_df['Randomization Arm'] == 'stim-sham')
clin_df.loc[stims, 'Randomization Arm'] = 1
clin_df.tail(10)

In [None]:
## Develop Understanding of the Clinical Data
try:
    clin_description = clin_df.describe.T
    display(clin_description)
except:
    print('Failed to describe clinical dataframe, unknown cause')

In [None]:
## Visualize the Data Relationships
clinfig = sns.pairplot(clin_df)
clinfig

## Merge DFs

In [None]:
total_df = pd.merge([clin_df, corr_df])
display(total_df)

In [None]:
#Generate metrics for udnerstanding
try:
    total_desc = total_df.description.T
    total_desc
except:
    print('Failed to generate total metrics, unkown cause')

In [None]:
#Generate visual relationship of total metrics
total_pairplot = sns.pairplot(total_df)
total_pairplot

## Generate Subgrouping Metric

In [None]:
#subgrouping by age
metric = 'Age at DOS'
metric_val = 65

index_one = (total_df[metric] > metric_val) #example, all individuals over 65
index_two = (total_df[metric] <= metric_val) #example, all individuals under/equal to 65

##Example of how to use these indices to manipulate data:
#### clin_df.loc[index_one, '<names of relevant columns>'] = 0 ## can use iloc too. 

## Decide What to Plot

In [None]:
## Choose variables of interest.
x_vals = total_df['00_memor']
z_vals = total_df['% Change from baseline (ADAS-Cog11)']
y_vals = total_df['Baseline ADAS-Cog11']

In [None]:
#2D Line of Best Fit Code
def lin_regression(x, y, z, degree):
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression
    from scipy.stats import pearsonr

    #----Generate Model
    #Prep data
    x = x
    y = y
    z = z
    input_matrix = np.stack([x, y]).T

    #Linear Regression
    poly = PolynomialFeatures(degree=degree)
    model = LinearRegression()
    in_features = poly.fit_transform(input_matrix)
    model.fit(in_features, z)
    predicted_z = model.predict(poly.transform(input_matrix))

    #----Performance Metrics
    #Pearson of Predicted Z to Actual Z
    r, p = pearsonr(predicted_z, z) #need to find the prediction points at the x/y for each z_actual
    print('r: ', r)
    print('p: ', p)
    #Coefficients of the Regression
    coefficients = dict(zip(poly.get_features_out(), model.coef_.round(4)))
    print('coeff: ', coefficients)
    #Check Fit
    r_squared = model.score(poly.transform(input_matrix), z)
    print('r2: ', r_squared)

    #----Generate Planes for Plotting
    x_lin=np.linspace(np.min(x), np.max(x), 100)
    y_lin=np.linpsace(np.min(y), np.max(y), 100)
    X_plane,Y_plane=np.meshgrid(x_lin,y_lin,copy=False)
    input_planes=np.stack([X_plane,Y_plane]).T
    assert(input_planes.shape==(100*100, 2)) #unsure what shape 400,2 refers to
    predicted_plane = model.predict(poly.transform(input_planes))


    return coefficients, r_squared, r, p, X_plane, Y_plane, predicted_plane


In [None]:
#Plot
plt.style.use('default')

#setup
fig = plt.figure(figsize=(30,20))
ax1 = fig.add_subplot(121, projection = '3d')
ax1.view_init(elev=13, azim=100)
ax2 = fig.add_subplot(132, projection = '3d')
ax2.view_init(elev=13, azim=145)
ax3 = fig.add_subplot(133, projection = '3d')
ax3.view_init(elev=13, azim=190)

ax1.set_zlabel(z_vals.columns.values, fontsize='10')
axes = [ax1, ax2, ax3]
for ax in axes:
    ax.plot(x=x_vals.loc[index_one], y=y_vals.loc[index_one], z=z_vals.loc[index_one], color='r', zorder=15, alpha=0.5, marker='o')
    ax.plot(x=x_vals.loc[index_two], y=y_vals.loc[index_two], z=z_vals.loc[index_two], color='b', zorder=15, alpha=0.5, marker='o')

    ax.locator_params(nbins=4, axis='x')
    ax.locator_params(nbins=4, axis='x')

    ax.set_xlabel(x_vals.columns.values, fontsize='10')
    ax.set_ylabel(y_vals.columns.values, fontsize='10')

    #Perform each regression
    coeffic1, r_sq1, r1, p1, x_plane1, y_plane1, z_plane1 = lin_regression(x=x_vals.loc[index_one], y=y_vals.loc[index_one], z=z_vals.loc[index_one], degree=1)
    coeffic2, r_sq2, r2, p2, x_plane2, y_plane2, z_plane2 = lin_regression(x=x_vals.loc[index_two], y=y_vals.loc[index_two], z=z_vals.loc[index_two], degree=1)

    ax.plot(x_plane1, y_plane1, z_plane1, color='r', zorder=15, marker=',')
    ax.plot(x_plane2, y_plane2, z_plane2, color='r', zorder=15, marker=',')

fig.suptitle(f'{metric}>{metric_val} R2:{r_sq1}|r:{r1}|p:{p1} \n {metric}<{metric_val} R2:{r_sq2}|r:{r2}|p{p2}')


In [None]:
#Store the associated coefficients and outputs in a CSV 
coeff_df = pd.DataFram({f'{metric}>{metric_val}': coeffic1, f'{metric}<{metric_val}': coeffic2})
display(coeff_df)

In [None]:
#Save
figname = '3D_' + x_vals.columns.values + '_by_' + y_vals.columns.values + '_explain_' + z_vals.columns.values
if os.path.isdir(out_dir) != True:
    os.mkdir(out_dir)
fig.savefg(os.path.join(out_dir, (figname+'_figure.png')))
coeff_df.to_csv(os.path.join(out_dir, (figname+'_coefficients.csv')))