In [None]:
import platform 
import os

# Sub-folder (relative to the data location) that this analysis writes into
analysis = 'elastic_net/regression'

# Pick input/output locations based on the host operating system.
# Only macOS ('Darwin') is detected explicitly; every other system falls
# through to the Windows-style paths.
if platform.system() != 'Darwin':
    # ---------------------------- Windows ----------------------------
    data_path = r'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\analyses\roi-roi_correl\addbs_vta_to_memory_net_maxima\linear_regression_addbs_vta_to_memory_net_maxima\prepared_data.csv'
    out_dir = rf'C:\Users\calvin.howard\Dropbox (Partners HealthCare)\memory\analyses\roi-roi_correl\addbs_vta_to_memory_net_maxima\linear_regression_addbs_vta_to_memory_net_maxima\{analysis}'
    # To override the output location: out_dir = r'path to out dir here'

    print('I have set pathnames in the Windows style')
else:
    # ---------------------------- Mac OS X ---------------------------
    data_path = r'/Users/cu135/Dropbox (Partners HealthCare)/memory/analyses/composite_analyses/non_fl_vtas_yeo_rois/yeo_rois_with_clinical_data_for_regression.csv'
    # Results go in an `analysis` sub-folder next to the input CSV
    out_dir = os.path.join(os.path.dirname(data_path), analysis)
    # To override the output location: out_dir = r'path to out dir here'

    print('I have set pathnames in the Mac style')
    print('I will save to :', out_dir)

In [None]:
import numpy as np
import pandas as pd
# Load data into a pandas DataFrame (default RangeIndex, so row labels are
# the 0-based row positions in the CSV)
df = pd.read_csv(data_path)

# Columns to exclude from the regression.
# Each column is removed independently: a column that is absent from the CSV
# must not prevent the remaining ones from being dropped (previously the first
# missing column aborted all later removals, and a bare `except` hid the cause).
columns_to_remove = [
    '% Change from baseline (CDR)',
    'Baseline CDR (sum of squares)',
    'Unnamed: 0',
    'Patient # CDR, ADAS',
    'Randomization Arm',
    'Age',
    'baseline',
    # 'Somatomotor',
    # 'Dorsal_Attention',
    # 'Visual',
]
missing_columns = [col for col in columns_to_remove if col not in df.columns]
if missing_columns:
    # Report exactly which columns were not found instead of failing silently
    print('cannot pop', missing_columns)
df = df.drop(columns=columns_to_remove, errors='ignore')

# Remove outliers, identified by row label (raises KeyError if a label is absent)
outlier_index = [11, 47, 48, 49]
df = df.drop(index=outlier_index)

# Standardize every remaining column; this includes the outcome column,
# since the scaler is fit on the whole frame
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df[df.columns] = scaler.fit_transform(df[df.columns])

display(df)

In [None]:
import statsmodels.api as sm
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
from calvin_utils.statistical_measurements import calculate_vif
from calvin_utils.statistical_functions import generate_interaction_features

# Separate the outcome from the predictors, working on a copy so `df` is left intact
df1 = df.copy()
y = df1.pop('perc_improvement')
X = df1

# Interaction features are generated but not used in the fit below;
# uncomment the assignment to use them as the design matrix instead
X_interactions = generate_interaction_features(X)
# X = X_interactions

# Base estimator whose hyperparameters are tuned by the grid search
elastic_net = ElasticNet(fit_intercept=True)

# Candidate hyperparameter values: 10 evenly spaced points in [0.01, 1] for each
param_grid = dict(
    alpha=np.linspace(0.01, 1, 10),     # regularization strength
    l1_ratio=np.linspace(0.01, 1, 10),  # L1 vs. L2 mixing ratio
)

# Exhaustive cross-validated search over the grid, scored by negative MSE
grid_search = GridSearchCV(
    estimator=elastic_net,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
)
grid_search.fit(X, y)

# Pull out the winning combination
best_alpha, best_l1_ratio = (
    grid_search.best_params_[name] for name in ('alpha', 'l1_ratio')
)

# Fit the final model with statsmodels using the selected hyperparameters.
# Note that X is passed as-is, so no constant column is added to this OLS model.
lin_reg = sm.OLS(y, X)
optimal_model = lin_reg.fit_regularized(
    alpha=best_alpha,
    L1_wt=best_l1_ratio,
    refit=True,
)

# Report the chosen hyperparameters together with the VIF output for X
print(f'L1 Weight: {best_l1_ratio}, alpha level: {best_alpha} \n \n {calculate_vif(X)}')


In [None]:
# Plain-text rendering of the fitted model's summary table
print(optimal_model.summary())


In [None]:
# Show the fitted model's summary through the notebook's display() machinery
display(optimal_model.summary())

In [None]:
## Generate Diagnostics
from calvin_utils.statistical_measurements import model_diagnostics
# model_diagnostics is a project helper whose behavior is not visible here;
# whatever it returns for the fitted model is kept in `cls` — TODO confirm what that object is
cls = model_diagnostics(optimal_model)


In [None]:
import seaborn as sns

# Pairwise relationship grid over the columns of `df`.
# NOTE(review): when cells are run in order, `df` here is the standardized
# frame with the outlier rows already dropped, not the raw CSV.
sns.pairplot(df)