In [None]:
import os
import statsmodels as sm
import pandas as pd #pandas for data frame management and descriptives (in addition to numpy)
import numpy as np
from HLR import HierarchicalLinearRegression

# Check and set the working directory.
# NOTE(review): these absolute paths only resolve on the original author's
# Windows machine; consider a configurable data directory instead.
cwd = os.getcwd()  # Working directory at kernel start (before the chdir below)
files = os.listdir(cwd)  # Names of all entries in that starting directory

# Raw string: backslashes are taken literally, so '\H' and '\C' are no longer
# invalid escape sequences. The resulting path is unchanged.
os.chdir(r'c:\Users\j_m289\Pictures\PHD\7. Data Analysis\HMR\Clean')  # Directory set to 'clean' folder

# Try to use a relative path for files (tied to directory and nesting within folders).
# A raw string is required here: in a normal literal '\7' is an octal escape
# (the BEL control character), which silently corrupts the path.
path = r"/Users/j_m289\Pictures\PHD\7. Data Analysis\HMR\Clean"

start = "/Users/j_m289"

relative_path = os.path.relpath(path, start)

# Report the directory that is actually active after the chdir above
# (`cwd` still holds the directory the kernel started in).
print(os.getcwd())

# Load datasets
# Read the five group CSV files from the current working directory. The file
# names are listed in the same order as the variables they are unpacked into.
dataset_files = (
    'AnorexiaDataset_Final.csv',
    'BulimiaDataset_Final.csv',
    'BingeEatingDataset_Final.csv',
    'OSFEDDataset_Final.csv',
    'healthyDataset_Final.csv',
)
anorexia, bulimia, BingeEating, OSFED, healthy = map(pd.read_csv, dataset_files)

# Recode Education into a two-level variable, Education2:
#   1 -> Education is between 1 and 2 (inclusive)
#   2 -> any other non-missing value
# NOTE(review): the previous note described the codes as 0/1 ("less than grade 12"
# vs. above), but the values assigned are 1/2. Confirm which coding is intended
# and which Education levels correspond to grade 12.
def _recode_education(level):
    """Return the Education2 code (1 or 2) for one Education value.

    Missing values are returned as NaN instead of being assigned a category.
    """
    if pd.isna(level):
        # NaN fails every comparison and would otherwise be coded as 2
        return np.nan
    return 1 if 1 <= level <= 2 else 2


# Add the Education2 column to each group dataset (the frames are modified in place)
for _frame in (anorexia, bulimia, BingeEating, OSFED, healthy):
    _frame['Education2'] = _frame['Education'].apply(_recode_education)

# Set directory to 'Data Output'; every relative to_csv() call after this point writes there.
# Raw string: backslashes are taken literally, so '\H' and '\D' are no longer
# invalid escape sequences. The resulting path is unchanged.
# NOTE(review): absolute, machine-specific path; consider a configurable output directory.
os.chdir(r'c:\Users\j_m289\Pictures\PHD\7. Data Analysis\HMR\Data Output')

# Anorexia Nervosa

### Model 1

In [None]:
# Model 1
# Predictor lists for keys 1-3 of X; each list extends the previous one.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EFSA', 'YPSQ_S', 'YPSQ_EC', 'YPSQ_O', 'YPSQ_EOS',
    'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_SB', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the anorexia data
# (missing_data and ols_params are optional parameters and are left unset here)
hreg = HierarchicalLinearRegression(anorexia, X, y)

In [None]:
# Output Summary
# Store the summary returned by the model currently bound to `hreg`.
anorexia_results = hreg.summary()
# CSV export is currently disabled for this model; uncomment the line below to write the file.
#anorexia_results.to_csv('anorexia_results.csv')

In [None]:
# Diagnostics
# Run the diagnostics of the model currently bound to `hreg` with verbose output enabled.
diagnostics = hreg.diagnostics(verbose=True)
# CSV export is currently disabled for this model; uncomment both lines below to
# convert the result to a DataFrame and write it out.
#diagnostics = pd.DataFrame.from_dict(diagnostics)
#diagnostics.to_csv('anorexia_diagnostics.csv')

In [None]:
# Plots
# Each call below is a plotting method of the object bound to `hreg`.
# NOTE: `hreg` is rebound by every model cell in this notebook, so these plots
# describe whichever model cell was executed most recently (here: the anorexia model).
hreg.plot_studentized_residuals_vs_fitted()
hreg.plot_qq_residuals()
hreg.plot_influence()
hreg.plot_std_residuals()
hreg.plot_histogram_std_residuals()
hreg.plot_partial_regression()

### Shuffled Model

In [None]:
# Shuffled to verify spatial autocorrelation
# sample(frac=1) returns every row in random order, so only the row order changes.
# NOTE(review): "spatial autocorrelation" presumably refers to the residual
# autocorrelation check in the diagnostics - confirm.
# A fixed random_state makes the shuffle, and the files exported from the
# shuffled model, reproducible across kernel restarts.
anorexia_shuffled = anorexia.sample(frac=1, random_state=42)

In [None]:
# Shuffled Model 1
# Predictor lists for keys 1-3 of X; each list extends the previous one.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EFSA', 'YPSQ_S', 'YPSQ_EC', 'YPSQ_O', 'YPSQ_EOS',
    'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_SB', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the row-shuffled anorexia data
# (missing_data and ols_params are optional parameters; ols_params is passed as None)
hreg = HierarchicalLinearRegression(anorexia_shuffled, X, y, ols_params=None)

In [None]:
# Output Summary
# Store the summary of the model currently bound to `hreg` and write it to CSV
# in the current working directory.
results_csv = 'anorexia_shuffled_results.csv'
anorexia_shuffled_results = hreg.summary()
anorexia_shuffled_results.to_csv(results_csv)

In [None]:
# Diagnostics
# The raw return value gets its own name, so `diagnostics` only ever refers to
# the DataFrame that is written to CSV.
diagnostics_raw = hreg.diagnostics(verbose=True)
diagnostics = pd.DataFrame.from_dict(diagnostics_raw)
diagnostics.to_csv('anorexia_shuffled_diagnostics.csv')

# Bulimia Nervosa

In [None]:
# Bulimia Nervosa
# Predictor lists for keys 1-3 of X; each list extends the previous one.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EFSA', 'YPSQ_S', 'YPSQ_EC', 'YPSQ_O', 'YPSQ_EOS',
    'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_SB', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the bulimia data
# (missing_data and ols_params are optional parameters; ols_params is passed as None)
hreg = HierarchicalLinearRegression(bulimia, X, y, ols_params=None)

In [None]:
# Output Summary
# Store the summary of the model currently bound to `hreg` and write it to CSV
# in the current working directory.
results_csv = 'bulimia_results.csv'
bulimia_results = hreg.summary()
bulimia_results.to_csv(results_csv)

In [None]:
# Diagnostics
# The raw return value gets its own name, so `diagnostics` only ever refers to
# the DataFrame that is written to CSV.
diagnostics_raw = hreg.diagnostics(verbose=True)
diagnostics = pd.DataFrame.from_dict(diagnostics_raw)
diagnostics.to_csv('bulimia_diagnostics.csv')

In [None]:
# Plots
# Each call below is a plotting method of the object bound to `hreg`.
# NOTE: `hreg` is rebound by every model cell in this notebook, so these plots
# describe whichever model cell was executed most recently (here: the bulimia model).
hreg.plot_studentized_residuals_vs_fitted()
hreg.plot_qq_residuals()
hreg.plot_influence()
hreg.plot_std_residuals()
hreg.plot_histogram_std_residuals()
hreg.plot_partial_regression()

# Binge Eating

### Model 1

In [None]:
# Drop highly multi-collinear variables.
# drop() returns a new frame, so the original BingeEating data is left untouched.
collinear_columns = ['YPSQ_EFSA', 'YPSQ_O', 'YPSQ_S', 'YPSQ_SB']
BingeEating_dropped = BingeEating.drop(columns=collinear_columns)

In [None]:
# Model 1
# Predictor lists for keys 1-3 of X; each list extends the previous one.
# The third list uses only the YPSQ columns that remain in BingeEating_dropped.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EC', 'YPSQ_EOS', 'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the reduced binge-eating data
# (missing_data and ols_params are optional parameters; ols_params is passed as None)
hreg = HierarchicalLinearRegression(BingeEating_dropped, X, y, ols_params=None)

In [None]:
# Output Summary
# Store the summary of the model currently bound to `hreg` and write it to CSV
# in the current working directory.
results_csv = 'binge_results.csv'
binge_results = hreg.summary()
binge_results.to_csv(results_csv)

In [None]:
# Diagnostics
# The raw return value gets its own name, so `diagnostics` only ever refers to
# the DataFrame that is written to CSV.
diagnostics_raw = hreg.diagnostics(verbose=True)
diagnostics = pd.DataFrame.from_dict(diagnostics_raw)
diagnostics.to_csv('binge_diagnostics.csv')

In [None]:
# Plots
# Each call below is a plotting method of the object bound to `hreg`.
# NOTE: `hreg` is rebound by every model cell in this notebook, so these plots
# describe whichever model cell was executed most recently (here: the binge-eating model).
hreg.plot_studentized_residuals_vs_fitted()
hreg.plot_qq_residuals()
hreg.plot_influence()
hreg.plot_std_residuals()
hreg.plot_histogram_std_residuals()
hreg.plot_partial_regression()

### Shuffled Model

In [None]:
# Shuffled to verify spatial autocorrelation
# sample(frac=1) returns every row in random order, so only the row order changes.
# NOTE(review): "spatial autocorrelation" presumably refers to the residual
# autocorrelation check in the diagnostics - confirm.
# A fixed random_state makes the shuffle, and the files exported from the
# shuffled model, reproducible across kernel restarts.
binge_shuffled = BingeEating.sample(frac=1, random_state=42)

In [None]:
# Drop highly multi-collinear variables.
# This cell rebinds binge_shuffled to its own result, so a second run would hit
# columns that are already gone. errors='ignore' skips absent columns instead of
# raising KeyError, which keeps the cell safe to re-run.
binge_shuffled = binge_shuffled.drop(
    columns=['YPSQ_EFSA', 'YPSQ_O', 'YPSQ_S', 'YPSQ_SB'],
    errors='ignore',
)

In [None]:
# Shuffled Model 1
# Predictor lists for keys 1-3 of X; each list extends the previous one.
# The third list omits the YPSQ columns dropped from binge_shuffled.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EC', 'YPSQ_EOS', 'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the row-shuffled binge-eating data
# (missing_data and ols_params are optional parameters; ols_params is passed as None)
hreg = HierarchicalLinearRegression(binge_shuffled, X, y, ols_params=None)

In [None]:
# Output Summary
# Store the summary of the model currently bound to `hreg` and write it to CSV
# in the current working directory.
results_csv = 'binge_shuffled_results.csv'
binge_shuffled_results = hreg.summary()
binge_shuffled_results.to_csv(results_csv)

In [None]:
# Diagnostics
# The raw return value gets its own name, so `diagnostics` only ever refers to
# the DataFrame that is written to CSV.
diagnostics_raw = hreg.diagnostics(verbose=True)
diagnostics = pd.DataFrame.from_dict(diagnostics_raw)
diagnostics.to_csv('binge_shuffled_diagnostics.csv')

# OSFED

### Model 1

In [None]:
# Model 1
# Predictor lists for keys 1-3 of X; each list extends the previous one.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EFSA', 'YPSQ_S', 'YPSQ_EC', 'YPSQ_O', 'YPSQ_EOS',
    'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_SB', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the OSFED data
# (missing_data and ols_params are optional parameters; ols_params is passed as None)
hreg = HierarchicalLinearRegression(OSFED, X, y, ols_params=None)

In [None]:
# Output Summary
# Store the summary of the model currently bound to `hreg` and write it to CSV
# in the current working directory.
results_csv = 'osfed_results.csv'
osfed_results = hreg.summary()
osfed_results.to_csv(results_csv)

In [None]:
# Diagnostics
# The raw return value gets its own name, so `diagnostics` only ever refers to
# the DataFrame that is written to CSV.
diagnostics_raw = hreg.diagnostics(verbose=True)
diagnostics = pd.DataFrame.from_dict(diagnostics_raw)
diagnostics.to_csv('osfed_diagnostics.csv')

In [None]:
# Plots
# Each call below is a plotting method of the object bound to `hreg`.
# NOTE: `hreg` is rebound by every model cell in this notebook, so these plots
# describe whichever model cell was executed most recently (here: the OSFED model).
hreg.plot_studentized_residuals_vs_fitted()
hreg.plot_qq_residuals()
hreg.plot_influence()
hreg.plot_std_residuals()
hreg.plot_histogram_std_residuals()
hreg.plot_partial_regression()

### Shuffled Model

In [None]:
# Shuffled to verify spatial autocorrelation
# sample(frac=1) returns every row in random order, so only the row order changes.
# NOTE(review): "spatial autocorrelation" presumably refers to the residual
# autocorrelation check in the diagnostics - confirm.
# A fixed random_state makes the shuffle, and the files exported from the
# shuffled model, reproducible across kernel restarts.
osfed_shuffled = OSFED.sample(frac=1, random_state=42)

In [None]:
# Shuffled Model 1
# Predictor lists for keys 1-3 of X; each list extends the previous one.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EFSA', 'YPSQ_S', 'YPSQ_EC', 'YPSQ_O', 'YPSQ_EOS',
    'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_SB', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the row-shuffled OSFED data
# (missing_data and ols_params are optional parameters; ols_params is passed as None)
hreg = HierarchicalLinearRegression(osfed_shuffled, X, y, ols_params=None)

In [None]:
# Output Summary
# Store the summary of the model currently bound to `hreg` and write it to CSV
# in the current working directory.
results_csv = 'osfed_shuffled_results.csv'
osfed_shuffled_results = hreg.summary()
osfed_shuffled_results.to_csv(results_csv)

In [None]:
# Diagnostics
# The raw return value gets its own name, so `diagnostics` only ever refers to
# the DataFrame that is written to CSV.
diagnostics_raw = hreg.diagnostics(verbose=True)
diagnostics = pd.DataFrame.from_dict(diagnostics_raw)
diagnostics.to_csv('osfed_shuffled_diagnostics.csv')

# Healthy

### Model 1

In [None]:
# Model 1
# Predictor lists for keys 1-3 of X; each list extends the previous one.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EFSA', 'YPSQ_S', 'YPSQ_EC', 'YPSQ_O', 'YPSQ_EOS',
    'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_SB', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the healthy-group data
# (missing_data and ols_params are optional parameters; ols_params is passed as None)
hreg = HierarchicalLinearRegression(healthy, X, y, ols_params=None)

In [None]:
# Output Summary
# Store the summary of the model currently bound to `hreg` and write it to CSV
# in the current working directory.
results_csv = 'healthy_results.csv'
healthy_results = hreg.summary()
healthy_results.to_csv(results_csv)

In [None]:
# Diagnostics
# The raw return value gets its own name, so `diagnostics` only ever refers to
# the DataFrame that is written to CSV.
diagnostics_raw = hreg.diagnostics(verbose=True)
diagnostics = pd.DataFrame.from_dict(diagnostics_raw)
diagnostics.to_csv('healthy_diagnostics.csv')

In [None]:
# Plots
# Each call below is a plotting method of the object bound to `hreg`.
# NOTE: `hreg` is rebound by every model cell in this notebook, so these plots
# describe whichever model cell was executed most recently (here: the healthy-group model).
hreg.plot_studentized_residuals_vs_fitted()
hreg.plot_qq_residuals()
hreg.plot_influence()
hreg.plot_std_residuals()
hreg.plot_histogram_std_residuals()
hreg.plot_partial_regression()

### Shuffled Model

In [None]:
# Shuffled to verify spatial autocorrelation
# sample(frac=1) returns every row in random order, so only the row order changes.
# NOTE(review): "spatial autocorrelation" presumably refers to the residual
# autocorrelation check in the diagnostics - confirm.
# A fixed random_state makes the shuffle, and the files exported from the
# shuffled model, reproducible across kernel restarts.
healthy_shuffled = healthy.sample(frac=1, random_state=42)

In [None]:
# Shuffled Model 1
# Predictor lists for keys 1-3 of X; each list extends the previous one.
step1_predictors = ['Age', 'Sex', 'BMI']
step2_predictors = step1_predictors + ['EmReg_CR', 'EmReg_ES', 'CFI_Alt', 'CFI_Ctrl']
step3_predictors = step2_predictors + [
    'YPSQ_EFSA', 'YPSQ_S', 'YPSQ_EC', 'YPSQ_O', 'YPSQ_EOS',
    'YPSQ_SCRE', 'YPSQ_DS', 'YPSQ_SB', 'YPSQ_HSC',
]
X = {1: step1_predictors, 2: step2_predictors, 3: step3_predictors}

# Name of the outcome column
y = 'EDE_Q_G'

# Build the HLR object on the row-shuffled healthy-group data
# (missing_data and ols_params are optional parameters; ols_params is passed as None)
hreg = HierarchicalLinearRegression(healthy_shuffled, X, y, ols_params=None)

In [None]:
# Output Summary
# Store the summary of the model currently bound to `hreg` and write it to CSV
# in the current working directory.
results_csv = 'healthy_shuffled_results.csv'
healthy_shuffled_results = hreg.summary()
healthy_shuffled_results.to_csv(results_csv)

In [None]:
# Diagnostics
# The raw return value gets its own name, so `diagnostics` only ever refers to
# the DataFrame that is written to CSV.
diagnostics_raw = hreg.diagnostics(verbose=True)
diagnostics = pd.DataFrame.from_dict(diagnostics_raw)
diagnostics.to_csv('healthy_shuffled_diagnostics.csv')