In [13]:
import pandas as pd
from sklearn.preprocessing import PowerTransformer
from scipy.stats import zscore

# Read the questionnaire scores from the Excel workbook.
scales = pd.read_excel("scales.xlsx")

# Columns that receive a Box-Cox power transform before standardisation.
skewed_columns = [
    "UPPSP_PMD", "UPPSP_PSV",
    "ASI_P", "ASI_C",
    "MSSB_POS", "MSSB_NEG", "MSSB_DES",
]

# Work on a copy so that `scales` keeps the untransformed values.
scales_transformed = scales.copy()

# Fit one Box-Cox transform per skewed column; standardize=False leaves the
# scaling to the z-score step below.
# NOTE(review): the 1e-6 offset is presumably there to keep zero scores
# strictly positive for Box-Cox -- confirm, since values that close to zero
# can end up far from the rest of the distribution after the transform.
pt = PowerTransformer(method='box-cox', standardize=False)
scales_transformed[skewed_columns] = pt.fit_transform(
    scales_transformed[skewed_columns].add(1e-6)
)

# Every column except the two code columns, Age and "Unnamed: 0" is
# z-scored.
metrics_columns = scales_transformed.drop(
    columns=["EPRIME_CODE", "SUBJECT_CODE", "Age", "Unnamed: 0"]
).columns.values

# Column-wise z-score; nan_policy='omit' is forwarded to zscore by apply so
# missing entries are ignored in the statistics.
scales_transformed[metrics_columns] = scales_transformed[metrics_columns].apply(
    zscore, nan_policy='omit'
)


In [15]:
# Remove the "Unnamed: 0" column from the transformed frame.
# Reassigning the result (instead of inplace=True) together with
# errors="ignore" keeps this cell idempotent: running it a second time no
# longer raises KeyError once the column is already gone.
scales_transformed = scales_transformed.drop(columns="Unnamed: 0", errors="ignore")

In [16]:
# Bare expression as the last line of the cell: shows scales_transformed as
# the cell output for a visual check.
scales_transformed

Unnamed: 0,SUBJECT_CODE,EPRIME_CODE,Age,PA,NA.,ERQ_CR,ERQ_ES,UPPSP_NU,UPPSP_PU,UPPSP_SS,...,RRQ_Ref,ASI_P,ASI_C,ASI_S,SPQ,SPQ_IR,MSSB_POS,MSSB_NEG,MSSB_DES,ASI_T
0,PREDWELL_RETOS_DLPFC20HZ_S001,PREDWELL_RETOS-1-1,19,1.542411,0.529655,1.447650,2.112677,0.046156,0.526490,0.378148,...,0.904431,0.561757,0.206791,2.739950,,,,,,1.277450
1,PREDWELL_RETOS_DLPFC20HZ_S002,PREDWELL_RETOS-2-1,22,-0.039008,-0.605320,0.498470,-1.581572,-1.272577,-1.288993,-1.310014,...,0.904431,-0.316128,-0.094034,-1.342143,,,,,,-1.090217
2,PREDWELL_RETOS_DLPFC20HZ_S003,PREDWELL_RETOS-3-1,19,0.751701,-0.983645,-1.004398,-0.609401,-1.602260,-0.835122,-0.972381,...,-1.365664,-0.726321,-1.790260,-1.156593,,,,,,-1.177909
3,PREDWELL_RETOS_DLPFC20HZ_S004,PREDWELL_RETOS-4-1,21,-1.146002,1.097142,0.125578,-1.192703,-1.272577,-1.288993,-1.310014,...,0.336907,1.582578,0.885724,-1.156593,0.954388,0.740868,0.746335,-1.497080,0.676246,0.663611
4,PREDWELL_RETOS_DLPFC20HZ_S005,PREDWELL_RETOS-5-1,26,-0.829718,0.151330,0.498470,1.140506,0.375839,1.434232,0.715781,...,-0.423575,0.849545,0.206791,0.884453,-0.124873,-0.926085,-1.476319,0.756895,0.855745,0.575919
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,BEBRASK_LeftVCSHAM50HZ_S024,PREDWELL_RETOS-1124-1,21,-0.039008,-0.037832,-1.381057,0.557204,-0.283527,-0.381251,0.040516,...,-1.085686,-0.028669,0.684112,0.327804,0.954388,1.852169,1.029633,0.874956,0.508313,0.137462
146,BEBRASK_LeftVCSHAM50HZ_S025,PREDWELL_RETOS-1125-1,20,-1.304144,0.340492,-1.946045,2.112677,-1.602260,-1.742864,-0.634749,...,0.238537,-2.297831,0.405880,-0.785494,1.494018,1.852169,0.594392,1.380000,0.855745,-0.827143
147,BEBRASK_LeftVCSHAM50HZ_S026,PREDWELL_RETOS-1126-1,18,1.858695,-0.037832,-1.004398,0.751638,0.705522,0.980361,0.715781,...,-0.801924,0.712548,0.206791,-0.414394,0.954388,-0.926085,0.356080,-1.497080,0.779792,-0.125612
148,BEBRASK_LeftVCSHAM50HZ_S027,PREDWELL_RETOS-1127-1,20,0.277275,-0.226995,2.008872,-0.414967,-1.272577,0.072619,1.053413,...,-0.991099,-0.028669,0.558614,-0.414394,2.573279,0.740868,0.594392,-1.497080,0.676246,-0.300995


In [17]:
# Save the transformed frame as an Excel workbook.
# index=True also writes the row index as the first column of the sheet.
# NOTE(review): readers that load this file without index_col=0 will
# presumably see that column as "Unnamed: 0" -- confirm this is intended.
scales_transformed.to_excel(
    '../Clustering_Predictive_Processing/scales_transformed.xlsx',
    index=True,
)
