In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
from sklearn.preprocessing import PowerTransformer
from itertools import combinations
from sklearn.metrics import mean_squared_error

import seaborn as sns



In [46]:
# Load the data set and discard the two columns that are not used as predictors.
df = pd.read_csv("po2_data.csv").drop(columns=['subject#', 'test_time'])

# Separate explanatory variables (x) from the response variables (y).
# Every column except the last two is a feature; both targets start from the
# same feature set. Explicit copies keep the feature frames independent of
# `df` (and of each other), so assigning into them later cannot write through
# to `df` or raise pandas' SettingWithCopyWarning.
x_1 = df.iloc[:, :-2].copy()
x_2 = df.iloc[:, :-2].copy()
# remove multicollinearity
# x = x.drop(['subject#','shimmer(apq3)', 'shimmer(%)', 'shimmer(apq5)', 'shimmer(apq11)', 'hnr'], axis=1)



In [47]:
# Response variables: the second-to-last column becomes y_1, the last column y_2.
y_1, y_2 = (df.iloc[:, position] for position in (-2, -1))




# Apply cross validation to get the optimal ratio of splitting the dataset
def cross_validation(X, y, test_size_range=(0.1, 0.2, 0.3, 0.4, 0.5), k=5):
    """Pick the hold-out fraction with the best average test score.

    For each candidate test size the data is split ``k`` times, a
    ``LinearRegression`` is fitted on the training part and scored with
    ``model.score`` on the test part. The candidate with the highest mean
    score is printed and returned.

    Parameters
    ----------
    X : feature matrix accepted by ``train_test_split`` / ``LinearRegression``.
    y : target values aligned with ``X``.
    test_size_range : sequence of float, optional
        Candidate test fractions, tried in order. On ties the first wins.
    k : int, optional
        Number of random splits evaluated per candidate.

    Returns
    -------
    float
        The element of ``test_size_range`` with the highest mean score.
    """
    # Mean score per candidate, in the same order as test_size_range
    mean_scores = []

    for test_size in test_size_range:
        fold_scores = []

        # Each repetition gets its own seed. With one fixed seed every
        # repetition would reproduce the same split, and the "mean" would just
        # be a single score repeated k times.
        for seed in range(k):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=seed
            )
            model = LinearRegression()
            model.fit(X_train, y_train)
            fold_scores.append(model.score(X_test, y_test))

        mean_scores.append(np.mean(fold_scores))

    # Choose the best test set size based on the mean performance scores
    best_test_size = test_size_range[int(np.argmax(mean_scores))]
    print(best_test_size)

    return best_test_size


# Hold-out fraction for each target, chosen on the raw features
ratio_1 = cross_validation(X=x_1, y=y_1)
ratio_2 = cross_validation(X=x_2, y=y_2)



def _print_regression_metrics(y_true, y_pred):
    """Print MAE, MSE, RMSE, range-normalised RMSE and R-squared for one set of predictions."""
    # Mean Absolute Error
    mae = metrics.mean_absolute_error(y_true, y_pred)
    # Mean Squared Error
    mse = metrics.mean_squared_error(y_true, y_pred)
    # Root Mean Square Error (reuses the MSE instead of recomputing it)
    rmse = math.sqrt(mse)
    # Normalised Root Mean Square Error: RMSE divided by the observed range of y_true
    rmse_norm = rmse / (y_true.max() - y_true.min())
    # R-Squared
    r_2 = metrics.r2_score(y_true, y_pred)

    print("MAE: ", mae)
    print("MSE: ", mse)
    print("RMSE: ", rmse)
    print("RMSE (Normalised): ", rmse_norm)
    print("R_squared: ",r_2)


def test(x_1, x_2, y_1, y_2, ratio_1=0.4, ratio_2=0.4):
    """Fit one linear regression per target and print hold-out diagnostics.

    Each (features, target) pair is split with ``train_test_split``
    (``random_state=0``) using its own test fraction. A ``LinearRegression``
    is fitted on the training part and evaluated on the test part.

    Printed, in order: intercept and coefficients of the second model, the
    second model's training features, its actual-vs-predicted table, then the
    metric block for the first target followed by the one for the second.

    Parameters
    ----------
    x_1, x_2 : feature frames for the first and second target.
    y_1, y_2 : target series aligned with ``x_1`` / ``x_2``.
    ratio_1, ratio_2 : float, optional
        Test fractions for the two splits (default 0.4 each).

    Returns
    -------
    None
    """
    x_train_1, x_test_1, y_train_1, y_test_1 = train_test_split(x_1, y_1, test_size=ratio_1, random_state=0)
    x_train_2, x_test_2, y_train_2, y_test_2 = train_test_split(x_2, y_2, test_size=ratio_2, random_state=0)

    # Build and train (fit) one linear regression model per target
    model_1 = LinearRegression()
    model_1.fit(x_train_1, y_train_1)

    model_2 = LinearRegression()
    model_2.fit(x_train_2, y_train_2)

    # Only the second model's parameters are reported
    print(f"Intercept: {model_2.intercept_}")
    print(f"Coefficient: {model_2.coef_}")

    # Use linear regression to predict the values of (y) in the test set
    # based on the values of x in the test set
    y_pred_1 = model_1.predict(x_test_1)
    y_pred_2 = model_2.predict(x_test_2)

    # Show the predicted values of (y) next to the actual values of (y)
    df_pred_2 = pd.DataFrame({"Actual": y_test_2, "Predicted": y_pred_2})
    print(x_train_2)
    print(df_pred_2)

    # Standard performance metrics: first target, then second target
    _print_regression_metrics(y_test_1, y_pred_1)
    _print_regression_metrics(y_test_2, y_pred_2)


# Baseline: raw features, each target evaluated at its own hold-out fraction
test(x_1, x_2, y_1, y_2, ratio_1=ratio_1, ratio_2=ratio_2)

0.2
0.2
Intercept: 40.200703177943964
Coefficient: [ 2.98701983e-01 -2.77392516e+00  1.49501242e+01 -5.60323704e+04
 -4.54563782e+04 -2.91973063e+02  1.53977073e+04  1.34471145e+02
 -8.81023519e+00  1.53977911e+04 -5.31303065e+01  5.29940647e+01
 -5.19959091e+03 -1.21498515e+01 -5.53700443e-01  2.23352731e+00
 -3.13898520e+01  1.96457700e+01]
      age  sex  jitter(%)  jitter(abs)  jitter(rap)  jitter(ppq5)  \
4061   78    0    0.00387     0.000026      0.00136       0.00156   
866    68    1    0.00743     0.000037      0.00393       0.00396   
5601   66    1    0.00340     0.000014      0.00141       0.00173   
38     65    0    0.00251     0.000019      0.00101       0.00129   
158    55    0    0.00289     0.000019      0.00136       0.00160   
...   ...  ...        ...          ...          ...           ...   
4931   58    1    0.00299     0.000017      0.00146       0.00145   
3264   71    0    0.00773     0.000051      0.00327       0.00425   
1653   61    0    0.00359     0.00

In [48]:
"""
APPLY Z-SCORE STANDARDISATION
"""
scaler = StandardScaler()
# Standardisation lowered R-squared significantly, so it is skipped.

# Drop the previously added constant

# Apply z-score standardisation to all explanatory variables
# std_x_1 = scaler.fit_transform(x_1.values)
# std_x_2 = scaler.fit_transform(x_2.values)

# # Restore the column names of each explanatory variable
# std_x_1_df = pd.DataFrame(std_x_1, index=x_1.index, columns=x_1.columns)
# std_x_2_df = pd.DataFrame(std_x_2, index=x_2.index, columns=x_2.columns)

# ratio_1 = cross_validation(x_1, y_1)
# ratio_2 = cross_validation(x_2, y_2)


# test(std_x_1_df, std_x_2_df, y_1, y_2)

# Continue with the unscaled features. Copies (rather than aliases) make sure
# that later edits to std_x_*_df never change x_1 / x_2.
std_x_1_df = x_1.copy()
std_x_2_df = x_2.copy()



In [49]:
# Correlation of every feature with each target (and with each other)
df_1 = pd.concat([std_x_1_df, y_1], axis=1)
df_2 = pd.concat([std_x_2_df, y_2], axis=1)

print(df_1.corr())
print(df_2.corr())


def _log_transform(frame, columns, floor=1e-6):
    """Return a new frame in which each listed column is replaced by ``<name>_log``.

    Values that are zero or negative are set to ``floor`` before the natural
    log is taken. The new ``<name>_log`` column is appended at the end and the
    original column is dropped. ``frame`` itself is not modified.
    """
    result = frame.copy()
    for column in columns:
        # Replace non-positive values with a small positive value so log is defined
        positive = result[column].mask(result[column] <= 0, floor)
        result[column + "_log"] = np.log(positive)
        result = result.drop(columns=[column])
    return result


# Log transform
var = ['jitter(%)', 'shimmer(dda)']
std_x_1_df = _log_transform(std_x_1_df, var)
std_x_2_df = _log_transform(std_x_2_df, var)

# Choose the hold-out fraction on the transformed features and actually use it
# for the evaluation (otherwise test() silently falls back to its 0.4 default).
ratio_1 = cross_validation(std_x_1_df, y_1)
ratio_2 = cross_validation(std_x_2_df, y_2)

test(std_x_1_df, std_x_2_df, y_1, y_2, ratio_1, ratio_2)


                     age       sex  jitter(%)  jitter(abs)  jitter(rap)  \
age             1.000000 -0.042396   0.022855     0.035596     0.010046   
sex            -0.042396  1.000000   0.051250    -0.154873     0.076570   
jitter(%)       0.022855  0.051250   1.000000     0.865489     0.984175   
jitter(abs)     0.035596 -0.154873   0.865489     1.000000     0.844538   
jitter(rap)     0.010046  0.076570   0.984175     0.844538     1.000000   
jitter(ppq5)    0.012992  0.087839   0.968216     0.790464     0.947178   
jitter(ddp)     0.010049  0.076555   0.984178     0.844542     1.000000   
shimmer(%)      0.101572  0.058753   0.709686     0.648779     0.681623   
shimmer(abs)    0.111076  0.056398   0.716584     0.655645     0.685423   
shimmer(apq3)   0.098951  0.044954   0.664005     0.623542     0.650101   
shimmer(apq5)   0.089861  0.064684   0.693854     0.621184     0.659671   
shimmer(apq11)  0.135076  0.023099   0.645878     0.589923     0.602968   
shimmer(dda)    0.098952 

In [50]:
scaler = PowerTransformer()

# Apply the transformer to make all explanatory variables more Gaussian-looking,
# then restore the index and column names of the explanatory variables.
# NOTE(review): the transformer is fitted on all rows, before any train/test
# split, so the held-out rows influence the transform - confirm this is acceptable.
std_x_1 = scaler.fit_transform(std_x_1_df.values)
std_x_1_df = pd.DataFrame(std_x_1, index=std_x_1_df.index, columns=std_x_1_df.columns)

# `scaler` is re-fitted here, so afterwards it holds the fit for the second feature set.
std_x_2 = scaler.fit_transform(std_x_2_df.values)
std_x_2_df = pd.DataFrame(std_x_2, index=std_x_2_df.index, columns=std_x_2_df.columns)

# Choose the hold-out fraction on the transformed features and pass it on;
# without the last two arguments test() would use its 0.4 default instead.
ratio_1 = cross_validation(std_x_1_df, y_1)
ratio_2 = cross_validation(std_x_2_df, y_2)

test(std_x_1_df, std_x_2_df, y_1, y_2, ratio_1, ratio_2)

0.2
0.2
Intercept: 28.991072816162866
Coefficient: [  2.63429565  -1.71550395  -3.07294124 -39.50952053  -0.65247671
  41.57690137   1.98400171  -0.30935303   5.09340692  -3.33563011
   3.10042352  -1.9824568   -3.35546157   0.3038866   -2.43564083
   0.97791922   2.28014784  -7.76385533]
           age       sex  jitter(abs)  jitter(rap)  jitter(ppq5)  jitter(ddp)  \
663   0.083121  1.462987    -0.421285     1.295855      0.942059     1.295299   
4855 -0.033262 -0.683533    -1.388413    -1.496341     -2.151139    -1.499599   
5503  1.063925  1.462987     0.114276     1.116134      1.127343     1.116315   
482  -0.033262 -0.683533    -1.090604    -1.537013     -1.177512    -1.545083   
3840 -0.913715  1.462987    -0.812723    -0.755111     -0.640705    -0.758578   
...        ...       ...          ...          ...           ...          ...   
4931 -0.808622  1.462987    -1.166925    -0.888769     -1.130463    -0.885271   
3264  0.685822 -0.683533     0.611180     0.638772      1.0304

In [51]:
# Apply forward stepwise selection; the resulting increase in R-squared is negligible
def forward_stepwise_selection(X, y, ratio):
    """Greedy forward feature selection scored by hold-out mean squared error.

    The data is split once (``random_state=0``, test fraction ``ratio``).
    Starting from no features, each round fits one statsmodels OLS model (with
    an intercept) per remaining feature, added to the features chosen so far,
    and measures its MSE on the test part. The feature with the lowest MSE is
    kept if it strictly improves on the best MSE so far; otherwise the search
    stops.

    Parameters
    ----------
    X : pandas.DataFrame
        Candidate features.
    y : target values aligned with ``X``.
    ratio : float
        Test fraction passed to ``train_test_split``.

    Returns
    -------
    list
        Selected column names, in the order they were added.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=ratio, random_state=0)

    selected_features = []
    best_error = np.inf  # Initialize with a large value

    while len(selected_features) < len(X.columns):
        remaining_features = [feature for feature in X.columns if feature not in selected_features]
        errors = []

        for feature in remaining_features:
            current_features = selected_features + [feature]

            # has_constant="add" always adds the intercept column. The default
            # ("skip") leaves it out whenever a feature happens to be constant
            # in that subset, which would give the train and test design
            # matrices different layouts.
            design_train = sm.add_constant(X_train[current_features], has_constant="add")
            design_test = sm.add_constant(X_test[current_features], has_constant="add")

            # Fit a model using statsmodels
            model = sm.OLS(y_train, design_train).fit()

            # Make predictions and calculate error
            y_pred = model.predict(design_test)
            errors.append((feature, mean_squared_error(y_test, y_pred)))

        # Find the feature that results in the lowest error (first one wins on ties)
        best_feature, best_error_for_feature = min(errors, key=lambda item: item[1])

        # Stop as soon as the best candidate no longer improves the model
        if best_error_for_feature >= best_error:
            break

        selected_features.append(best_feature)
        best_error = best_error_for_feature

    return selected_features

# Hold-out fraction per target, measured on the transformed features
ratio_1 = cross_validation(X=std_x_1_df, y=y_1)
ratio_2 = cross_validation(X=std_x_2_df, y=y_2)

# Forward stepwise selection, run independently for each target
selected_features_1 = forward_stepwise_selection(X=std_x_1_df, y=y_1, ratio=ratio_1)
selected_features_2 = forward_stepwise_selection(X=std_x_2_df, y=y_2, ratio=ratio_2)

keep_1, keep_2 = selected_features_1, selected_features_2
print(keep_1, keep_2, sep="\n")

# keep = ['age', 'test_time', 'jitter(%)', 'jitter(abs)', 'jitter(rap)', 'jitter(ddp)', 'shimmer(%)', 'shimmer(apq3)', 'shimmer(apq5)', 'shimmer(apq11)', 'shimmer(dda)', 'nhr', 'hnr', 'dfa']
# ['age', 'sex', 'test_time', 'jitter(%)', 'jitter(abs)', 'jitter(rap)', 'jitter(ddp)', 'shimmer(%)', 'shimmer(apq3)', 'shimmer(apq5)', 'shimmer(apq11)', 'shimmer(dda)', 'nhr', 'hnr', 'dfa']

# Restrict each feature frame to its selected columns (in selection order)
std_x_1_df = std_x_1_df[list(keep_1)]
std_x_2_df = std_x_2_df[list(keep_2)]

# Re-tune the hold-out fraction on the reduced feature sets and evaluate
ratio_1 = cross_validation(X=std_x_1_df, y=y_1)
ratio_2 = cross_validation(X=std_x_2_df, y=y_2)

test(std_x_1_df, std_x_2_df, y_1, y_2, ratio_1=ratio_1, ratio_2=ratio_2)
# def best_subset_selection(X, y, model):
#     best_subset = []
#     best_score = float('inf')
    
#     for k in range(1, len(X.columns) + 1):
#         for subset in combinations(X.columns, k):
#             X_subset = X[list(subset)]
            
#             X_train, X_val, y_train, y_val = train_test_split(X_subset, y, test_size=0.4    , random_state=42)
#             model.fit(X_train, y_train)
#             y_pred = model.predict(X_val)
#             mse = mean_squared_error(y_val, y_pred)
            
#             if mse < best_score:
#                 best_score = mse
#                 best_subset = list(subset)
    
#     return best_subset

# best_model = None
# best_subset = None
# best_error = np.inf
# X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(std_x_1_df, y_1, test_size=ratio_1, random_state=0)
# X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(std_x_2_df, y_2, test_size=ratio_2, random_state=0)

# model = LinearRegression()
# best_features_1 = best_subset_selection(X_train_1, y_train_1, model)
# best_features_2 = best_subset_selection(X_train_2, y_train_2, model)



0.2
0.2
['age', 'hnr', 'dfa', 'shimmer(dda)_log', 'shimmer(apq11)', 'shimmer(apq5)', 'shimmer(apq3)', 'jitter(abs)', 'jitter(ppq5)', 'sex', 'jitter(%)_log', 'rpde', 'shimmer(abs)', 'jitter(ddp)', 'shimmer(%)']
['age', 'hnr', 'dfa', 'sex', 'shimmer(apq5)', 'shimmer(apq11)', 'jitter(abs)', 'jitter(ddp)', 'rpde', 'nhr', 'jitter(%)_log', 'jitter(ppq5)', 'shimmer(abs)', 'shimmer(%)', 'jitter(rap)']
0.2
0.2
Intercept: 28.926688529702446
Coefficient: [  2.5985765   -3.15202119  -2.19786495  -1.86338312  -5.33296107
   4.3242985   -3.3040398   26.01416034   0.48954972  -1.39509309
   2.04338315   0.990704    -1.16920388   0.95951373 -24.89328058]
           age       hnr       dfa       sex  shimmer(apq5)  shimmer(apq11)  \
4061  1.587080  0.506541 -0.763347 -0.683533       0.476378        0.749319   
866   0.320057 -0.058358  0.321104  1.462987      -0.464492       -0.131722   
5601  0.083121 -0.948003 -0.652442  1.462987      -0.008593       -0.206421   
38   -0.033262  1.497830 -0.908913 -0

In [52]:
# keep_1 = best_features_1
# keep_1 = ['jitter(abs)', 'jitter(rap)', 'jitter(ppq5)', 'jitter(ddp)', 'shimmer(%)', 'shimmer(apq3)', 'shimmer(apq5)', 'shimmer(apq11)', 'nhr', 'hnr', 'rpde', 'dfa', 'ppe', 'shimmer(dda)_log']

# keep_2 = ['jitter(rap)', 'jitter(ppq5)', 'jitter(ddp)', 'shimmer(%)', 'shimmer(apq5)', 'shimmer(apq11)', 'nhr', 'hnr', 'rpde', 'dfa', 'ppe', 'shimmer(dda)_log']

# Use the forward-stepwise results as the columns to keep
keep_1, keep_2 = selected_features_1, selected_features_2

# keep_2 = best_features_2

print(keep_1, keep_2, sep="\n")
# keep = ['age', 'test_time', 'jitter(%)', 'jitter(abs)', 'jitter(rap)', 'jitter(ddp)', 'shimmer(%)', 'shimmer(apq3)', 'shimmer(apq5)', 'shimmer(apq11)', 'shimmer(dda)', 'nhr', 'hnr', 'dfa']
# ['age', 'sex', 'test_time', 'jitter(%)', 'jitter(abs)', 'jitter(rap)', 'jitter(ddp)', 'shimmer(%)', 'shimmer(apq3)', 'shimmer(apq5)', 'shimmer(apq11)', 'shimmer(dda)', 'nhr', 'hnr', 'dfa']

# Restrict each feature frame to the kept columns
std_x_1_df = std_x_1_df[list(keep_1)]
std_x_2_df = std_x_2_df[list(keep_2)]

# Tune the hold-out fraction on the reduced feature sets, then evaluate
ratio_1 = cross_validation(X=std_x_1_df, y=y_1)
ratio_2 = cross_validation(X=std_x_2_df, y=y_2)

test(std_x_1_df, std_x_2_df, y_1, y_2, ratio_1=ratio_1, ratio_2=ratio_2)

['age', 'hnr', 'dfa', 'shimmer(dda)_log', 'shimmer(apq11)', 'shimmer(apq5)', 'shimmer(apq3)', 'jitter(abs)', 'jitter(ppq5)', 'sex', 'jitter(%)_log', 'rpde', 'shimmer(abs)', 'jitter(ddp)', 'shimmer(%)']
['age', 'hnr', 'dfa', 'sex', 'shimmer(apq5)', 'shimmer(apq11)', 'jitter(abs)', 'jitter(ddp)', 'rpde', 'nhr', 'jitter(%)_log', 'jitter(ppq5)', 'shimmer(abs)', 'shimmer(%)', 'jitter(rap)']
0.2
0.2
Intercept: 28.926688529702446
Coefficient: [  2.5985765   -3.15202119  -2.19786495  -1.86338312  -5.33296107
   4.3242985   -3.3040398   26.01416034   0.48954972  -1.39509309
   2.04338315   0.990704    -1.16920388   0.95951373 -24.89328058]
           age       hnr       dfa       sex  shimmer(apq5)  shimmer(apq11)  \
4061  1.587080  0.506541 -0.763347 -0.683533       0.476378        0.749319   
866   0.320057 -0.058358  0.321104  1.462987      -0.464492       -0.131722   
5601  0.083121 -0.948003 -0.652442  1.462987      -0.008593       -0.206421   
38   -0.033262  1.497830 -0.908913 -0.683533 

In [53]:
# Print the OLS summaries so that variables with p-values above 0.05 can be
# identified and removed.
# The frames are overwritten with their constant-augmented versions, so the
# 'const' column stays in std_x_*_df for the cells that follow.

# First target
std_x_1_df = sm.add_constant(std_x_1_df)
model_1 = sm.OLS(endog=y_1, exog=std_x_1_df).fit()
pred = model_1.predict(exog=std_x_1_df)
model_details_1 = model_1.summary()
print(model_details_1)

# Second target (`pred` is overwritten with this model's fitted values)
std_x_2_df = sm.add_constant(std_x_2_df)
model_2 = sm.OLS(endog=y_2, exog=std_x_2_df).fit()
pred = model_2.predict(exog=std_x_2_df)
model_details_2 = model_2.summary()
print(model_details_2)



                            OLS Regression Results                            
Dep. Variable:            motor_updrs   R-squared:                       0.184
Model:                            OLS   Adj. R-squared:                  0.182
Method:                 Least Squares   F-statistic:                     87.97
Date:                Thu, 05 Oct 2023   Prob (F-statistic):          1.35e-244
Time:                        10:58:16   Log-Likelihood:                -20011.
No. Observations:                5863   AIC:                         4.005e+04
Df Residuals:                    5847   BIC:                         4.016e+04
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const               21.2897      0.096  


                            OLS Regression Results                            
Dep. Variable:            total_updrs   R-squared:                       0.179
Model:                            OLS   Adj. R-squared:                  0.177
Method:                 Least Squares   F-statistic:                     85.12
Date:                Thu, 05 Oct 2023   Prob (F-statistic):          4.29e-237
Time:                        10:58:16   Log-Likelihood:                -21642.
No. Observations:                5863   AIC:                         4.332e+04
Df Residuals:                    5847   BIC:                         4.342e+04
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const             29.0141      0.127    228

In [54]:
# Drop the predictors with high p-values; after removing them the R-squared increased.
std_x_1_df = std_x_1_df.drop(columns=['jitter(ddp)'])
std_x_2_df = std_x_2_df.drop(columns=['jitter(rap)', 'jitter(ddp)', 'shimmer(%)', 'shimmer(abs)'])

# Re-tune the hold-out fraction on the reduced feature sets, then evaluate
ratio_1 = cross_validation(X=std_x_1_df, y=y_1)
ratio_2 = cross_validation(X=std_x_2_df, y=y_2)


test(std_x_1_df, std_x_2_df, y_1, y_2, ratio_1=ratio_1, ratio_2=ratio_2)

0.2
0.2
Intercept: 28.9232040737445
Coefficient: [ 0.          2.591849   -3.12821491 -2.16650977 -1.77250998 -5.07910459
  3.90542639 -3.28319279  0.47142771 -1.46937297  2.54431862  1.59374706]
      const       age       hnr       dfa       sex  shimmer(apq5)  \
4061    1.0  1.587080  0.506541 -0.763347 -0.683533       0.476378   
866     1.0  0.320057 -0.058358  0.321104  1.462987      -0.464492   
5601    1.0  0.083121 -0.948003 -0.652442  1.462987      -0.008593   
38      1.0 -0.033262  1.497830 -0.908913 -0.683533      -1.266697   
158     1.0 -1.119598  0.804485 -1.487412 -0.683533      -0.379608   
...     ...       ...       ...       ...       ...            ...   
4931    1.0 -0.808622  0.913594  0.119052  1.462987      -1.227502   
3264    1.0  0.685822 -0.887912  1.266274 -0.683533       0.029487   
1653    1.0 -0.484796  0.681701 -1.239601 -0.683533      -0.833121   
2607    1.0  1.587080  0.494799 -0.146688 -0.683533       0.906079   
2732    1.0 -0.913715  0.180126  1

In [55]:
# Feature engineering (second target only; the first-target variants stay disabled)
# std_x_1_df['doshn'] = std_x_1_df['hnr']**2 - std_x_1_df['nhr']
# std_x_1_df['dshn'] = std_x_1_df['hnr']**2 / std_x_1_df['nhr']
std_x_2_df = std_x_2_df.assign(
    # squared hnr minus nhr
    doshn=std_x_2_df['hnr'] ** 2 - std_x_2_df['nhr'],
    # squared hnr divided by nhr
    dshn=std_x_2_df['hnr'] ** 2 / std_x_2_df['nhr'],
)



# std_x_1_df['rsj'] = std_x_1_df['shimmer(%)'] / std_x_1_df['jitter(%)']
# std_x_2_df['doshn'] = std_x_2_df['shimmer(%)']**2 - std_x_2_df['jitter(%)']
# print(std_x_1_df.head())

# Re-tune the hold-out fraction with the new features in place, then evaluate
ratio_1 = cross_validation(X=std_x_1_df, y=y_1)
ratio_2 = cross_validation(X=std_x_2_df, y=y_2)
test(std_x_1_df, std_x_2_df, y_1, y_2, ratio_1=ratio_1, ratio_2=ratio_2)


0.2
0.2
Intercept: 29.295257139365162
Coefficient: [ 0.00000000e+00  2.47444824e+00 -2.78327507e+00 -2.27799800e+00
 -1.54408802e+00 -5.05608184e+00  4.09759623e+00 -2.85226814e+00
  4.60382718e-01 -1.55161262e+00  1.78793679e+00  1.94009113e+00
 -3.63746848e-01 -7.95714237e-05]
      const       age       hnr       dfa       sex  shimmer(apq5)  \
4061    1.0  1.587080  0.506541 -0.763347 -0.683533       0.476378   
866     1.0  0.320057 -0.058358  0.321104  1.462987      -0.464492   
5601    1.0  0.083121 -0.948003 -0.652442  1.462987      -0.008593   
38      1.0 -0.033262  1.497830 -0.908913 -0.683533      -1.266697   
158     1.0 -1.119598  0.804485 -1.487412 -0.683533      -0.379608   
...     ...       ...       ...       ...       ...            ...   
4931    1.0 -0.808622  0.913594  0.119052  1.462987      -1.227502   
3264    1.0  0.685822 -0.887912  1.266274 -0.683533       0.029487   
1653    1.0 -0.484796  0.681701 -1.239601 -0.683533      -0.833121   
2607    1.0  1.58708