In [8]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# --- 1. Load dataset ---
file_path = r"C:\Users\suman\Desktop\Jinee\Research Paper\SOC Maturity\Synthesised data.xlsx"
df = pd.read_excel(file_path)

# --- 2. Log transform time-based metrics ---
df["log_MTTD"] = np.log(df["MTTD (hrs)"])
df["log_MTTR"] = np.log(df["MTTR (hrs)"])
df["log_MTTC"] = np.log(df["MTTC (hrs)"])

# --- 3. Correlation Analysis ---
variables = ["AAS", "log_MTTD", "log_MTTR", "log_MTTC", "FPR (%)"]
corr_matrix = pd.DataFrame(index=variables, columns=variables, dtype=float)
pvals = pd.DataFrame(index=variables, columns=variables, dtype=float)

for i in variables:
    for j in variables:
        r, p = pearsonr(df[i], df[j])
        corr_matrix.loc[i, j] = r
        pvals.loc[i, j] = p

corr_matrix = corr_matrix.round(3)
pvals = pvals.round(3)

# --- 4. Regression Analysis (Unstandardized Coefficients) ---
X = df[["CTI", "Detection", "Emulation", "Automation"]]
y = df["log_MTTR"]
X_const = sm.add_constant(X)

ols_model = sm.OLS(y, X_const).fit()
regression_summary = ols_model.summary()

# --- 5. Regression Analysis (Standardized Betas) ---
scaler = StandardScaler()
X_std = scaler.fit_transform(X)
y_std = StandardScaler().fit_transform(y.values.reshape(-1, 1)).ravel()

std_model = LinearRegression().fit(X_std, y_std)
betas = pd.Series(std_model.coef_, index=["CTI", "Detection", "Emulation", "Automation"])

# --- 6. Display Outputs ---
print("=== Correlation Matrix (Pearson r) ===")
print(corr_matrix)
print("\n=== P-values Matrix ===")
print(pvals)

print("\n=== Regression Model (Unstandardized Coefficients) ===")
print(regression_summary)

print("\n=== Standardized Betas (β) ===")
print(betas.round(3))


=== Correlation Matrix (Pearson r) ===
            AAS  log_MTTD  log_MTTR  log_MTTC  FPR (%)
AAS       1.000    -0.926    -0.954    -0.962   -0.820
log_MTTD -0.926     1.000     0.951     0.943    0.822
log_MTTR -0.954     0.951     1.000     0.979    0.794
log_MTTC -0.962     0.943     0.979     1.000    0.812
FPR (%)  -0.820     0.822     0.794     0.812    1.000

=== P-values Matrix ===
          AAS  log_MTTD  log_MTTR  log_MTTC  FPR (%)
AAS       0.0       0.0       0.0       0.0      0.0
log_MTTD  0.0       0.0       0.0       0.0      0.0
log_MTTR  0.0       0.0       0.0       0.0      0.0
log_MTTC  0.0       0.0       0.0       0.0      0.0
FPR (%)   0.0       0.0       0.0       0.0      0.0

=== Regression Model (Unstandardized Coefficients) ===
                            OLS Regression Results                            
Dep. Variable:               log_MTTR   R-squared:                       0.911
Model:                            OLS   Adj. R-squared:                  0