In [1]:
import numpy as np
import pandas as pd
from patsy import dmatrices
from itertools import combinations
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load the dataset, drop the row identifier, give the two columns whose names
# contain a space formula-safe names, and keep only complete rows.
df = (
    pd.read_csv('Final_Cleaned.csv')
    .drop(columns=['ID'])
    .rename(columns={'T Score': 'T_Score', 'Vit D': 'Vit_D'})
    .dropna()
)

predictor_columns = ['Gender', 'Age', 'BMD', 'T_Score', 'DM', 'HTN', 'Vit_D', 'Calcium', 'Menopause']

# Model formula: every main effect plus every pairwise interaction.
main_effects = ' + '.join(predictor_columns)
interaction_terms = ' + '.join(f'{a}:{b}' for a, b in combinations(predictor_columns, 2))
formula = f'Diagnosis ~ {main_effects} + {interaction_terms}'

# Build the design matrices. X is a DataFrame whose columns include an
# 'Intercept' column and the dummy-coded categorical terms.
y, X = dmatrices(formula, df, return_type='dataframe')

# Variance inflation factor of every design-matrix column. An infinite VIF
# means the column is perfectly explained by the other columns.
vif_data = pd.DataFrame({
    "feature": X.columns,
    "VIF": [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
})
print(vif_data)

# Standardize the design matrix and fit a one-vs-rest logistic regression.
model = make_pipeline(StandardScaler(), LogisticRegression(multi_class='ovr', solver='lbfgs', C=1.0, max_iter=1000))
model.fit(X, df['Diagnosis'])

# One indicator column per class, in the SAME order as model.classes_.
# Deriving the order from the fitted model (instead of a hand-written list)
# guarantees that y_bin[:, i] and the probability column for model.classes_[i]
# always refer to the same class.
y_bin = label_binarize(df['Diagnosis'], classes=model.classes_)

# Class-membership probabilities, one column per class.
probs = model.predict_proba(X)
probs_df = pd.DataFrame(probs, columns=model.classes_)

# One-vs-rest AUC per class.
# NOTE(review): the model is scored on the same rows it was fitted on, so
# these are in-sample AUCs; a held-out or cross-validated estimate would be
# needed to judge generalization.
auc_scores = []
for i, class_name in enumerate(model.classes_):
    auc = roc_auc_score(y_bin[:, i], probs_df[class_name])
    auc_scores.append(auc)

average_auc = np.mean(auc_scores)

print(f'AUC scores for each class: {auc_scores}')
print(f'Average AUC score: {average_auc}')

  return 1 - self.ssr/self.centered_tss
  vif = 1. / (1. - r_squared_i)
  return 1 - self.ssr/self.centered_tss


              feature          VIF
0           Intercept     0.000000
1         Gender[T.M]          inf
2           DM[T.Pre]          inf
3   DM[T.Unavailable]          inf
4           DM[T.Yes]          inf
..                ...          ...
94          BMD:Vit_D   296.536550
95        BMD:Calcium  2926.321699
96      T_Score:Vit_D    68.066479
97    T_Score:Calcium  2985.393815
98      Vit_D:Calcium   555.770246

[99 rows x 2 columns]
AUC scores for each class: [0.9416780045351474, 0.8091863729618831, 0.8589909648069756]
Average AUC score: 0.8699517807680021


In [2]:
# Odds ratios and approximate 95% confidence intervals.
#
# The pipeline standardizes every design-matrix column before the logistic
# regression, so each odds ratio is per one standard deviation of its column.
log_reg = model.named_steps['logisticregression']
scaler = model.named_steps['standardscaler']
coefs = log_reg.coef_

odds_ratios = np.exp(coefs)

# Standard errors must come from the curvature of each class's fitted
# likelihood. The spread of one feature's coefficient across the class models
# (np.std(coefs[:, j])) is not a sampling standard error.
X_scaled = np.asarray(scaler.transform(X), dtype=float)
n_obs, n_features = X_scaled.shape
design = np.column_stack([np.ones(n_obs), X_scaled])  # leading column = intercept

# L2 penalty contribution to the Hessian: 1/C on every slope, 0 on the
# intercept (lbfgs does not penalize the intercept).
ridge = np.eye(n_features + 1) / log_reg.C
ridge[0, 0] = 0.0

Z_95 = 1.96  # two-sided 95% normal quantile

conf_intervals = []
for i in range(coefs.shape[0]):
    # Fitted probabilities of the i-th one-vs-rest binary model; the tanh form
    # of the sigmoid cannot overflow.
    linear_pred = X_scaled @ coefs[i] + log_reg.intercept_[i]
    p_hat = 0.5 * (1.0 + np.tanh(0.5 * linear_pred))
    weights = p_hat * (1.0 - p_hat)

    # Fisher information of the unpenalized likelihood and penalized Hessian.
    fisher = design.T @ (design * weights[:, None])
    hessian_inv = np.linalg.pinv(fisher + ridge)

    # Sandwich covariance of the ridge-penalized estimator.
    # NOTE(review): Wald intervals for a penalized fit ignore shrinkage bias
    # and treat the scaler as fixed; read them as approximate.
    cov = hessian_inv @ fisher @ hessian_inv
    std_errors = np.sqrt(np.clip(np.diag(cov)[1:], 0.0, None))  # drop intercept

    ci_lower = np.exp(coefs[i] - Z_95 * std_errors)
    ci_upper = np.exp(coefs[i] + Z_95 * std_errors)
    # Same layout as before: conf_intervals[i][j] == (lower, upper).
    conf_intervals.append(list(zip(ci_lower, ci_upper)))


for i, class_name in enumerate(model.classes_):
    print(f'Odds Ratios for class {class_name}:')
    for j, col in enumerate(X.design_info.column_names):
        print(f'{col}: OR={odds_ratios[i, j]:.4f}')


Odds Ratios for class Normal:
Intercept: OR=1.0000
Gender[T.M]: OR=1.0043
DM[T.Pre]: OR=0.9977
DM[T.Unavailable]: OR=0.9199
DM[T.Yes]: OR=0.9667
HTN[T.Unavailable]: OR=1.1245
HTN[T.Yes]: OR=0.8932
Menopause[T.Not Applicable]: OR=1.0043
Menopause[T.Unknown]: OR=1.2357
Menopause[T.Yes]: OR=0.6743
Gender[T.M]:DM[T.Pre]: OR=1.0000
Gender[T.M]:DM[T.Unavailable]: OR=0.9487
Gender[T.M]:DM[T.Yes]: OR=0.8336
Gender[T.M]:HTN[T.Unavailable]: OR=0.9487
Gender[T.M]:HTN[T.Yes]: OR=1.3851
Gender[T.M]:Menopause[T.Not Applicable]: OR=1.0043
Gender[T.M]:Menopause[T.Unknown]: OR=1.0000
Gender[T.M]:Menopause[T.Yes]: OR=1.0000
DM[T.Pre]:HTN[T.Unavailable]: OR=1.6216
DM[T.Unavailable]:HTN[T.Unavailable]: OR=0.8245
DM[T.Yes]:HTN[T.Unavailable]: OR=1.1389
DM[T.Pre]:HTN[T.Yes]: OR=0.8916
DM[T.Unavailable]:HTN[T.Yes]: OR=1.0902
DM[T.Yes]:HTN[T.Yes]: OR=0.7494
DM[T.Pre]:Menopause[T.Not Applicable]: OR=1.0000
DM[T.Unavailable]:Menopause[T.Not Applicable]: OR=0.9487
DM[T.Yes]:Menopause[T.Not Applicable]: OR=0.8336