<a href="https://colab.research.google.com/github/SaiVardhan19/Osteoporosis/blob/main/Osterioporosis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Importing the Required Libraries

In [35]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization


In [36]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

In [37]:
df=pd.read_csv("/content/Osteo data.csv")

# Data Preprocessing

In [38]:
# Handle missing values
# Filling missing values with the median of each column
df.fillna(df.median(), inplace=True)

In [39]:
# Encoding categorical features (like Smoking, Drinking)
label_encoders = {}
for column in ['Smoking', 'Drinking']:
    if column in df.columns:
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        label_encoders[column] = le


In [40]:
# Separate features (X) and target (y)
X = df.drop(columns=['OP'])  # Drop the target column (Osteoporosis indicator)
y = df['OP']  # Target variable


In [41]:
# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [42]:
# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [43]:
X_train.shape # Shape of the Training data(Rows,columns)

(1229, 39)

# K-Fold and Model Compilation



In [44]:
# 3-fold cross-validation where the data is shuffled before splitting into folds
kf = KFold(n_splits=3, shuffle=True, random_state=42)

for train_index, val_index in kf.split(X_train, y_train):
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]


In [45]:
# Build the Neural Network Model:
# Input layer contains 128 neurons and we use L2 regularization to prevent overfitting
# Batch normalization is used to stabilize training as it normalises the input component for each layer such that mean=0 and SD=1
# Dropout prevents overfitting as it randomly drops neurons during a each iteration to avoid overdependence on that neuron
# We use Sigmoid function for the Output Layer as the output is to be classified between 1(Positive) and 0(Negative)

model = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=l2(0.001)),  # Input Layer
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'), # Hidden Layer 1
    BatchNormalization(),
    Dropout(0.4),
    Dense(32, activation='relu'), # Hidden Layer 2
    Dropout(0.3),
    Dense(1, activation='sigmoid') #Output Layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [46]:
# This is used to stop processing/training the data when the the validation accuracy meets the converging point i.e, when the accuracy starts to drop
# Patience=5 indicates the number of epocs the model must wait to stop the training
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5)

In [47]:
# Compile the model using Adam(Adaptive Moment Estimation) optimiser
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [48]:
# Train the model:
history = model.fit(X_train_fold, y_train_fold, epochs=100, batch_size=16, validation_split=0.2, verbose=1)

Epoch 1/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.5865 - loss: 0.9623 - val_accuracy: 0.7195 - val_loss: 0.6718
Epoch 2/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6687 - loss: 0.6749 - val_accuracy: 0.7744 - val_loss: 0.5952
Epoch 3/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6927 - loss: 0.6047 - val_accuracy: 0.7744 - val_loss: 0.5486
Epoch 4/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7499 - loss: 0.5890 - val_accuracy: 0.7866 - val_loss: 0.5188
Epoch 5/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7575 - loss: 0.5473 - val_accuracy: 0.7927 - val_loss: 0.5014
Epoch 6/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7527 - loss: 0.5480 - val_accuracy: 0.8171 - val_loss: 0.4882
Epoch 7/100
[1m41/41[0m [32m━━

In [49]:
# Predict on test set
# The predict statement produces a boolean value that is converted into Numerical data using the astype()
y_pred_fold = (model.predict(X_val_fold) > 0.5).astype('int32')

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


# Evaluation

In [50]:
# Accuracy
accuracy = accuracy_score(y_val_fold, y_pred_fold)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 81.17%


In [51]:
# Confusion matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_val_fold, y_pred_fold))


Confusion Matrix:
 [[228  34]
 [ 43 104]]


In [52]:
# Classification report
print("\nClassification Report:\n", classification_report(y_val_fold, y_pred_fold))


Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.87      0.86       262
           1       0.75      0.71      0.73       147

    accuracy                           0.81       409
   macro avg       0.80      0.79      0.79       409
weighted avg       0.81      0.81      0.81       409



#New Patient data

In [53]:
# Predict whether the new patient has Osterioporosis

def predict_new(patient_data):
    """
    Predict if a new patient has osteoporosis using the trained neural network model.
    Args:
        patient_data: Data with the same number of features as the model input.
    Returns:
        0 = No Osteoporosis or 1 = Osteoporosis
    """
    patient_scaled = scaler.transform([patient_data])  # Scale the data
    prediction = (model.predict(patient_scaled) > 0.5).astype('int32')  # Predict
    print(prediction)
    return prediction[0][0]

In [54]:
#Getting the input from the user using Try and Except method
def get_patient():
    while True:  # Input validation loop
        try:
            gender = int(input("Gender (1 for Female, 2 for Male): "))
            if gender not in (1, 2):
                raise ValueError
            age = int(input("Age: "))
            height = float(input("Height (in cm): "))
            weight = float(input("Weight (in kg): "))
            bmi = weight/((height/100)**2)
            l1_4 = float(input("L1-4: "))
            l1_4t = float(input("L1.4T: "))
            fn = float(input("FN: "))
            fnt = float(input("FNT: "))
            tl = float(input("TL: "))
            tlt = float(input("TLT: "))
            alt = float(input("ALT: "))
            ast = float(input("AST: "))
            bun = float(input("BUN: "))
            crea = float(input("CREA: "))
            uric = float(input("URIC: "))
            fbg = float(input("FBG: "))
            hdl_c = float(input("HDL-C: "))
            ldl_c = float(input("LDL-C: "))
            ca = float(input("Ca: "))
            p = float(input("P: "))
            mg = float(input("Mg: "))
            calsium = float(input("Calsium (duplicate column, please check your data): "))
            calcitriol = float(input("Calcitriol: "))
            bisphosphonate = int(input("Bisphosphonate use (0 for No, 1 for Yes): "))
            if bisphosphonate not in (0, 1):
                raise ValueError
            calcitonin = int(input("Calcitonin use (0 for No, 1 for Yes): "))
            if calcitonin not in (0, 1):
                raise ValueError
            htn = int(input("Hypertension (0 for No, 1 for Yes): "))
            if htn not in (0, 1):
                raise ValueError
            copd = int(input("COPD (0 for No, 1 for Yes): "))
            if copd not in (0, 1):
                raise ValueError
            dm = int(input("DM (0 for No, 1 for Yes): "))
            if dm not in (0, 1):
                raise ValueError
            hyperlipidaemia = int(input("Hyperlipidaemia (0 for No, 1 for Yes): "))
            if hyperlipidaemia not in (0, 1):
                raise ValueError
            hyperuricemia = int(input("Hyperuricemia (0 for No, 1 for Yes): "))
            if hyperuricemia not in (0, 1):
                raise ValueError
            as_val = int(input("AS (0 for No, 1 for Yes): "))
            if as_val not in (0, 1):
                raise ValueError
            vt = int(input("VT (0 for No, 1 for Yes): "))
            if vt not in (0, 1):
                raise ValueError
            vd = int(input("VD (0 for No, 1 for Yes): "))
            if vd not in (0, 1):
                raise ValueError
            cad = int(input("CAD (0 for No, 1 for Yes): "))
            if cad not in (0, 1):
                raise ValueError
            ckd = int(input("CKD (0 for No, 1 for Yes): "))
            if ckd not in (0, 1):
                raise ValueError
            fracture = int(input("Fracture (0 for No, 1 for Yes): "))
            if fracture not in (0, 1):
                raise ValueError
            smoking = int(input("Smoking (0 for No, 1 for Yes): "))
            if smoking not in (0, 1):
                raise ValueError
            drinking = int(input("Drinking (0 for No, 1 for Yes): "))
            if drinking not in (0, 1):
                raise ValueError

            break # Exit loop if input is valid

        except ValueError:
            print("Invalid input. Please enter a valid number.")
        except Exception as e: # Catch other errors
            print(f"An error occurred: {e}")

    patient_data = [gender, age, height, weight, bmi, l1_4, l1_4t, fn, fnt, tl, tlt, alt, ast, bun, crea, uric, fbg, hdl_c, ldl_c, ca, p, mg, calsium, calcitriol, bisphosphonate, calcitonin, htn, copd, dm, hyperlipidaemia, hyperuricemia, as_val, vt, vd, cad, ckd, fracture, smoking, drinking]
    return patient_data

In [55]:
# Sample patient data
patient = [
    1,      # Gender (1 = Male, 2 = Female)
    55,     # Age (in years)
    160,    # Height (in cm)
    65,     # Weight (in kg)
    25.4,   # BMI (Body Mass Index)
    0.85,   # L1-4 (Bone density measure)
    0.77,   # L1.4T (Bone density measure)
    0.9,    # FN (Bone density measure)
    0.85,   # FNT (Bone density measure)
    0.7,    # TL (Bone density measure)
    0.72,   # TLT (Bone density measure)
    0.85,   # ALT (Alanine Transaminase level)
    1.2,    # AST (Aspartate Transaminase level)
    15.0,   # BUN (Blood Urea Nitrogen level)
    0.85,   # CREA (Creatinine level)
    5.6,    # URIC (Uric acid level)
    95,     # FBG (Fasting Blood Glucose level)
    45,     # HDL-C (High-Density Lipoprotein Cholesterol)
    120,    # LDL-C (Low-Density Lipoprotein Cholesterol)
    9.5,    # Ca (Calcium level)
    3.9,    # P (Phosphorus level)
    2.1,    # Mg (Magnesium level)
    9.0,    # Calcium (level)
    32.0,   # Calcitriol (Vitamin D3 level)
    1,      # Bisphosphonate (1 = Yes, 0 = No)
    0,      # Calcitonin (1 = Yes, 0 = No)
    1,      # HTN (Hypertension, 1 = Yes, 0 = No)
    0,      # COPD (Chronic Obstructive Pulmonary Disease, 1 = Yes, 0 = No)
    0,      # DM (Diabetes Mellitus, 1 = Yes, 0 = No)
    1,      # Hyperlipidemia (1 = Yes, 0 = No)
    0,      # Hyperuricemia (1 = Yes, 0 = No)
    0,      # AS (Aortic Stenosis, 1 = Yes, 0 = No)
    0,      # VT (Ventricular Tachycardia, 1 = Yes, 0 = No)
    0,      # VD (Valvular Disease, 1 = Yes, 0 = No)
    1,      # CAD (Coronary Artery Disease, 1 = Yes, 0 = No)
    0,      # CKD (Chronic Kidney Disease, 1 = Yes, 0 = No)
    1,      # Fracture (1 = Yes, 0 = No)
    0,      # Smoking (1 = Yes, 0 = No)
    1      # Drinking (1 = Yes, 0 = No)
    #1        OP (Osteoporosis, 1 = Yes, 0 = No) — This is usually the target variable, not used as an input
]


In [56]:
# Diagnoising the patient based on his reports and finding the probability

#patient=get_patient()
print("Prediction for new patient for Osteoporosis:")
print(f"You have {accuracy * 100:.2f}% chance that the result is :"+ "Positive" if predict_new(patient)==1 else "Negative")


Prediction for new patient for Osteoporosis:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[[1]]
You have 81.17% chance that the result is :Positive


