In [18]:
from imblearn.over_sampling import SMOTE
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from scikeras.wrappers import KerasClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb
import xgboost as xgb
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [19]:
df = pd.read_csv('C:\\Users\\ASUS\\Organ.csv')
df['Gender'] = df['Gender'].map({'FEMALE': 0, 'MALE':1})
df = df.drop(columns=['ID', 'Year', 'Bp'])
df_original = df.copy()
df_original

Unnamed: 0,Age,Gender,heart Attack,Heart Valve,Heart Defect at birth,Cardiomyopathy,Severe cystic fibrosis,copd(lung_Disease),Repeated urinary infections,Diabities,kidney stones,Urinary Tract Infection,Transplant,Needed_or_not
0,5,0,0,0,0,1,0,0,0,1,1,1,Kideny,yes
1,42,1,1,1,1,0,0,0,1,0,1,1,heart-kidney,yes
2,70,0,1,0,0,1,1,0,0,1,1,0,heart,yes
3,4,0,0,1,0,1,0,0,1,0,1,1,heart-kidney,yes
4,17,0,0,1,1,0,1,0,1,0,1,0,heart,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,36,1,1,1,0,1,1,1,0,0,1,0,heart-lungs,yes
117,46,0,0,1,1,1,0,1,1,0,0,0,heart,yes
118,79,0,0,1,0,0,0,0,1,1,1,1,Kideny,yes
119,66,1,1,0,0,1,1,1,1,0,0,1,lung,yes


In [20]:
df["Transplant"] = df["Transplant"].fillna("nothing")

In [21]:
# One-hot encoding with NaN values preserved
one_hot_encoded = pd.get_dummies(df['Transplant'])
one_hot_encoded = one_hot_encoded.astype(int)

# Merging the one-hot encoded columns back into the original dataframe (optional)
df_encoded = pd.concat([df, one_hot_encoded], axis=1)

df_encoded = df_encoded.drop(columns='Transplant')
df_encoded['Needed_or_not'] = df_encoded['Needed_or_not'].map({'yes': 1, 'no':0})
df_encoded = df_encoded.rename(columns={"Kideny":"Kidney"})
df_encoded

Unnamed: 0,Age,Gender,heart Attack,Heart Valve,Heart Defect at birth,Cardiomyopathy,Severe cystic fibrosis,copd(lung_Disease),Repeated urinary infections,Diabities,kidney stones,Urinary Tract Infection,Needed_or_not,Kidney,heart,heart-kidney,heart-lungs,lung,lung-kidney,nothing
0,5,0,0,0,0,1,0,0,0,1,1,1,1,1,0,0,0,0,0,0
1,42,1,1,1,1,0,0,0,1,0,1,1,1,0,0,1,0,0,0,0
2,70,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0
3,4,0,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0
4,17,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,36,1,1,1,0,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0
117,46,0,0,1,1,1,0,1,1,0,0,0,1,0,1,0,0,0,0,0
118,79,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0
119,66,1,1,0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,0,0


In [22]:
df_encoded['Gender'] = df_encoded['Gender'].map({0:-1, 1:1})

In [23]:
df_encoded.columns

Index(['Age', 'Gender', 'heart Attack', 'Heart Valve', 'Heart Defect at birth',
       'Cardiomyopathy', 'Severe cystic fibrosis', 'copd(lung_Disease)',
       'Repeated urinary infections', 'Diabities', 'kidney stones',
       'Urinary Tract Infection', 'Needed_or_not', 'Kidney', 'heart',
       'heart-kidney', 'heart-lungs', 'lung', 'lung-kidney', 'nothing'],
      dtype='object')

In [24]:
# Assigning weights based on severity
Heart_severity_weights = {
    'heart Attack': 100,
    'Heart Valve': 10,
    'Heart Defect at birth': 50,
    'Cardiomyopathy': 75
}

# Creating a severity-weighted heart condition index
df_encoded['Heart_Condition_Severity_Index'] = (
    df_encoded['heart Attack'] * Heart_severity_weights['heart Attack'] +
    df_encoded['Heart Valve'] * Heart_severity_weights['Heart Valve'] +
    df_encoded['Heart Defect at birth'] * Heart_severity_weights['Heart Defect at birth'] +
    df_encoded['Cardiomyopathy'] * Heart_severity_weights['Cardiomyopathy']
)

In [25]:
# Severity-weighted lung condition index
lung_severity_weights = {
    'copd(lung_Disease)': 60,
    'Severe cystic fibrosis': 80
}

df_encoded['Lung_Condition_Severity_Index'] = (
    df_encoded['copd(lung_Disease)'] * lung_severity_weights['copd(lung_Disease)'] +
    df_encoded['Severe cystic fibrosis'] * lung_severity_weights['Severe cystic fibrosis']
)

In [26]:
# Severity-weighted kidney condition index
kidney_severity_weights = {
    'kidney stones': 20,
    'Repeated urinary infections': 30,
    'Urinary Tract Infection': 40
}

df_encoded['Kidney_Condition_Severity_Index'] = (
    df_encoded['kidney stones'] * kidney_severity_weights['kidney stones'] +
    df_encoded['Repeated urinary infections'] * kidney_severity_weights['Repeated urinary infections'] +
    df_encoded['Urinary Tract Infection'] * kidney_severity_weights['Urinary Tract Infection']
)

In [27]:
df_encoded['Chronic_Condition_Severity_Index'] = (
    df_encoded['Heart_Condition_Severity_Index'] +
    df_encoded['Lung_Condition_Severity_Index'] +
    df_encoded['Kidney_Condition_Severity_Index'] +
    df_encoded['Diabities'] * 50  # Assigning weight to Diabetes
)

In [28]:
# Age and Condition Interaction
df_encoded['Age_Heart_Interaction'] = df_encoded['Age'] * df_encoded['Heart_Condition_Severity_Index']
df_encoded['Age_Lung_Interaction'] = df_encoded['Age'] * df_encoded['Lung_Condition_Severity_Index']
df_encoded['Age_Kidney_Interaction'] = df_encoded['Age'] * df_encoded['Kidney_Condition_Severity_Index']
df_encoded['Age_Chronic_Interaction'] = df_encoded['Age'] * df_encoded['Chronic_Condition_Severity_Index']

In [29]:
# Blood Pressure and Condition Interaction
# df_encoded['BP_Heart_Interaction'] = df_encoded['Bp'] * df_encoded['Heart_Condition_Severity_Index']
# df_encoded['BP_Kidney_Interaction'] = df_encoded['Bp'] * df_encoded['Kidney_Condition_Severity_Index']
# df_encoded['BP_Chronic_Interaction'] = df_encoded['Bp'] * df_encoded['Chronic_Condition_Severity_Index']

In [30]:
# Gender-Based Features
df_encoded['Gender_Heart_Interaction'] = df_encoded['Gender'] * df_encoded['Heart_Condition_Severity_Index']
df_encoded['Gender_Kidney_Interaction'] = df_encoded['Gender'] * df_encoded['Kidney_Condition_Severity_Index']
df_encoded['Gender_Lung_Interaction'] = df_encoded['Gender'] * df_encoded['Lung_Condition_Severity_Index']

In [32]:
# BP to Age Ratio
# df_encoded['BP_Age_Ratio'] = df_encoded['Bp'] / (df_encoded['Age'] + 1)  # Adding 1 to avoid division by zero

In [33]:
# Symptom Count
symptom_columns = ['heart Attack', 'Heart Valve', 'Heart Defect at birth', 'Cardiomyopathy', 
                   'Severe cystic fibrosis', 'copd(lung_Disease)', 'Repeated urinary infections', 
                   'Diabities', 'kidney stones', 'Urinary Tract Infection']
df_encoded['Symptom_Count'] = df_encoded[symptom_columns].sum(axis=1)

In [48]:
df_encoded

Unnamed: 0,Age,Gender,heart Attack,Heart Valve,Heart Defect at birth,Cardiomyopathy,Severe cystic fibrosis,copd(lung_Disease),Repeated urinary infections,Diabities,...,Kidney_Condition_Severity_Index,Chronic_Condition_Severity_Index,Age_Heart_Interaction,Age_Lung_Interaction,Age_Kidney_Interaction,Age_Chronic_Interaction,Gender_Heart_Interaction,Gender_Kidney_Interaction,Gender_Lung_Interaction,Symptom_Count
0,5,-1,0,0,0,1,0,0,0,1,...,60,185,375,0,300,925,-75,-60,0,4
1,42,1,1,1,1,0,0,0,1,0,...,90,250,6720,0,3780,10500,160,90,0,6
2,70,-1,1,0,0,1,1,0,0,1,...,20,325,12250,5600,1400,22750,-175,-20,-80,5
3,4,-1,0,1,0,1,0,0,1,0,...,90,175,340,0,360,700,-85,-90,0,5
4,17,-1,0,1,1,0,1,0,1,0,...,50,190,1020,1360,850,3230,-60,-50,-80,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,36,1,1,1,0,1,1,1,0,0,...,20,345,6660,5040,720,12420,185,20,140,6
117,46,-1,0,1,1,1,0,1,1,0,...,30,225,6210,2760,1380,10350,-135,-30,-60,5
118,79,-1,0,1,0,0,0,0,1,1,...,90,150,790,0,7110,11850,-10,-90,0,5
119,66,1,1,0,0,1,1,1,1,0,...,70,385,11550,9240,4620,25410,175,70,140,6


In [49]:
df_encoded.columns

Index(['Age', 'Gender', 'heart Attack', 'Heart Valve', 'Heart Defect at birth',
       'Cardiomyopathy', 'Severe cystic fibrosis', 'copd(lung_Disease)',
       'Repeated urinary infections', 'Diabities', 'kidney stones',
       'Urinary Tract Infection', 'Needed_or_not', 'Kidney', 'heart',
       'heart-kidney', 'heart-lungs', 'lung', 'lung-kidney', 'nothing',
       'Heart_Condition_Severity_Index', 'Lung_Condition_Severity_Index',
       'Kidney_Condition_Severity_Index', 'Chronic_Condition_Severity_Index',
       'Age_Heart_Interaction', 'Age_Lung_Interaction',
       'Age_Kidney_Interaction', 'Age_Chronic_Interaction',
       'Gender_Heart_Interaction', 'Gender_Kidney_Interaction',
       'Gender_Lung_Interaction', 'Symptom_Count'],
      dtype='object')

In [50]:
df_scaled = df_encoded[['Age','Heart_Condition_Severity_Index',
          'Lung_Condition_Severity_Index',  'Kidney_Condition_Severity_Index',
       'Chronic_Condition_Severity_Index',            'Age_Heart_Interaction',
                   'Age_Lung_Interaction',           'Age_Kidney_Interaction',
                'Age_Chronic_Interaction',           'Gender_Heart_Interaction',        
                'Gender_Kidney_Interaction',
                'Gender_Lung_Interaction',   'Symptom_Count']].copy()

df_nonscaled = df_encoded.drop(columns=['Age', 'Heart_Condition_Severity_Index',
          'Lung_Condition_Severity_Index',  'Kidney_Condition_Severity_Index',
       'Chronic_Condition_Severity_Index',            'Age_Heart_Interaction',
                   'Age_Lung_Interaction',           'Age_Kidney_Interaction',
                'Age_Chronic_Interaction',             
               'Gender_Heart_Interaction',        'Gender_Kidney_Interaction',
                'Gender_Lung_Interaction',     'Symptom_Count'])

In [53]:
scaler = MinMaxScaler(feature_range=(0, 1))
df_scaled = scaler.fit_transform(df_scaled)
df_scaled = pd.DataFrame(df_scaled, columns = ['Age','Heart_Condition_Severity_Index',
          'Lung_Condition_Severity_Index',  'Kidney_Condition_Severity_Index',
       'Chronic_Condition_Severity_Index',            'Age_Heart_Interaction',
                   'Age_Lung_Interaction',           'Age_Kidney_Interaction',
                'Age_Chronic_Interaction',             
               'Gender_Heart_Interaction',        'Gender_Kidney_Interaction',
                'Gender_Lung_Interaction',          'Symptom_Count'])
df_scaled

Unnamed: 0,Age,Heart_Condition_Severity_Index,Lung_Condition_Severity_Index,Kidney_Condition_Severity_Index,Chronic_Condition_Severity_Index,Age_Heart_Interaction,Age_Lung_Interaction,Age_Kidney_Interaction,Age_Chronic_Interaction,Gender_Heart_Interaction,Gender_Kidney_Interaction,Gender_Lung_Interaction,Symptom_Count
0,0.038462,0.319149,0.000000,0.666667,0.380282,0.021930,0.000000,0.042194,0.016487,0.340426,0.166667,0.500000,0.500000
1,0.512821,0.680851,0.000000,1.000000,0.563380,0.392982,0.000000,0.531646,0.348837,0.840426,1.000000,0.500000,0.833333
2,0.871795,0.744681,0.571429,0.222222,0.774648,0.716374,0.512821,0.196906,0.774037,0.127660,0.388889,0.214286,0.666667
3,0.025641,0.361702,0.000000,1.000000,0.352113,0.019883,0.000000,0.050633,0.008678,0.319149,0.000000,0.500000,0.666667
4,0.192308,0.255319,0.571429,0.555556,0.394366,0.059649,0.124542,0.119550,0.096494,0.372340,0.222222,0.214286,0.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,0.435897,0.787234,1.000000,0.222222,0.830986,0.389474,0.461538,0.101266,0.415481,0.893617,0.611111,1.000000,0.833333
117,0.564103,0.574468,0.428571,0.333333,0.492958,0.363158,0.252747,0.194093,0.343631,0.212766,0.333333,0.285714,0.666667
118,0.987179,0.042553,0.000000,1.000000,0.281690,0.046199,0.000000,1.000000,0.395696,0.478723,0.000000,0.500000,0.666667
119,0.820513,0.744681,1.000000,0.777778,0.943662,0.675439,0.846154,0.649789,0.866366,0.872340,0.888889,1.000000,0.833333


In [56]:
noise = np.random.normal(0, 0.1, df_scaled.shape)  # Adding small noise
df_noisy = df_scaled + noise

In [58]:
df_final = pd.concat([df_noisy, df_nonscaled], axis=1)
df_final

Unnamed: 0,Age,Heart_Condition_Severity_Index,Lung_Condition_Severity_Index,Kidney_Condition_Severity_Index,Chronic_Condition_Severity_Index,Age_Heart_Interaction,Age_Lung_Interaction,Age_Kidney_Interaction,Age_Chronic_Interaction,Gender_Heart_Interaction,...,kidney stones,Urinary Tract Infection,Needed_or_not,Kidney,heart,heart-kidney,heart-lungs,lung,lung-kidney,nothing
0,0.006293,0.205047,-0.024315,0.543711,0.341403,-0.058130,-0.066235,0.002407,0.017403,0.286215,...,1,1,1,1,0,0,0,0,0,0
1,0.569728,0.686752,-0.019668,0.986283,0.543193,0.404489,0.043270,0.542231,0.272515,0.986512,...,1,1,1,0,0,1,0,0,0,0
2,0.944825,0.788112,0.841402,0.214065,0.716383,0.451301,0.566562,0.324834,0.746493,-0.045665,...,1,0,1,0,1,0,0,0,0,0
3,-0.047304,0.417006,-0.052820,1.070177,0.252136,0.135256,0.047027,0.132811,0.249915,0.296860,...,1,1,1,0,0,1,0,0,0,0
4,0.168973,0.197831,0.487980,0.589567,0.576712,-0.013581,0.115214,0.135614,0.105115,0.479508,...,1,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,0.637550,0.842200,0.999281,0.185170,0.732569,0.472442,0.530865,0.028301,0.324151,0.740150,...,1,0,1,0,0,0,1,0,0,0
117,0.598350,0.708633,0.182730,0.219723,0.525098,0.483494,0.122401,0.094824,0.299017,0.124900,...,0,0,1,0,1,0,0,0,0,0
118,0.969240,-0.101085,0.216849,1.052776,0.278363,0.085790,-0.122694,1.017957,0.426964,0.465541,...,1,1,1,1,0,0,0,0,0,0
119,0.755095,0.992625,0.993873,0.825148,0.989717,0.750605,0.951262,0.565449,0.975042,0.794694,...,0,1,1,0,0,0,0,1,0,0


In [60]:
df_final = df_final.drop(columns=['heart Attack',
                            'Heart Valve',            'Heart Defect at birth',
                         'Cardiomyopathy',           'Severe cystic fibrosis',
                     'copd(lung_Disease)',      'Repeated urinary infections',
                              'Diabities',                    'kidney stones',
                'Urinary Tract Infection'])

In [62]:
df_final.to_csv("dataset.csv")

In [64]:
df_final.columns

Index(['Age', 'Heart_Condition_Severity_Index',
       'Lung_Condition_Severity_Index', 'Kidney_Condition_Severity_Index',
       'Chronic_Condition_Severity_Index', 'Age_Heart_Interaction',
       'Age_Lung_Interaction', 'Age_Kidney_Interaction',
       'Age_Chronic_Interaction', 'Gender_Heart_Interaction',
       'Gender_Kidney_Interaction', 'Gender_Lung_Interaction', 'Symptom_Count',
       'Gender', 'Needed_or_not', 'Kidney', 'heart', 'heart-kidney',
       'heart-lungs', 'lung', 'lung-kidney', 'nothing'],
      dtype='object')

In [66]:
X = df_final.drop(columns=['Needed_or_not'])
y = df_final['Needed_or_not']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [68]:
X_train.columns


Index(['Age', 'Heart_Condition_Severity_Index',
       'Lung_Condition_Severity_Index', 'Kidney_Condition_Severity_Index',
       'Chronic_Condition_Severity_Index', 'Age_Heart_Interaction',
       'Age_Lung_Interaction', 'Age_Kidney_Interaction',
       'Age_Chronic_Interaction', 'Gender_Heart_Interaction',
       'Gender_Kidney_Interaction', 'Gender_Lung_Interaction', 'Symptom_Count',
       'Gender', 'Kidney', 'heart', 'heart-kidney', 'heart-lungs', 'lung',
       'lung-kidney', 'nothing'],
      dtype='object')

In [70]:
model = models.Sequential()
model.add(layers.Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
# Final layer with 'sigmoid' activation for binary classification
model.add(layers.Dense(1, activation='sigmoid'))

# Compile the model using binary crossentropy for binary classification
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 133ms/step - accuracy: 0.6702 - loss: 0.6735 - val_accuracy: 0.7000 - val_loss: 0.6542
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7815 - loss: 0.6077 - val_accuracy: 0.6500 - val_loss: 0.6242
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.7017 - loss: 0.5625 - val_accuracy: 0.6000 - val_loss: 0.5955
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.7095 - loss: 0.5224 - val_accuracy: 0.6000 - val_loss: 0.5659
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.7564 - loss: 0.4540 - val_accuracy: 0.6000 - val_loss: 0.5367
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.7407 - loss: 0.4080 - val_accuracy: 0.6500 - val_loss: 0.4925
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x1febdff24d0>

In [71]:
y_pred_proba = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step


In [72]:
y_pred = (y_pred_proba > 0.5)

In [73]:
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
logloss = log_loss(y_test, y_pred_proba)
print(accuracy)
print(f1)
print(logloss)

1.0
1.0
0.19516009265770934


In [74]:
#random forestt 

In [75]:
X_train.columns = X_train.columns.astype(str)
X_test.columns = X_test.columns.astype(str)

In [76]:
# Initialize the classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Make predictions
y_pred_rf = rf_model.predict(X_test)

y_proba_rf = rf_model.predict_proba(X_test)[:, 1] 

accuracy_rf = accuracy_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)
logloss_rf = log_loss(y_test, y_proba_rf)
print(accuracy_rf)
print(f1_rf)
print(logloss_rf)

1.0
1.0
0.1119281574473353


In [77]:
import pickle
with open('model1.pkl', 'wb') as file:
    pickle.dump(rf_model, file)

print("Model saved successfully!")

Model saved successfully!


In [78]:
# Initialize the classifier
xgb_model = xgb.XGBClassifier(n_estimators=100, random_state=42)

# Train the model
xgb_model.fit(X_train, y_train)

# Make class predictions
y_pred_xgb = xgb_model.predict(X_test)

# Make probability predictions
y_proba_xgb = xgb_model.predict_proba(X_test)[:, 1] 

In [89]:
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
f1_xgb = f1_score(y_test, y_pred_xgb)
logloss_xgb = log_loss(y_test, y_proba_xgb)
print(accuracy_xgb)
print(f1_xgb)
print(logloss_xgb)

1.0
1.0
0.020190676245687142
