In [34]:
import pickle
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
# from scikeras.wrappers import KerasClassifier
from keras import models, layers

In [35]:
# Raw string: the Windows backslashes are taken literally instead of relying on
# unrecognised escapes (\L, \M, \m), which emit a SyntaxWarning on Python 3.12+.
# The resulting path is unchanged.
df = pd.read_csv(r"D:\LifeDevice_SIH\ML_BACKEND\model\test_data.csv")

In [36]:
# Display the first rows of the loaded frame.
df.head()

Unnamed: 0.1,Unnamed: 0,Age,Gender,heart Attack,Heart Valve,Heart Defect at birth,Cardiomyopathy,Severe cystic fibrosis,copd(lung_Disease),Repeated urinary infections,Diabities,kidney stones,Urinary Tract Infection,Transplant
0,0,5,Female,0,0,0,1,0,0,0,1,1,1,Kidney
1,1,42,Male,1,1,1,0,0,0,1,0,1,1,heart-kidney


In [37]:
class CustomFeatureEngineering(BaseEstimator, TransformerMixin):
    """Derive severity, interaction and one-hot features from the raw patient frame.

    ``transform`` reads the columns ``Age``, ``Gender``, ``Transplant`` and the
    ten condition columns named in ``symptom_columns``. Every input column
    except ``Transplant`` is kept in the output; the engineered columns and the
    transplant indicator columns are appended after them.
    """

    def fit(self, X, y=None):
        """Record the fixed list of transplant categories; nothing is learned from ``X``.

        Returns ``self`` so the transformer can be chained in a pipeline.
        """
        self.transplant_categories_ = ['Kidney', 'heart', 'heart-kidney', 'heart-lungs', 'lung', 'lung-kidney', 'nothing']
        return self

    def transform(self, X):
        """Return a copy of ``X`` with engineered features and one-hot transplant columns.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain the columns listed in the class docstring.

        Returns
        -------
        pandas.DataFrame
            ``X`` without ``Transplant``, plus the severity indices, the age and
            gender interaction terms, ``Symptom_Count`` and exactly one indicator
            column per entry of ``transplant_categories_`` (in that order).
        """
        # Work on a copy so the caller's frame is never modified.
        X_encoded = X.copy()

        # Signed gender encoding. Values other than 'Female'/'Male' become NaN
        # (Series.map leaves unmapped keys as missing).
        X_encoded['Gender'] = X_encoded['Gender'].map({'Female': -1, 'Male': 1})

        # Weighted sums of the condition flags, one index per organ system.
        X_encoded['Heart_Condition_Severity_Index'] = (
            X_encoded['heart Attack'] * 100 +
            X_encoded['Heart Valve'] * 10 +
            X_encoded['Heart Defect at birth'] * 50 +
            X_encoded['Cardiomyopathy'] * 75
        )

        X_encoded['Lung_Condition_Severity_Index'] = (
            X_encoded['copd(lung_Disease)'] * 60 +
            X_encoded['Severe cystic fibrosis'] * 80
        )

        X_encoded['Kidney_Condition_Severity_Index'] = (
            X_encoded['kidney stones'] * 20 +
            X_encoded['Repeated urinary infections'] * 30 +
            X_encoded['Urinary Tract Infection'] * 40
        )

        # Overall index: the three organ indices plus a diabetes term.
        X_encoded['Chronic_Condition_Severity_Index'] = (
            X_encoded['Heart_Condition_Severity_Index'] +
            X_encoded['Lung_Condition_Severity_Index'] +
            X_encoded['Kidney_Condition_Severity_Index'] +
            X_encoded['Diabities'] * 50
        )

        # Age x severity interaction terms.
        X_encoded['Age_Heart_Interaction'] = X_encoded['Age'] * X_encoded['Heart_Condition_Severity_Index']
        X_encoded['Age_Lung_Interaction'] = X_encoded['Age'] * X_encoded['Lung_Condition_Severity_Index']
        X_encoded['Age_Kidney_Interaction'] = X_encoded['Age'] * X_encoded['Kidney_Condition_Severity_Index']
        X_encoded['Age_Chronic_Interaction'] = X_encoded['Age'] * X_encoded['Chronic_Condition_Severity_Index']

        # Gender (+1 / -1) x severity interaction terms.
        X_encoded['Gender_Heart_Interaction'] = X_encoded['Gender'] * X_encoded['Heart_Condition_Severity_Index']
        X_encoded['Gender_Kidney_Interaction'] = X_encoded['Gender'] * X_encoded['Kidney_Condition_Severity_Index']
        X_encoded['Gender_Lung_Interaction'] = X_encoded['Gender'] * X_encoded['Lung_Condition_Severity_Index']

        # Row-wise total of the raw condition columns.
        symptom_columns = ['heart Attack', 'Heart Valve', 'Heart Defect at birth', 'Cardiomyopathy',
                           'Severe cystic fibrosis', 'copd(lung_Disease)', 'Repeated urinary infections',
                           'Diabities', 'kidney stones', 'Urinary Tract Infection']
        X_encoded['Symptom_Count'] = X_encoded[symptom_columns].sum(axis=1)

        # One-hot encode 'Transplant'; missing values are mapped to the explicit
        # 'nothing' category rather than being kept as NaN.
        X_encoded["Transplant"] = X_encoded["Transplant"].fillna("nothing")
        one_hot_encoded = pd.get_dummies(X_encoded['Transplant']).astype(int)

        # Pin the indicator columns to the predefined categories in a fixed order.
        # Categories absent from this batch are added as all-zero columns, and
        # values outside the predefined list are dropped, so the output layout
        # does not depend on which categories happen to appear in X.
        one_hot_encoded = one_hot_encoded.reindex(columns=self.transplant_categories_, fill_value=0)

        # Replace the raw 'Transplant' column with its indicator columns.
        X_encoded = pd.concat([X_encoded.drop(columns='Transplant'), one_hot_encoded], axis=1)

        return X_encoded

# Columns listed for scaling, grouped by how they are derived.
scale_columns = [
    'Age',
    # severity indices
    'Heart_Condition_Severity_Index',
    'Lung_Condition_Severity_Index',
    'Kidney_Condition_Severity_Index',
    'Chronic_Condition_Severity_Index',
    # age interactions
    'Age_Heart_Interaction',
    'Age_Lung_Interaction',
    'Age_Kidney_Interaction',
    'Age_Chronic_Interaction',
    # gender interactions
    'Gender_Heart_Interaction',
    'Gender_Kidney_Interaction',
    'Gender_Lung_Interaction',
    # total of the raw condition columns
    'Symptom_Count',
]

# Columns listed as not scaled: the encoded gender and the transplant indicators.
non_scaled_columns = [
    'Gender',
    'Kidney',
    'heart',
    'heart-kidney',
    'heart-lungs',
    'lung',
    'lung-kidney',
    'nothing',
]

def drop_columns_if_exist(X):
    """Wrap ``X`` in a DataFrame labelled with the scaled + non-scaled column names.

    The frame is built with ``scale_columns + non_scaled_columns`` as its column
    labels, then any raw condition column still present is dropped. None of the
    raw condition names appear in those two lists, so the drop removes nothing
    and only yields a fresh frame.
    """
    # Column labels are coerced to str before building the frame.
    expected_names = list(map(str, scale_columns + non_scaled_columns))
    labelled = pd.DataFrame(X, columns=expected_names)

    raw_condition_columns = (
        'heart Attack', 'Heart Valve', 'Heart Defect at birth', 'Cardiomyopathy',
        'Severe cystic fibrosis', 'copd(lung_Disease)', 'Repeated urinary infections',
        'Diabities', 'kidney stones', 'Urinary Tract Infection',
    )

    # Only ask drop() for names that are actually present, so it never raises.
    still_present = labelled.columns.intersection(raw_condition_columns).tolist()
    return labelled.drop(columns=still_present)

In [38]:
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load a pipeline.pkl that comes from a trusted source.
# NOTE(review): presumably the pickle refers to CustomFeatureEngineering and
# drop_columns_if_exist by name, so the cell defining them must run first — confirm.
# Raw string: the Windows backslashes are taken literally instead of relying on
# unrecognised escapes (\L, \M, \m, \p), which emit a SyntaxWarning on Python 3.12+.
with open(r'D:\LifeDevice_SIH\ML_BACKEND\model\pipeline.pkl', 'rb') as f:
    pipeline = pickle.load(f)

In [39]:
# Score the loaded rows: class probabilities first, then the predicted labels.
predicted_probs = pipeline.predict_proba(df)
predicted_classes = pipeline.predict(df)

# Print each result under its heading.
for heading, values in (
    ("Predicted probabilities:\n", predicted_probs),
    ("Predicted classes:\n", predicted_classes),
):
    print(heading, values)

Predicted probabilities:
 [[0.07 0.93]
 [0.   1.  ]]
Predicted classes:
 [1 1]
