In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# Read the cleaned chronic-kidney-disease CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that exact file exists.
file_path = r"C:\Users\nidhi\Documents\Spring_Semester\Dissertation\Kaggle-chronic kidney disease prediction\Chronic_Kidney_Dsease_data_Cleaned.csv"
df = pd.read_csv(file_path)

# Version 8 of the LIT (Learning Interpretability Tool) demo

In [None]:
from collections.abc import Sequence
import sys
from typing import Optional

from absl import app
from absl import flags
from absl import logging
from lit_nlp import dev_server
from lit_nlp import server_flags
from lit_nlp.api import layout
from lit_nlp.components import minimal_targeted_counterfactuals

import pandas as pd
import joblib
import numpy as np
from sklearn.base import ClassifierMixin
from lit_nlp.api import model as lit_model
from lit_nlp.api import types as lit_types
from lit_nlp.api import dataset as lit_dataset

# Locations of the pickled model and the cleaned CSV.
# NOTE(review): both are absolute, machine-specific Windows paths.
MODEL_PATH = r"C:\Users\nidhi\Documents\Spring_Semester\Dissertation\best_rf_model.pkl"
DATA_PATH = r"C:\Users\nidhi\Documents\Spring_Semester\Dissertation\Kaggle-chronic kidney disease prediction\Chronic_Kidney_Dsease_data_Cleaned.csv"

# The 30 feature columns, in the order they are selected from the CSV.
# NOTE(review): 'PatientID' is an identifier rather than a measurement —
# confirm the model was really trained with it.
FEATURE_COLUMNS = [
    'SerumCreatinine',
    'Itching',
    'FastingBloodSugar',
    'MuscleCramps',
    'BUNLevels',
    'ProteinInUrine',
    'SystolicBP',
    'HbA1c',
    'BMI',
    'FamilyHistoryKidneyDisease',
    'Edema',
    'CholesterolHDL',
    'HemoglobinLevels',
    'Gender',
    'QualityOfLifeScore',
    'CholesterolLDL',
    'Statins',
    'CholesterolTriglycerides',
    'SerumElectrolytesPhosphorus',
    'AntidiabeticMedications',
    'OccupationalExposureChemicals',
    'WaterQuality',
    'NauseaVomiting',
    'PhysicalActivity',
    'SocioeconomicStatus',
    'EducationLevel',
    'CholesterolTotal',
    'DietQuality',
    'PatientID',
    'GFR',
]

# Function to load your model
def load_model(model_path):
    """Deserialize the persisted model stored at ``model_path`` using joblib.

    Args:
        model_path: Path of the file handed to ``joblib.load``.

    Returns:
        The object ``joblib.load`` produces for that file.

    NOTE(review): joblib files are pickle-based, so loading one can execute
    arbitrary code — only load files from a trusted source.
    """
    loaded = joblib.load(model_path)
    return loaded

# Function to load your dataset
def load_dataset(data_path):
    """Read the CSV at ``data_path`` and keep the feature columns plus the label.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A DataFrame whose columns are ``FEATURE_COLUMNS`` followed by
        ``'Diagnosis'``, in that order.
    """
    # Label column goes last; downstream code relies on that position.
    wanted_columns = [*FEATURE_COLUMNS, 'Diagnosis']
    frame = pd.read_csv(data_path)[wanted_columns]
    print("Dataset loaded with shape:", frame.shape)
    return frame

# Wrapper for scikit-learn model to make it compatible with LIT
class SklearnModelWrapper(lit_model.Model):
    """Exposes a fitted scikit-learn classifier through the LIT model interface.

    Each example carries its whole feature vector under the single ``'input'``
    key; every prediction reports the predicted class and the per-class
    probabilities returned by ``predict_proba``.
    """

    def __init__(self, model: ClassifierMixin):
        """Keeps the classifier and the class labels listed in ``model.classes_``."""
        self.model = model
        self.class_names = list(model.classes_)

    def predict_minibatch(self, inputs):
        """Predict on a batch of inputs.

        Args:
            inputs: Iterable of example dicts, each with a feature vector under
                ``'input'``.

        Returns:
            A list with one dict per example holding ``'predictions'`` (the
            predicted class) and ``'probabilities'`` (a plain list of floats).
            An empty batch yields an empty list.
        """
        inputs = list(inputs)
        if not inputs:
            # np.array([]) is 1-D, which scikit-learn rejects with a ValueError;
            # an empty batch simply has no predictions.
            return []
        input_data = np.array([ex['input'] for ex in inputs])
        probabilities = self.model.predict_proba(input_data)
        predicted_classes = self.model.predict(input_data)
        return [{
            'predictions': pred,
            'probabilities': probs.tolist()
        } for pred, probs in zip(predicted_classes, probabilities)]

    def input_spec(self):
        """Declares the single ``'input'`` field consumed by ``predict_minibatch``."""
        return {
            'input': lit_types.Embeddings()
        }

    def output_spec(self):
        """Declares the two output fields, both using ``class_names`` as vocab."""
        return {
            'predictions': lit_types.CategoryLabel(vocab=self.class_names),
            'probabilities': lit_types.MulticlassPreds(vocab=self.class_names),
        }

    def predict(self, inputs):
        """Delegates to ``predict_minibatch`` for the whole input at once."""
        return self.predict_minibatch(inputs)

# Wrapper for the dataset to make it compatible with LIT
class CustomDataset(lit_dataset.Dataset):
    """Turns a DataFrame into LIT examples with an ``'input'`` vector and a ``'label'``.

    The last column of the DataFrame is taken as the label; every column before
    it goes into the ``'input'`` list.
    """

    def __init__(self, dataframe):
        """Builds one example dict per DataFrame row.

        Args:
            dataframe: DataFrame whose final column is the label and whose
                remaining columns are the features.

        Note: ``iterrows`` yields each row as a single Series, so pandas may
        upcast mixed int/float columns to one common dtype.
        """
        # Use .iloc for positional access: plain integer keys on a
        # string-labelled Series are deprecated in pandas 2.x.
        self._examples = [{
            'input': row.iloc[:-1].tolist(),
            'label': row.iloc[-1]
        } for _, row in dataframe.iterrows()]

    def spec(self):
        """Declares the ``'input'`` and ``'label'`` fields of each example."""
        return {
            'input': lit_types.Embeddings(),
            'label': lit_types.CategoryLabel(),
        }

    def __len__(self):
        """Number of examples currently held."""
        return len(self._examples)

    def __getitem__(self, idx):
        """Returns the example (or slice of examples) at ``idx``."""
        return self._examples[idx]

# Custom frontend layout; see api/layout.py
# Upper pane: one 'Main' tab with three modules; lower pane reuses the
# standard layout's lower section.
modules = layout.LitModuleName
CUSTOM_LAYOUT = layout.LitCanonicalLayout(
    upper={'Main': [modules.DiveModule, modules.DataTableModule, modules.DatapointEditorModule]},
    lower=layout.STANDARD_LAYOUT.lower,
    description='Custom layout for your demo.',
)
# Default layouts plus the custom one registered as 'custom_layout'.
CUSTOM_LAYOUTS = {**layout.DEFAULT_LAYOUTS, 'custom_layout': CUSTOM_LAYOUT}

# Try to define the port flag, catch the DuplicateFlagError if it is already defined
# (presumably when this cell is re-run in the same kernel or another module has
# already registered 'port' — TODO confirm which case applies).
# When the flag already exists, the default of 6006 given here is NOT applied;
# the launcher at the bottom of this cell passes --port=6006 explicitly.
try:
    flags.DEFINE_integer('port', 6006, 'Port to run the LIT server on.')
except flags.DuplicateFlagError:
    pass

def run_lit(argv):
    """Builds the LIT demo around the saved classifier and serves it.

    Args:
        argv: Positional arguments handed over by ``app.run``; not used here.

    Returns:
        Whatever ``dev_server.Server.serve()`` returns.
    """
    # Load the raw artefacts first, then wrap them for LIT.
    loaded_model = load_model(MODEL_PATH)
    frame = load_dataset(DATA_PATH)

    models = {'custom_classifier': SklearnModelWrapper(loaded_model)}
    datasets = {'custom_dataset': CustomDataset(frame)}

    # Cap every dataset at a fixed number of examples (hard-coded, not a flag).
    max_examples = 1660  # or any other number you want to set as a limit
    if max_examples is not None:
        for name, ds in datasets.items():
            logging.info("Dataset: '%s' with %d examples", name, len(ds))
            ds._examples = ds._examples[:max_examples]
            logging.info('  truncated to %d examples', len(ds))

    # Counterfactual generator offered in the UI.
    generators = {
        'Minimal Targeted Counterfactuals': minimal_targeted_counterfactuals.TabularMTC()
    }

    server = dev_server.Server(
        models,
        datasets,
        generators=generators,
        layouts=CUSTOM_LAYOUTS,
        **server_flags.get_flags(),
    )
    return server.serve()

if __name__ == '__main__':
    # The Jupyter/IPython and plain-script launches differ only in the program
    # name placed in argv[0]; passing argv explicitly keeps absl from parsing
    # the kernel's own command line.
    try:
        app.run(
            run_lit,
            argv=[
                'flags_dummy' if ('ipykernel' in sys.modules or 'ipython' in sys.modules) else 'run_lit',
                '--port=6006',
            ],
        )
    except flags.Error as flag_error:
        print(flag_error)
        sys.exit(1)


# Final LIT code for the Random Forest model

In [None]:
from collections.abc import Sequence
import sys
from typing import Optional

from absl import app
from absl import flags
from absl import logging
from lit_nlp import dev_server
from lit_nlp import server_flags
from lit_nlp.api import layout
from lit_nlp.components import minimal_targeted_counterfactuals

import pandas as pd
import joblib
import numpy as np
from sklearn.base import ClassifierMixin
from lit_nlp.api import model as lit_model
from lit_nlp.api import types as lit_types
from lit_nlp.api import dataset as lit_dataset

# Locations of the pickled model and the cleaned CSV.
# NOTE(review): both are absolute, machine-specific Windows paths.
MODEL_PATH = r"C:\Users\nidhi\Documents\Spring_Semester\Dissertation\python files\best_rf_model.pkl"
DATA_PATH = r"C:\Users\nidhi\Documents\Spring_Semester\Dissertation\Kaggle-chronic kidney disease prediction\Chronic_Kidney_Dsease_data_Cleaned.csv"

# The 30 feature columns, in the order they are fed to the model.
# NOTE(review): 'PatientID' is an identifier rather than a measurement —
# confirm the model was really trained with it.
FEATURE_COLUMNS = [
    'SerumCreatinine',
    'Itching',
    'FastingBloodSugar',
    'MuscleCramps',
    'BUNLevels',
    'ProteinInUrine',
    'SystolicBP',
    'HbA1c',
    'BMI',
    'FamilyHistoryKidneyDisease',
    'Edema',
    'CholesterolHDL',
    'HemoglobinLevels',
    'Gender',
    'QualityOfLifeScore',
    'CholesterolLDL',
    'Statins',
    'CholesterolTriglycerides',
    'SerumElectrolytesPhosphorus',
    'AntidiabeticMedications',
    'OccupationalExposureChemicals',
    'WaterQuality',
    'NauseaVomiting',
    'PhysicalActivity',
    'SocioeconomicStatus',
    'EducationLevel',
    'CholesterolTotal',
    'DietQuality',
    'PatientID',
    'GFR',
]

# Function to load your model
def load_model(model_path):
    """Deserialize the persisted model stored at ``model_path`` using joblib.

    Args:
        model_path: Path of the file handed to ``joblib.load``.

    Returns:
        The object ``joblib.load`` produces for that file.

    NOTE(review): joblib files are pickle-based, so loading one can execute
    arbitrary code — only load files from a trusted source.
    """
    loaded = joblib.load(model_path)
    return loaded

# Function to load your dataset
def load_dataset(data_path):
    """Read the CSV at ``data_path`` and keep the feature columns plus the label.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A DataFrame whose columns are ``FEATURE_COLUMNS`` followed by
        ``'Diagnosis'``, in that order.
    """
    # Label column goes last; downstream code relies on that position.
    wanted_columns = [*FEATURE_COLUMNS, 'Diagnosis']
    frame = pd.read_csv(data_path)[wanted_columns]
    print("Dataset loaded with shape:", frame.shape)
    return frame

# Wrapper for scikit-learn model to make it compatible with LIT
class SklearnModelWrapper(lit_model.Model):
    """Exposes a fitted scikit-learn classifier through the LIT model interface.

    Each example is a dict with one scalar entry per name in
    ``FEATURE_COLUMNS``; every prediction reports the predicted class and the
    per-class probabilities returned by ``predict_proba``.
    """

    def __init__(self, model: ClassifierMixin):
        """Keeps the classifier and the class labels listed in ``model.classes_``."""
        self.model = model
        self.class_names = list(model.classes_)

    def predict_minibatch(self, inputs):
        """Predict on a batch of inputs.

        Args:
            inputs: Iterable of example dicts keyed by feature column name.

        Returns:
            A list with one dict per example holding ``'predictions'`` (the
            predicted class) and ``'probabilities'`` (a plain list of floats).
            An empty batch yields an empty list.
        """
        inputs = list(inputs)
        if not inputs:
            # np.array([]) is 1-D, which scikit-learn rejects with a ValueError;
            # an empty batch simply has no predictions.
            return []
        # Rebuild each row in FEATURE_COLUMNS order so the column positions
        # match what the classifier receives for every example.
        input_data = np.array([[ex[col] for col in FEATURE_COLUMNS] for ex in inputs])
        probabilities = self.model.predict_proba(input_data)
        predicted_classes = self.model.predict(input_data)
        return [{
            'predictions': pred,
            'probabilities': probs.tolist()
        } for pred, probs in zip(predicted_classes, probabilities)]

    def input_spec(self):
        """Declares one ``Scalar`` input field per feature column."""
        return {col: lit_types.Scalar() for col in FEATURE_COLUMNS}

    def output_spec(self):
        """Declares the two output fields, both using ``class_names`` as vocab."""
        return {
            'predictions': lit_types.CategoryLabel(vocab=self.class_names),
            'probabilities': lit_types.MulticlassPreds(vocab=self.class_names),
        }

    def predict(self, inputs):
        """Delegates to ``predict_minibatch`` for the whole input at once."""
        return self.predict_minibatch(inputs)

# Wrapper for the dataset to make it compatible with LIT
class CustomDataset(lit_dataset.Dataset):
    """Turns a DataFrame into LIT examples with one scalar field per feature.

    Every name in ``FEATURE_COLUMNS`` becomes a field of the example, and the
    last column of the DataFrame is stored under ``'label'``.
    """

    def __init__(self, dataframe):
        """Builds one example dict per DataFrame row.

        Args:
            dataframe: DataFrame containing every column in
                ``FEATURE_COLUMNS`` and the label as its final column.

        Raises:
            KeyError: If a feature column is missing from ``dataframe``.

        Note: ``iterrows`` yields each row as a single Series, so pandas may
        upcast mixed int/float columns to one common dtype.
        """
        # Look features up by column name rather than by position: integer keys
        # on a string-labelled Series are deprecated in pandas 2.x, and name
        # lookup stays correct even if the column order differs from
        # FEATURE_COLUMNS. The label is still "the last column", via .iloc.
        self._examples = [{
            **{col: row[col] for col in FEATURE_COLUMNS},
            'label': row.iloc[-1]
        } for _, row in dataframe.iterrows()]

    def spec(self):
        """Declares a ``Scalar`` per feature column plus the ``'label'`` field."""
        return {col: lit_types.Scalar() for col in FEATURE_COLUMNS} | {
            'label': lit_types.CategoryLabel(),
        }

    def __len__(self):
        """Number of examples currently held."""
        return len(self._examples)

    def __getitem__(self, idx):
        """Returns the example (or slice of examples) at ``idx``."""
        return self._examples[idx]

# Custom frontend layout; see api/layout.py
# Upper pane: one 'Main' tab with three modules; lower pane reuses the
# standard layout's lower section.
modules = layout.LitModuleName
CUSTOM_LAYOUT = layout.LitCanonicalLayout(
    upper={'Main': [modules.DiveModule, modules.DataTableModule, modules.DatapointEditorModule]},
    lower=layout.STANDARD_LAYOUT.lower,
    description='Custom layout for your demo.',
)
# Default layouts plus the custom one registered as 'custom_layout'.
CUSTOM_LAYOUTS = {**layout.DEFAULT_LAYOUTS, 'custom_layout': CUSTOM_LAYOUT}

# Try to define the port flag, catch the DuplicateFlagError if it is already defined
# (presumably when this cell is re-run in the same kernel or another module has
# already registered 'port' — TODO confirm which case applies).
# When the flag already exists, the default of 6007 given here is NOT applied;
# the launcher at the bottom of this cell passes --port=6007 explicitly.
try:
    flags.DEFINE_integer('port', 6007, 'Port to run the LIT server on.')
except flags.DuplicateFlagError:
    pass

def run_lit(argv):
    """Builds the LIT demo around the saved classifier and serves it.

    Args:
        argv: Positional arguments handed over by ``app.run``; not used here.

    Returns:
        Whatever ``dev_server.Server.serve()`` returns.
    """
    # Load the raw artefacts first, then wrap them for LIT.
    loaded_model = load_model(MODEL_PATH)
    frame = load_dataset(DATA_PATH)

    models = {'custom_classifier': SklearnModelWrapper(loaded_model)}
    datasets = {'custom_dataset': CustomDataset(frame)}

    # Cap every dataset at a fixed number of examples (hard-coded, not a flag).
    max_examples = 1660  # or any other number you want to set as a limit
    if max_examples is not None:
        for name, ds in datasets.items():
            logging.info("Dataset: '%s' with %d examples", name, len(ds))
            ds._examples = ds._examples[:max_examples]
            logging.info('  truncated to %d examples', len(ds))

    # Counterfactual generator offered in the UI.
    generators = {
        'Minimal Targeted Counterfactuals': minimal_targeted_counterfactuals.TabularMTC()
    }

    server = dev_server.Server(
        models,
        datasets,
        generators=generators,
        layouts=CUSTOM_LAYOUTS,
        **server_flags.get_flags(),
    )
    return server.serve()

if __name__ == '__main__':
    # The Jupyter/IPython and plain-script launches differ only in the program
    # name placed in argv[0]; passing argv explicitly keeps absl from parsing
    # the kernel's own command line.
    try:
        app.run(
            run_lit,
            argv=[
                'flags_dummy' if ('ipykernel' in sys.modules or 'ipython' in sys.modules) else 'run_lit',
                '--port=6007',
            ],
        )
    except flags.Error as flag_error:
        print(flag_error)
        sys.exit(1)


Dataset loaded with shape: (1659, 31)


I0914 01:45:30.930526 33860 752927453.py:133] Dataset: 'custom_dataset' with 1659 examples
I0914 01:45:30.932511 33860 752927453.py:135]   truncated to 1659 examples
I0914 01:45:30.937283 33860 dev_server.py:90] [1m[31m
 (    (           
 )\ ) )\ )  *   ) 
(()/((()/(` )  /( 
 /(_))/(_))( )(_))
(_)) (_)) (_(_()) 
| |  |_ _||_   _| 
| |__ | |   | |   
|____|___|  |_|   

[0m
I0914 01:45:30.939307 33860 dev_server.py:91] [1m[32mStarting LIT server...[0m
W0914 01:45:30.943280 33860 model.py:126] Unable to infer init spec for model 'SklearnModelWrapper'. Unsupported type '<class 'sklearn.base.ClassifierMixin'>' for parameter 'model' of '__init__'. If possible (e.g., this parameter is Optional), please implement a spec literal instead of using inferencing.
W0914 01:45:30.947419 33860 dataset.py:154] Unable to infer init spec for dataset 'CustomDataset'. Unable to infer a type for parameter 'dataframe' of '__init__'. Please add a type hint or default value, or implement a Spec literal.