# Data Collection & Preprocessing Module

Structured Data (Diabetes, Heart, Kidney)

In [18]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
import joblib

# Ensure the models directory exists
if not os.path.exists("models"):
    os.makedirs("models")

# Define dataset paths
datasets = {
    "diabetes": "datasets/diabetes.csv",
    "heart": "datasets/heart.csv",
    "kidney": "datasets/kidney.csv"
}

processed_data = {}

for disease, path in datasets.items():
    print(f"Processing {disease} dataset...")

    # Load dataset
    if not os.path.exists(path):
        print(f"Error: {path} not found!")
        continue  # Skip if file doesn't exist

    df = pd.read_csv(path)

    # Convert categorical text columns to numeric values
    for col in df.columns:
        if df[col].dtype == 'object':  # If column contains text
            df[col] = df[col].astype(str)  # Ensure all values are strings
            df[col] = LabelEncoder().fit_transform(df[col])  # Convert to numeric

    # Convert all non-numeric data to NaN and handle missing values
    df = df.apply(pd.to_numeric, errors='coerce')
    df.fillna(df.mean(), inplace=True)  # Fill NaN with column mean

    # Verify if any column still contains non-numeric values
    for col in df.columns:
        if df[col].dtype == 'object':
            print(f"Error: Column {col} in {disease} dataset is still non-numeric!")
            print(df[col].unique())  # Print unique values for debugging

    # Separate features and target
    X = df.iloc[:, :-1]  # All columns except last one
    y = df.iloc[:, -1]   # Last column (target)

    # Normalize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Save processed data
    processed_data[disease] = (X_scaled, y)

    # Save the scaler
    scaler_path = f"models/{disease}_scaler.pkl"
    joblib.dump(scaler, scaler_path)

    print(f"{disease} dataset processed and scaler saved at {scaler_path}")

print("All structured datasets processed successfully.")


Processing diabetes dataset...
diabetes dataset processed and scaler saved at models/diabetes_scaler.pkl
Processing heart dataset...
heart dataset processed and scaler saved at models/heart_scaler.pkl
Processing kidney dataset...
kidney dataset processed and scaler saved at models/kidney_scaler.pkl
All structured datasets processed successfully.


1. Import Required Libraries

In [13]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator


2. Load and Preprocess Images

In [14]:
def load_and_preprocess_images(path, img_size=(128, 128)):
    """
    Load images from the dataset directory, resize them, normalize pixel values, and return arrays.

    Args:
        path (str): Path to the image dataset folder.
        img_size (tuple): Target size for resizing images (default: 128x128).

    Returns:
        np.array: Processed image data.
        np.array: Corresponding labels.
    """
    data = []
    labels = []
    
    if not os.path.exists(path):
        print(f"Error: Directory '{path}' does not exist!")
        return np.array(data), np.array(labels)

    for label in os.listdir(path):
        label_path = os.path.join(path, label)
        if not os.path.isdir(label_path):  # Ensure it's a folder
            continue

        for img_name in os.listdir(label_path):
            img_path = os.path.join(label_path, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            
            if img is None:  # Skip unreadable images
                print(f"Warning: Unable to read {img_path}, skipping...")
                continue

            img = cv2.resize(img, img_size)  # Resize image
            img = img / 255.0  # Normalize pixel values to [0,1]
            
            data.append(img)
            labels.append(label)

    return np.array(data), np.array(labels)


3. Save Processed Data

In [15]:
def save_preprocessed_data(X, y, disease_name):
    """
    Save processed image data and labels as NumPy arrays.

    Args:
        X (numpy array): Processed image data.
        y (numpy array): Corresponding labels.
        disease_name (str): Name of the disease dataset.
    """
    np.save(f"datasets/{disease_name}_data.npy", X)
    np.save(f"datasets/{disease_name}_labels.npy", y)
    print(f"{disease_name} dataset saved successfully!")


4.Run the Preprocessing Pipeline

In [16]:
# Process Malaria Dataset
malaria_data, malaria_labels = load_and_preprocess_images("datasets/malaria/")
save_preprocessed_data(malaria_data, malaria_labels, "malaria")

# Process Pneumonia Dataset
pneumonia_data, pneumonia_labels = load_and_preprocess_images("datasets/pneumonia/")
save_preprocessed_data(pneumonia_data, pneumonia_labels, "pneumonia")


malaria dataset saved successfully!
pneumonia dataset saved successfully!


# Model Training Layer

Structured Data Models

In [20]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

def train_model(data_path, model_path):
    df = pd.read_csv(data_path)
    
    # Identify categorical columns and apply label encoding
    label_encoder = LabelEncoder()
    for column in df.select_dtypes(include=['object']).columns:  # For all categorical columns
        df[column] = label_encoder.fit_transform(df[column])
    
    X = df.iloc[:, :-1]  # Features (all columns except the last)
    y = df.iloc[:, -1]   # Target (the last column)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = RandomForestClassifier()
    model.fit(X_train, y_train)
    
    joblib.dump(model, model_path)
    print(f"Model saved: {model_path}")

# Train models
train_model("datasets/diabetes.csv", "models/diabetes.pkl")
train_model("datasets/heart.csv", "models/heart.pkl")
train_model("datasets/kidney.csv", "models/kidney.pkl")


Model saved: models/diabetes.pkl
Model saved: models/heart.pkl
Model saved: models/kidney.pkl


Image-Based Disease Models

In [24]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Set paths
malaria_data_path = "datasets/malaria"
pneumonia_data_path = "datasets/pneumonia"
model_save_path = "models"

# Create models directory if it doesn't exist
os.makedirs(model_save_path, exist_ok=True)

# Define CNN model architecture
def create_cnn_model(input_shape=(128, 128, 3)):
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Prepare image data generators
def prepare_data(data_path):
    # Apply data augmentation and preprocessing
    datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
    
    train_gen = datagen.flow_from_directory(
        data_path,
        target_size=(128, 128),
        batch_size=32,
        class_mode='binary',  # Binary classification (Infected/Uninfected)
        subset='training'
    )

    val_gen = datagen.flow_from_directory(
        data_path,
        target_size=(128, 128),
        batch_size=32,
        class_mode='binary',
        subset='validation'
    )

    return train_gen, val_gen

# Train and save the malaria model
def train_malaria_model():
    print("Training Malaria Model...")
    malaria_train_gen, malaria_val_gen = prepare_data(malaria_data_path)
    
    model = create_cnn_model()
    model.fit(malaria_train_gen, epochs=10, validation_data=malaria_val_gen)
    
    # Save the trained model
    model.save(os.path.join(model_save_path, "malaria.h5"))
    print("Malaria model saved as 'malaria.h5'")

# Train and save the pneumonia model
def train_pneumonia_model():
    print("Training Pneumonia Model...")
    pneumonia_train_gen, pneumonia_val_gen = prepare_data(pneumonia_data_path)
    
    model = create_cnn_model()
    model.fit(pneumonia_train_gen, epochs=10, validation_data=pneumonia_val_gen)
    
    # Save the trained model
    model.save(os.path.join(model_save_path, "pneumonia.h5"))
    print("Pneumonia model saved as 'pneumonia.h5'")

# Main function to train both models
def main():
    train_malaria_model()
    train_pneumonia_model()

if __name__ == "__main__":
    main()


Training Malaria Model...
Found 56 images belonging to 2 classes.
Found 14 images belonging to 2 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 672ms/step - accuracy: 0.5685 - loss: 1.8852 - val_accuracy: 0.6429 - val_loss: 0.6628
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 493ms/step - accuracy: 0.6171 - loss: 1.1418 - val_accuracy: 0.4286 - val_loss: 1.1553
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 487ms/step - accuracy: 0.4841 - loss: 0.8698 - val_accuracy: 0.5714 - val_loss: 0.9147
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 416ms/step - accuracy: 0.6101 - loss: 0.7522 - val_accuracy: 0.5714 - val_loss: 0.7189
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 484ms/step - accuracy: 0.6622 - loss: 0.5767 - val_accuracy: 0.6429 - val_loss: 0.6267
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 542ms/step - accuracy: 0.9196 - loss: 0.4435 - val_accuracy: 0.5000 - val_loss: 0.6382
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0



Malaria model saved as 'malaria.h5'
Training Pneumonia Model...
Found 56 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 663ms/step - accuracy: 1.0000 - loss: 0.4563 - val_accuracy: 1.0000 - val_loss: 1.2024e-10
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 613ms/step - accuracy: 1.0000 - loss: 1.6950e-10 - val_accuracy: 1.0000 - val_loss: 2.5280e-17
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 435ms/step - accuracy: 1.0000 - loss: 4.8219e-17 - val_accuracy: 1.0000 - val_loss: 6.0229e-23
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 525ms/step - accuracy: 1.0000 - loss: 5.5728e-25 - val_accuracy: 1.0000 - val_loss: 8.7187e-28
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 505ms/step - accuracy: 1.0000 - loss: 2.9985e-29 - val_accuracy: 1.0000 - val_loss: 6.0009e-32
Epoch 6/10
[1m2/2[



Pneumonia model saved as 'pneumonia.h5'


# Deployment with Streamlit

In [39]:
import streamlit as st
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from tensorflow.keras.models import load_model
from PIL import Image
import cv2

# Load structured data models
diabetes_model = pickle.load(open("models/diabetes.pkl", "rb"))
heart_model = pickle.load(open("models/heart.pkl", "rb"))
kidney_model = pickle.load(open("models/kidney.pkl", "rb"))

# Load image-based models
malaria_model = load_model("models/malaria.h5")
pneumonia_model = load_model("models/pneumonia.h5")

# Sidebar Navigation
st.sidebar.title("Sireesha - Unified AI Multidisease Predictor")
option = st.sidebar.radio("Select a disease to predict:", ["Home", "Diabetes", "Heart Disease", "Kidney Disease", "Malaria", "Pneumonia"])

# Home Page
if option == "Home":
    st.title("Unified AI Multidisease Predictor")
    st.write("This web application allows you to predict multiple diseases using AI models.")

# Structured Data Prediction Function
def predict_disease(model, user_input, feature_names):
    df = pd.DataFrame([user_input], columns=feature_names)
    prediction = model.predict(df)
    return "Positive" if prediction[0] == 1 else "Negative"

# Image Prediction Function
def predict_image(model, image):
    image = image.resize((128, 128))
    image = np.array(image) / 255.0
    image = np.expand_dims(image, axis=0)
    prediction = model.predict(image)
    return "Positive" if prediction[0] > 0.5 else "Negative"

# Diabetes Prediction
if option == "Diabetes":
    st.title("Diabetes Prediction")
    pregnancies = st.number_input("Pregnancies", min_value=0, max_value=20, value=1)
    glucose = st.number_input("Glucose Level", min_value=0, max_value=200, value=100)
    blood_pressure = st.number_input("Blood Pressure", min_value=0, max_value=200, value=70)
    skin_thickness = st.number_input("Skin Thickness", min_value=0, max_value=100, value=20)
    insulin = st.number_input("Insulin Level", min_value=0, max_value=800, value=80)
    bmi = st.number_input("BMI", min_value=0.0, max_value=50.0, value=25.0)
    dpf = st.number_input("Diabetes Pedigree Function", min_value=0.0, max_value=2.5, value=0.5)
    age = st.number_input("Age", min_value=0, max_value=120, value=30)
    
    if st.button("Predict Diabetes"):
        result = predict_disease(diabetes_model, [pregnancies, glucose, blood_pressure, skin_thickness, insulin, bmi, dpf, age], 
                                 ["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI", "DiabetesPedigreeFunction", "Age"])
        st.write(f"Prediction: **{result}**")

# Heart Disease Prediction
if option == "Heart Disease":
    st.title("Heart Disease Prediction")
    age = st.number_input("Age", min_value=20, max_value=100, value=50)
    sex = st.selectbox("Sex", ["Male", "Female"])
    cp = st.selectbox("Chest Pain Type", [0, 1, 2, 3])
    trestbps = st.number_input("Resting Blood Pressure", min_value=90, max_value=200, value=120)
    chol = st.number_input("Cholesterol", min_value=100, max_value=600, value=200)
    
    sex = 1 if sex == "Male" else 0

    if st.button("Predict Heart Disease"):
        result = predict_disease(heart_model, [age, sex, cp, trestbps, chol], 
                                 ["age", "sex", "cp", "trestbps", "chol"])
        st.write(f"Prediction: **{result}**")

# Kidney Disease Prediction
if option == "Kidney Disease":
    st.title("Kidney Disease Prediction")
    rbc = st.selectbox("Red Blood Cells", ["Normal", "Abnormal"])
    pc = st.selectbox("Pus Cell", ["Normal", "Abnormal"])
    hemo = st.number_input("Hemoglobin", min_value=0.0, max_value=20.0, value=12.0)
    
    rbc = 1 if rbc == "Abnormal" else 0
    pc = 1 if pc == "Abnormal" else 0

    if st.button("Predict Kidney Disease"):
        result = predict_disease(kidney_model, [rbc, pc, hemo], 
                                 ["rbc", "pc", "hemo"])
        st.write(f"Prediction: **{result}**")

# Malaria Prediction
if option == "Malaria":
    st.title("Malaria Detection")
    uploaded_file = st.file_uploader("Upload Blood Smear Image", type=["jpg", "png", "jpeg"])
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_column_width=True)
        
        if st.button("Predict Malaria"):
            result = predict_image(malaria_model, image)
            st.write(f"Prediction: **{result}**")

# Pneumonia Prediction
if option == "Pneumonia":
    st.title("Pneumonia Detection")
    uploaded_file = st.file_uploader("Upload Chest X-Ray Image", type=["jpg", "png", "jpeg"])
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_column_width=True)
        
        if st.button("Predict Pneumonia"):
            result = predict_image(pneumonia_model, image)
            st.write(f"Prediction: **{result}**")





In [40]:
def set_background(base64_str):
    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url("data:image/png;base64,{base64_str}");
            background-size: cover;
        }}
        </style>
        """,
        unsafe_allow_html=True
    )


In [48]:
import streamlit as st
import base64

# Function to encode the image file to base64
def get_base64_image(image_path):
    with open(image_path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode()

# Function to set the background image
def set_background(encoded_image):
    st.markdown(
        f"""
        <style>
        .stApp {{
            background-image: url('data:image/jpeg;base64,{encoded_image}');
            background-size: cover;
            background-position: center;
            background-repeat: no-repeat;
            height: 100vh;
        }}
        </style>
        """, unsafe_allow_html=True)

# Homepage content
def homepage():
    # Set the absolute path to your image
    image_path = r"C:\Users\siree\Desktop\project 2\statics\doctor.jpg"  # Use the correct absolute path
    encoded_image = get_base64_image(image_path)
    
    # Set the background image
    set_background(encoded_image)
    
    # Add content to the homepage
    st.title("Welcome to the AI Multidisease Predictor")
    st.subheader("Powered by AI for Early Detection")
    
    # Optional: Add additional content here (e.g., instructions, app description, etc.)
    st.write(
        "Use this app to predict and detect diseases like Diabetes, Malaria, and Pneumonia using AI-powered models."
    )

# Call the homepage function to render the page
if __name__ == "__main__":
    homepage()


