# Diabetes Prediction Model
This notebook demonstrates the steps for preprocessing the diabetes dataset, training a neural network model, and making predictions.
The pipeline includes:
- Preprocessing
- Model Training
- Model Evaluation
- Prediction on a new sample


In [10]:
# Import dependencies
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2


**Preprocessing Function**

In [2]:
def preprocess_data(file_path='/content/diabetes.csv', test_size=0.2, random_state=42):
    """Load the diabetes CSV, split it, and standardise the features.

    Parameters
    ----------
    file_path : str
        Path to a CSV file that contains the feature columns plus a binary
        ``Outcome`` target column.
    test_size : float, default 0.2
        Fraction of rows reserved for the test split.
    random_state : int, default 42
        Seed passed to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, scaler)``. The feature matrices
        are the scaled outputs of ``scaler``; the targets are the ``Outcome``
        values for the matching rows; ``scaler`` is the ``StandardScaler``
        fitted on the training features only.

    Raises
    ------
    KeyError
        If the CSV has no ``Outcome`` column.
    """
    # Load the dataset
    data = pd.read_csv(file_path)

    # Fail early with a message that names the file instead of relying on the
    # generic error raised by DataFrame.drop.
    if 'Outcome' not in data.columns:
        raise KeyError(f"Required target column 'Outcome' not found in {file_path!r}")

    # Separate features and target
    X = data.drop(columns=['Outcome'])
    y = data['Outcome']

    # Stratify on the target so both splits keep the same class balance
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # Fit the scaler on the training split only, then reuse it for the test
    # split, so no test-set statistics leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test, scaler


**Model Training Function**

In [11]:
def train_model(X_train, y_train, X_test, y_test, scaler):
    """Train a small dense binary classifier and score it on the test split.

    Parameters
    ----------
    X_train, y_train
        Training features and binary labels. 20% of these samples are held
        out by Keras as the validation set that drives early stopping.
    X_test, y_test
        Held-out features and labels used only for the final metrics.
    scaler
        Not used inside this function; kept in the signature so existing
        callers keep working.

    Returns
    -------
    tuple
        ``(model, history, metrics)`` where ``model`` is the fitted Keras
        model, ``history`` is the object returned by ``model.fit`` and
        ``metrics`` is a dict with the keys ``accuracy``, ``loss``,
        ``precision``, ``recall`` and ``f1_score``.
    """
    # Define the model. The input shape is declared with an explicit Input
    # layer; passing `input_shape=` to the first Dense layer makes recent
    # Keras versions emit a warning.
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation='relu', kernel_regularizer=l2(0.01)),
        Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
        Dense(16, activation='relu', kernel_regularizer=l2(0.01)),
        Dense(1, activation='sigmoid')
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Stop once validation loss has not improved for 5 epochs and roll back
    # to the best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model
    history = model.fit(
        X_train,
        y_train,
        validation_split=0.2,
        epochs=20,
        batch_size=32,
        callbacks=[early_stopping],
        verbose=1,
    )

    # Evaluate on test set
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

    # Make predictions on the test set
    y_pred_probs = model.predict(X_test)
    y_pred = (y_pred_probs > 0.5).astype(int).flatten()  # Convert probabilities to binary predictions

    # Compute additional metrics. zero_division=0 keeps the score at 0
    # without a warning when the model predicts no positive samples.
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    accuracy = accuracy_score(y_test, y_pred)

    # Print all metrics
    print(f"\nTest Accuracy: {test_accuracy:.2f}")
    print(f"Test Loss: {test_loss:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-Score: {f1:.2f}")

    return model, history, {
        "accuracy": accuracy,
        "loss": test_loss,
        "precision": precision,
        "recall": recall,
        "f1_score": f1
    }


**Prediction Function**

In [12]:
def predict(model, scaler, input_data, threshold=0.5):
    """Classify one raw sample with a trained model and its fitted scaler.

    Parameters
    ----------
    model
        Object whose ``predict(X)`` returns a 2-D array-like of
        probabilities; the value at ``[0][0]`` is used.
    scaler
        Fitted transformer exposing ``transform(X)``.
    input_data
        Flat sequence of raw feature values for a single sample, in the same
        column order the scaler was fitted on.
    threshold : float, default 0.5
        The sample is labelled 1 when its probability is strictly greater
        than this value, otherwise 0.

    Returns
    -------
    tuple
        ``(prediction, prediction_prob)``: the 0/1 label and the probability
        as a plain Python float.

    Raises
    ------
    ValueError
        If the number of values does not match the feature count recorded on
        the scaler.
    """
    # Shape the flat sequence into a single-row 2-D sample
    sample = np.asarray(input_data, dtype=float).reshape(1, -1)

    # Check the feature count up front so the error message is specific
    expected_features = getattr(scaler, "n_features_in_", None)
    if expected_features is not None and sample.shape[1] != expected_features:
        raise ValueError(
            f"Expected {expected_features} feature values, got {sample.shape[1]}"
        )

    # A scaler fitted on a DataFrame records the column names; handing it a
    # bare array makes scikit-learn warn about missing feature names, so the
    # sample is rebuilt as a one-row DataFrame with those names.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        sample = pd.DataFrame(sample, columns=list(feature_names))

    scaled_sample = scaler.transform(sample)
    prediction_prob = float(model.predict(scaled_sample)[0][0])
    prediction = 1 if prediction_prob > threshold else 0
    return prediction, prediction_prob


**Execute Preprocessing**

In [13]:
# Build the scaled train/test splits from the local CSV
splits = preprocess_data(file_path='diabetes.csv')
X_train, X_test, y_train, y_test, scaler = splits

# Report the dimensions of each feature matrix
print(f"Training Data Shape: {X_train.shape}")
print(f"Testing Data Shape: {X_test.shape}")


Training Data Shape: (614, 8)
Testing Data Shape: (154, 8)


**Train and Save the Model**

In [16]:
# Fit the network and gather its test-set metrics
model, history, metrics = train_model(X_train, y_train, X_test, y_test, scaler)

# Persist the trained network and the fitted scaler so they can be reloaded together
model.save('diabetes_model.h5')
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Summarise the metrics returned by train_model
print("\nModel Evaluation Metrics:")
for metric_name in metrics:
    metric_value = metrics[metric_name]
    print(f"{metric_name.capitalize()}: {metric_value:.2f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 46ms/step - accuracy: 0.5326 - loss: 1.2691 - val_accuracy: 0.6260 - val_loss: 1.2442
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.6551 - loss: 1.1916 - val_accuracy: 0.6423 - val_loss: 1.1658
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6550 - loss: 1.1299 - val_accuracy: 0.6504 - val_loss: 1.0986
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6368 - loss: 1.0815 - val_accuracy: 0.6585 - val_loss: 1.0403
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6772 - loss: 1.0227 - val_accuracy: 0.6911 - val_loss: 0.9826
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6976 - loss: 0.9702 - val_accuracy: 0.7073 - val_loss: 0.9253
Epoch 7/20
[1m16/16[0m [32m━━━━━━━



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step





Test Accuracy: 0.73
Test Loss: 0.67
Precision: 0.62
Recall: 0.57
F1-Score: 0.60

Model Evaluation Metrics:
Accuracy: 0.73
Loss: 0.67
Precision: 0.62
Recall: 0.57
F1_score: 0.60


**Test the Model**

In [17]:
# Score one hand-written sample with the trained model
test_input = [6, 148, 72, 35, 0, 33.6, 0.627, 50]  # Example input
prediction, prediction_prob = predict(model, scaler, test_input)

# Translate the class label into a readable verdict before printing
if prediction == 1:
    verdict = 'Diabetic'
else:
    verdict = 'Non-Diabetic'
print(f"Prediction: {verdict}")
print(f"Prediction Probability: {prediction_prob:.2f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Prediction: Diabetic
Prediction Probability: 0.62


