In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import pickle

In [32]:
# Train a gradient-boosting classifier on the diabetes CSV, report its
# hold-out accuracy, and persist the fitted model to model.pkl.
df = pd.read_csv('Healthcare-Diabetes.csv')

# NOTE(review): every column except 'Outcome' is used as a feature. If the CSV
# carries an identifier column, it ends up in the model input and the eight
# fields posted by the web form would not match -- confirm against the data.
X = df.drop(columns=['Outcome'])
y = df['Outcome']

# 80/20 split with a fixed seed so the evaluation set is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the estimator as well; otherwise repeated runs can yield different
# models (and a different pickled artifact) from the same split.
gbm = GradientBoostingClassifier(random_state=42)

gbm.fit(X_train, y_train)

y_pred = gbm.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy of the Gradient Boosting Model: {accuracy:.2f}")

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open("model.pkl", "wb") as model_file:
    pickle.dump(gbm, model_file)

Accuracy of the Gradient Boosting Model: 0.88


In [34]:

%%writefile app.py

from flask import Flask, request, jsonify, render_template
import pickle
import numpy as np

# Flask application object; routes below are registered on it.
app = Flask(__name__)

# Load the fitted classifier once at import time so every request reuses it.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# model.pkl must only ever come from a trusted source.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

@app.route("/")
def index():
    """Render and return the ``index.html`` template for the site root."""
    page = render_template("index.html")
    return page

@app.route("/predict", methods=["POST"])
def predict():
    """Run the loaded model on the submitted form values and render the result.

    Every value in the POSTed form is converted to ``float`` and the values
    are passed to the model as a single-row feature matrix. The first
    predicted label is rendered into ``index.html`` as ``predicted_text``.

    Returns:
        The rendered ``index.html`` page. If any submitted value cannot be
        parsed as a number, the page is rendered with an error message and
        HTTP status 400 instead of raising.
    """
    try:
        # Values are consumed positionally, so the form's field order must
        # match the feature order the model was trained with.
        float_features = [float(x) for x in request.form.values()]
    except ValueError:
        return (
            render_template(
                "index.html",
                predicted_text="Invalid input: every field must be a number.",
            ),
            400,
        )

    # Shape (1, n_features): one sample per request.
    features = np.array(float_features).reshape(1, -1)
    # scikit-learn estimators expose `predict`; there is no `prediction` method.
    prediction = model.predict(features)
    return render_template("index.html", predicted_text=prediction[0])

if __name__ == "__main__":
    import os

    # Debug mode enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution from the browser. Keep it off by default and
    # let developers opt in explicitly with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)

Overwriting app.py


In [30]:
%%writefile templates/index.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8" />
    <title>Diabetes Prediction</title>
</head>
<body>
    <h1>Diabetes Prediction</h1>
    <!-- Field order matters: the /predict handler reads the submitted values positionally. -->
    <form action="{{ url_for('predict') }}" method="post">
        <!-- Numeric inputs (step="any" permits decimals) so the browser rejects non-numeric text before submission. -->
        <input type="number" step="any" name="Pregnancies" placeholder="Pregnancies" required="required" />
        <input type="number" step="any" name="Glucose" placeholder="Glucose" required="required" />
        <input type="number" step="any" name="BloodPressure" placeholder="BloodPressure" required="required" />
        <input type="number" step="any" name="SkinThickness" placeholder="SkinThickness" required="required" />
        <input type="number" step="any" name="Insulin" placeholder="Insulin" required="required" />
        <input type="number" step="any" name="BMI" placeholder="BMI" required="required" />
        <input type="number" step="any" name="DiabetesPedigreeFunction" placeholder="DiabetesPedigreeFunction" required="required" />
        <input type="number" step="any" name="Age" placeholder="Age" required="required" />
        <button type="submit" class="btn btn-primary btn-large">Predict</button>
    </form>
    {# Show the result only after a prediction was made. "is defined" (not truthiness) keeps a prediction of 0 visible. #}
    {% if predicted_text is defined %}
    <h2>The prediction is: {{ predicted_text }}</h2>
    {% endif %}
</body>
</html>

Writing templates/index.html


In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [53]:
# Compare several classifiers on the same standardized train/test split and
# collect accuracy, classification report and confusion matrix per model.
df = pd.read_csv('Healthcare-Diabetes.csv')

y = df['Outcome']
X = df.drop(columns=['Outcome'])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# The scaler is fitted on the training portion only and then applied to the
# test portion, so no test-set statistics enter the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate estimators keyed by the display name used in the report.
models = dict([
    ('Gradient Boosting', GradientBoostingClassifier(random_state=42)),
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
    ('Logistic Regression', LogisticRegression(random_state=42, max_iter=10000)),
    ('K-Nearest Neighbors', KNeighborsClassifier()),
])

results = {}
for name, model in models.items():
    # fit() returns the estimator itself, so prediction can be chained.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    results[name] = dict(
        accuracy=accuracy,
        classification_report=classification_rep,
        confusion_matrix=conf_matrix,
    )

# Print one plain-text section per evaluated model: name, accuracy,
# classification report and confusion matrix, followed by blank lines.
for name, result in results.items():
    report_lines = (
        f"Model: {name}",
        f"Accuracy: {result['accuracy']}",
        "Classification Report:",
        result['classification_report'],
        "Confusion Matrix:",
        result['confusion_matrix'],
        "\n",
    )
    # Joining with newlines yields the same text as one print() per item.
    print(*report_lines, sep="\n")

Model: Gradient Boosting
Accuracy: 0.8826714801444043
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.95      0.91       367
           1       0.88      0.75      0.81       187

    accuracy                           0.88       554
   macro avg       0.88      0.85      0.86       554
weighted avg       0.88      0.88      0.88       554

Confusion Matrix:
[[348  19]
 [ 46 141]]


Model: Decision Tree
Accuracy: 0.9620938628158845
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.98      0.97       367
           1       0.96      0.93      0.94       187

    accuracy                           0.96       554
   macro avg       0.96      0.95      0.96       554
weighted avg       0.96      0.96      0.96       554

Confusion Matrix:
[[359   8]
 [ 13 174]]


Model: Logistic Regression
Accuracy: 0.7725631768953068
Classification Report:
              precision    reca