## Creating the Gradient Boosting Model

In [None]:
# Import the dependencies
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Path to the BRFSS 2015 diabetes health-indicators CSV
# (relative, so it resolves against the current working directory)
file_path = Path("../Data/diabetes_binary_health_indicators_BRFSS2015.csv")

# Load the CSV contents into a DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
df.head(n=5)

In [None]:
# Remove, in place, every row that contains at least one missing value
df.dropna(axis=0, how="any", inplace=True)

In [None]:
# Separate the label column (y) from the remaining predictor columns (X)
y = df['Diabetes_binary']
X = df.drop('Diabetes_binary', axis=1)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Configure the gradient boosting classifier: 100 boosting stages of depth-3
# trees with a 0.1 learning rate; the fixed seed makes training reproducible.
# GradientBoostingClassifier comes from sklearn.ensemble (see the import cell).
gb_clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)

In [None]:
# Fit the classifier on the training split
gb_clf.fit(X=X_train, y=y_train)

In [None]:
# Predict class labels for the held-out test split
y_pred = gb_clf.predict(X=X_test)

In [None]:
# Evaluate the model by comparing the test-set predictions with the true labels.
# accuracy_score, confusion_matrix and classification_report come from
# sklearn.metrics (see the import cell).
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Report the overall accuracy first, then the confusion matrix and the
# text classification report
print(f'Accuracy: {accuracy}')
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)

In [None]:
# Export the trained model to a .pkl file
model_path = Path("Models/gradient_boosting.pkl")

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (this is a no-op when it is already there)
model_path.parent.mkdir(parents=True, exist_ok=True)

joblib.dump(gb_clf, model_path)