# Precious Kings
## Project Title: Time Series Predictive Modeling Diabetes Progression and Health Risk Stratification Using Electronic Health Records

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split # Though we might not split for simplicity here
from sklearn.ensemble import RandomForestClassifier # Import RandomForest
from sklearn.preprocessing import StandardScaler     # For scaling features
import pickle                                       # For saving the model and scaler
import os                                           # To check if the file exists

print("Libraries imported successfully Precious")

Libraries imported successfully Precious


### Load dataset

In [2]:
# Load the dataset
data = pd.read_csv('diabetes.csv')  # Replace with your actual dataset path
# data

###  Define Features (X) and Target (y)

In [3]:
# Define the list of feature column names
# Make sure these match your CSV column names EXACTLY!
feature_cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
target_col = 'Outcome'

# Create the features DataFrame (X)
X = data[feature_cols]

# Create the target Series (y)
y = data[target_col]

print("Features (X) and Target (y) defined.")
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

Features (X) and Target (y) defined.
Shape of X: (768, 8)
Shape of y: (768,)


###  Preprocess Data: Feature Scaling

In [4]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler to the features (X) and transform X
# fit_transform() does both steps in one go
X_scaled = scaler.fit_transform(X)

print("Features scaled using StandardScaler.")

# You can optionally convert X_scaled back to a DataFrame to view it nicely (optional)
# X_scaled_df = pd.DataFrame(X_scaled, columns=feature_cols)
# print("\nFirst 5 rows of scaled features:")
# print(X_scaled_df.head())

print("\nScaler is fitted and ready to be saved.")

Features scaled using StandardScaler.

Scaler is fitted and ready to be saved.


### Train the Random Forest Model

In [5]:
# Initialize the Random Forest Classifier model
# n_estimators is the number of trees in the forest
model = RandomForestClassifier(n_estimators=100, random_state=42) # random_state for reproducibility

# Train the model
model.fit(X_scaled, y)

print("Random Forest Classifier model trained successfully!")

Random Forest Classifier model trained successfully!


### Save the Model and Scaler

In [6]:
# Define filenames for the saved files
model_filename = 'diabetes_rf_model.pkl' # Updated filename
scaler_filename = 'scaler.pkl'

# Save the trained model
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)
print(f"Model saved successfully as '{model_filename}'")

# Save the fitted scaler
with open(scaler_filename, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
print(f"Scaler saved successfully as '{scaler_filename}'")

Model saved successfully as 'diabetes_rf_model.pkl'
Scaler saved successfully as 'scaler.pkl'


### Predict the model

In [7]:
# Make predictions using the trained model
predictions = model.predict(X_scaled)

# Display the first 10 predictions
print("First 10 predictions:", predictions[:10])

First 10 predictions: [1 0 1 0 1 0 1 0 1 1]


### Calculate the F1 Score

In [8]:
# Import the f1_score function
from sklearn.metrics import f1_score

# Calculate the F1 score
f1 = f1_score(y, predictions)

# Display the F1 score
print(f'F1 Score: {f1:.2f}')

F1 Score: 1.00
