In [None]:
# Importing Dependencies
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
import pickle

# Data Collection and Analysis
# Read the PIMA Diabetes dataset from the working directory.
df = pd.read_csv('diabetes.csv')

# Quick overview: dimensions, summary statistics and class balance.
print(f"Dataset Shape: {df.shape}")
print(f"Dataset Description:\n {df.describe()}")
print(f"Value Counts for Outcome:\n {df['Outcome'].value_counts()}")

# Label encoding used by the 'Outcome' column:
#   0 ---> Non-Diabetic
#   1 ---> Diabetic

# Separating the Data and Labels: every column except the label is a feature.
label_column = 'Outcome'
X = df.drop(columns=[label_column])
Y = df[label_column]

# Splitting data into training and testing sets
# The split happens BEFORE standardization so that the held-out rows never
# contribute to the scaler's mean/variance (fitting the scaler on the full
# dataset leaks test-set statistics into training and inflates test accuracy).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=42
)

# Data Standardization
# Fit on the training rows only, then apply the same transform to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Training
classifier = svm.SVC(kernel='linear')
classifier.fit(X_train, Y_train)

# Model Evaluation
# Predict on the rows the classifier was fitted on and on the held-out rows.
y_train_pred, y_test_pred = (
    classifier.predict(features_split) for features_split in (X_train, X_test)
)

# Fraction of correctly classified rows for each split.
train_accuracy = accuracy_score(y_true=Y_train, y_pred=y_train_pred)
test_accuracy = accuracy_score(y_true=Y_test, y_pred=y_test_pred)

print("Training Data Accuracy:", train_accuracy)
print("Testing Data Accuracy:", test_accuracy)

# Save the Trained Model
# Serialize the fitted classifier next to the notebook.
with open('model.pkl', mode='wb') as model_file:
    pickle.dump(classifier, model_file)

print("Model saved as 'model.pkl'.")

# Predictive Functionality
print("\nDiabetes Predictive Model For Females")


def read_float(prompt):
    """Prompt repeatedly until the user types a finite number.

    Args:
        prompt: Text shown to the user by ``input``.

    Returns:
        The entered value converted to ``float``.
    """
    while True:
        raw_value = input(prompt)
        try:
            number = float(raw_value)
        except ValueError:
            # A typo should not abort the whole cell; ask again instead.
            print(f"Invalid number: {raw_value!r}. Please try again.")
            continue
        # float() also accepts 'nan' and 'inf', which are not usable measurements.
        if np.isfinite(number):
            return number
        print(f"Invalid number: {raw_value!r}. Please try again.")


# Input prompts for the user
# NOTE(review): the prompt order is assumed to match the feature column order
# of diabetes.csv -- confirm against the dataset header.
features = [
    "Enter the number of Pregnancies: ",
    "Enter the Glucose Level (e.g., 80): ",
    "Enter the Blood Pressure (e.g., 70): ",
    "Enter the Skin Thickness (e.g., 29): ",
    "Enter the Insulin Level: ",
    "Enter the BMI (e.g., 33.6): ",
    "Enter the Diabetes Pedigree Function (e.g., 0.134): ",
    "Enter the Age of the Patient: "
]

input_data = [read_float(prompt) for prompt in features]

# Build a single-row input. When the scaler was fitted on a DataFrame it
# remembers the column names; passing a bare array would then trigger
# scikit-learn's "X does not have valid feature names" warning and skip the
# column-consistency check, so the names are reattached here.
feature_names = getattr(scaler, "feature_names_in_", None)
if feature_names is None:
    user_input = np.array([input_data])
else:
    user_input = pd.DataFrame([input_data], columns=feature_names)

# Standardizing the input with the same scaler used for training
standardized_input = scaler.transform(user_input)

# Load the saved model and make predictions
# NOTE: pickle.load executes arbitrary code from the file; only load
# 'model.pkl' files produced by this notebook or another trusted source.
with open('model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

prediction = loaded_model.predict(standardized_input)

if prediction[0] == 1:
    print("The person is likely Diabetic.")
else:
    print("The person is likely Non-Diabetic.")

In [11]:
# checking versions
# Versions are read from installed-package metadata rather than from module
# attributes: no earlier cell binds the names `sklearn` or `streamlit`
# (`from sklearn import svm` binds only `svm`), so `sklearn.__version__` and
# `streamlit.__version__` raise NameError on a fresh kernel.
from importlib import metadata

# Printed label -> distribution name as installed by pip.
distribution_names = {
    "numpy": "numpy",
    "pandas": "pandas",
    "sklearn": "scikit-learn",
    "streamlit": "streamlit",
}

versions = {}
for label, distribution in distribution_names.items():
    try:
        versions[label] = metadata.version(distribution)
    except metadata.PackageNotFoundError:
        # Report the gap instead of aborting the remaining checks.
        versions[label] = "not installed"
    print(label, versions[label])

numpy 1.26.4
pandas 2.2.2
sklearn 1.5.1
streamlit 1.37.1


In [None]:
import pickle
from sklearn.preprocessing import StandardScaler

# Persist the scaler that was fitted in the training cell so that inference
# code can apply exactly the same standardization as the saved model expects.
# A scaler re-fitted here on placeholder data (`[[...]]` is a list holding
# Ellipsis, which cannot be fitted) would overwrite `X_train`/`scaler` and
# would not match 'model.pkl'.
if not isinstance(scaler, StandardScaler) or not hasattr(scaler, "mean_"):
    raise RuntimeError(
        "`scaler` is not a fitted StandardScaler; run the training cell first."
    )

# Save the scaler to a file
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)