<a href="https://colab.research.google.com/github/TejasKawle/Diabetes-Detection-System/blob/main/Diabetes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Importing the dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Function to load and explore the dataset
def load_and_explore_data(file_path):
    """Load the dataset from a CSV file and print a short exploratory summary.

    Args:
        file_path: Path of the CSV file to read. The file must contain an
            'Outcome' column holding the class label.

    Returns:
        The loaded DataFrame, or None when the file does not exist, contains
        no parsable data, or lacks the 'Outcome' column. Failures are
        reported on stdout rather than raised.
    """
    # Keep the try body limited to the call that can actually raise.
    try:
        data = pd.read_csv(file_path)
    except FileNotFoundError:
        print("Error: File not found. Please check the file path.")
        return None
    except pd.errors.EmptyDataError:
        print("Error: File is empty. Please check the file contents.")
        return None

    # The class-distribution printout below indexes data['Outcome']; report a
    # missing label column like any other load failure instead of raising
    # KeyError.
    if 'Outcome' not in data.columns:
        print("Error: 'Outcome' column not found in the dataset.")
        return None

    print("\nDataset Loaded Successfully!")
    print(f"\nDataset Shape: {data.shape}")
    print("\nStatistical Summary:\n", data.describe())
    print("\nClass Distribution:\n", data['Outcome'].value_counts())
    return data

# Function to preprocess the data
def preprocess_data(data):
    """Separate features from the 'Outcome' label and standardize the features.

    Args:
        data: DataFrame containing an 'Outcome' column; every other column
            is treated as a feature.

    Returns:
        A tuple ``(X, Y, scaler)``: the features after StandardScaler
        transformation, the unmodified 'Outcome' column, and the scaler,
        which is fitted on exactly the rows of ``data``.
    """
    labels = data['Outcome']
    features = data.drop(columns='Outcome')
    fitted_scaler = StandardScaler().fit(features)
    return fitted_scaler.transform(features), labels, fitted_scaler

# Function to train the model
def train_model(X_train, Y_train):
    """Fit a linear-kernel support vector classifier on the training data.

    Args:
        X_train: Training feature matrix.
        Y_train: Training labels.

    Returns:
        The fitted ``svm.SVC`` instance.
    """
    model = svm.SVC(kernel='linear')
    # fit() returns the estimator itself, so the fitted model is returned.
    return model.fit(X_train, Y_train)

# Function to evaluate the model
def evaluate_model(classifier, X_train, Y_train, X_test, Y_test):
    """Print accuracy on both splits plus a test-set report and confusion matrix.

    Args:
        classifier: Fitted model exposing ``predict``.
        X_train: Training feature matrix.
        Y_train: Training labels.
        X_test: Testing feature matrix.
        Y_test: Testing labels.

    Returns:
        None. All results are written to stdout.
    """
    # Score the training split first, then the held-out split.
    train_score = accuracy_score(Y_train, classifier.predict(X_train))
    held_out_pred = classifier.predict(X_test)
    test_score = accuracy_score(Y_test, held_out_pred)

    print(f"\nTraining Data Accuracy: {train_score * 100:.2f}%")
    print(f"Testing Data Accuracy: {test_score * 100:.2f}%")

    # Per-class metrics and the confusion matrix use the test predictions only.
    report = classification_report(Y_test, held_out_pred)
    print("\nClassification Report:\n", report)
    matrix = confusion_matrix(Y_test, held_out_pred)
    print("\nConfusion Matrix:\n", matrix)

# Function to save the model and scaler
def save_model(classifier, scaler, model_path, scaler_path):
    """Persist the classifier and the scaler with joblib and report the paths.

    Args:
        classifier: Model object to serialize.
        scaler: Scaler object to serialize.
        model_path: Destination file for the classifier.
        scaler_path: Destination file for the scaler.

    Returns:
        None.
    """
    # Write both artifacts before announcing either, classifier first.
    for artifact, destination in ((classifier, model_path), (scaler, scaler_path)):
        joblib.dump(artifact, destination)

    print(f"\nModel saved to {model_path}")
    print(f"Scaler saved to {scaler_path}")

# Function for prediction
def make_prediction(input_data, classifier, scaler):
    """Classify a single sample as "Diabetic" or "Non-Diabetic".

    Args:
        input_data: Feature values for one sample, in the same order as the
            columns the scaler was fitted on.
        classifier: Fitted model exposing ``predict``.
        scaler: Fitted transformer exposing ``transform``.

    Returns:
        "Diabetic" when the predicted label equals 1, "Non-Diabetic"
        otherwise, or an "Error in prediction: ..." string if any step
        raises.
    """
    try:
        sample = np.asarray(input_data).reshape(1, -1)

        # A scaler fitted on a DataFrame records the column names and warns
        # when it is later given a bare array. Re-attach those names so the
        # input has the same form the scaler was fitted on.
        feature_names = getattr(scaler, "feature_names_in_", None)
        if feature_names is not None:
            sample = pd.DataFrame(sample, columns=list(feature_names))

        std_data = scaler.transform(sample)
        prediction = classifier.predict(std_data)
        return "Diabetic" if prediction[0] == 1 else "Non-Diabetic"
    except Exception as e:
        # Best-effort boundary: callers receive a message string instead of
        # an exception, so the broad catch is kept on purpose.
        return f"Error in prediction: {e}"

# Main workflow
def main():
    """Run the full workflow: load, split, scale, train, evaluate, save, predict.

    Reads 'diabetes.csv' from the working directory, writes
    'diabetes_model.pkl' and 'diabetes_scaler.pkl', and prints all results
    to stdout. Returns early, without training, when the dataset cannot be
    loaded.
    """
    # File path for the dataset
    file_path = 'diabetes.csv'

    # Load and explore data
    data = load_and_explore_data(file_path)
    if data is None:
        return

    # Split the raw rows BEFORE scaling so the scaler's mean and variance come
    # from training rows only; fitting it on the full dataset would leak
    # test-set statistics into training. The same random_state and
    # stratification labels give the same row partition as splitting after
    # scaling.
    train_rows, test_rows = train_test_split(
        data, test_size=0.2, stratify=data['Outcome'], random_state=2
    )

    # Fit the scaler on the training rows, then apply it unchanged to the
    # held-out rows.
    X_train, Y_train, scaler = preprocess_data(train_rows)
    X_test = scaler.transform(test_rows.drop(columns='Outcome'))
    Y_test = test_rows['Outcome']

    # Train the model
    classifier = train_model(X_train, Y_train)

    # Evaluate the model
    evaluate_model(classifier, X_train, Y_train, X_test, Y_test)

    # Save the model and scaler
    save_model(classifier, scaler, 'diabetes_model.pkl', 'diabetes_scaler.pkl')

    # Make a prediction
    input_data = (0, 105, 64, 41, 142, 41.5, 0.173, 22)  # Example input
    result = make_prediction(input_data, classifier, scaler)
    print(f"\nPrediction for input data: {result}")

# Execute the main workflow only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



Dataset Loaded Successfully!

Dataset Shape: (768, 9)

Statistical Summary:
        Pregnancies     Glucose  BloodPressure  ...  DiabetesPedigreeFunction         Age     Outcome
count   768.000000  768.000000     768.000000  ...                768.000000  768.000000  768.000000
mean      3.845052  120.894531      69.105469  ...                  0.471876   33.240885    0.348958
std       3.369578   31.972618      19.355807  ...                  0.331329   11.760232    0.476951
min       0.000000    0.000000       0.000000  ...                  0.078000   21.000000    0.000000
25%       1.000000   99.000000      62.000000  ...                  0.243750   24.000000    0.000000
50%       3.000000  117.000000      72.000000  ...                  0.372500   29.000000    0.000000
75%       6.000000  140.250000      80.000000  ...                  0.626250   41.000000    1.000000
max      17.000000  199.000000     122.000000  ...                  2.420000   81.000000    1.000000

[8 rows x 9 

