In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report

In [2]:
def load_data(file_path):
    """
    Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str, path-like, or file-like
        Location of the CSV; forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``pd.read_csv`` defaults.
    """
    dataset = pd.read_csv(file_path)
    return dataset


def preprocess_data(df):
    """
    Split a frame into standardized features and the ``Outcome`` target.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing an ``Outcome`` column; every other column is
        treated as a feature.

    Returns
    -------
    tuple
        ``(x_scaled, y, scaler)``: the standardized feature values, the
        ``Outcome`` column, and the ``StandardScaler`` fitted on the
        features of ``df``.
    """
    # Everything except the label column is a feature.
    features = df.drop(columns="Outcome")
    target = df["Outcome"]

    # Fit on the features, then transform those same features.
    standardizer = StandardScaler().fit(features)
    return standardizer.transform(features), target, standardizer

def train_model(x_train, y_train):
    """
    Fit a support-vector classifier with a linear kernel.

    Parameters
    ----------
    x_train : array-like
        Training feature matrix.
    y_train : array-like
        Training labels aligned with ``x_train``.

    Returns
    -------
    sklearn.svm.SVC
        The fitted classifier.
    """
    # ``fit`` returns the estimator itself, so it can be returned directly.
    return svm.SVC(kernel="linear").fit(x_train, y_train)



def evaluate_model(clf, x, y, dataset_name="Dataset"):
    """
    Score a fitted classifier on ``(x, y)`` and print a summary.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing ``predict``.
    x : array-like
        Feature matrix to predict on.
    y : array-like
        True labels for ``x``.
    dataset_name : str, optional
        Label prefixed to the printed lines (default ``"Dataset"``).

    Returns
    -------
    float
        The accuracy computed by ``accuracy_score``.
    """
    predicted = clf.predict(x)
    score = accuracy_score(y, predicted)

    # Accuracy line first: fraction with two decimals, then a percentage.
    percent = round(score * 100, 2)
    print(f"{dataset_name} Accuracy: {score:.2f} ({percent}%)")

    # Per-class precision / recall / f1 table.
    report = classification_report(y, predicted)
    print(f"{dataset_name} Classification Report:\n{report}\n")

    return score


def predict_diabetes(input_data, scaler, clf):
    """
    Predicts whether a single instance indicates diabetes.

    Parameters
    ----------
    input_data : sequence of numbers
        Feature values for one record, in the same column order the scaler
        was fitted with.
    scaler : object
        Fitted scaler exposing ``transform``. If it also exposes
        ``feature_names_in_`` (scikit-learn sets this when an estimator is
        fitted on a DataFrame), the record is wrapped in a one-row DataFrame
        with those column names before scaling.
    clf : object
        Fitted classifier exposing ``predict``.

    Returns
    -------
    str
        ``"Diabetic"`` when the predicted label equals 1, otherwise
        ``"Non-diabetic"``.
    """
    # One record -> a single-row 2-D array, as estimators expect.
    input_array = np.asarray(input_data).reshape(1, -1)

    # A scaler fitted on a DataFrame remembers its column names; passing a
    # bare array afterwards makes scikit-learn warn about missing feature
    # names. Re-attach the names when they are known and the width matches;
    # otherwise pass the array through and let the scaler validate it.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None and len(feature_names) == input_array.shape[1]:
        model_input = pd.DataFrame(input_array, columns=list(feature_names))
    else:
        model_input = input_array

    scaled_input = scaler.transform(model_input)
    prediction = clf.predict(scaled_input)
    return "Diabetic" if prediction[0] == 1 else "Non-diabetic"





In [None]:
def main():
    """
    Runs the end-to-end workflow: load the dataset, split it, fit the scaler
    and a linear SVM on the training rows, print train/test metrics, and
    classify one example record.

    The train/test split is done on the raw rows *before* scaling, so the
    scaler's statistics come from the training rows only. Fitting the scaler
    on the full dataset would let test-set statistics leak into training.
    """
    # Load dataset
    file_path = "diabetes.csv"
    df = load_data(file_path)

    # Split the raw rows first, stratified on the label to keep class balance
    train_df, test_df = train_test_split(
        df, test_size=0.2, stratify=df["Outcome"], random_state=2
    )

    # Fit the scaler on the training rows only
    x_train, y_train, scaler = preprocess_data(train_df)

    # Apply the already-fitted scaler to the held-out rows
    x_test = scaler.transform(test_df.drop(columns="Outcome"))
    y_test = test_df["Outcome"]

    # Train model
    classifier = train_model(x_train, y_train)

    # Evaluate model
    evaluate_model(classifier, x_train, y_train, dataset_name="Training Data")
    evaluate_model(classifier, x_test, y_test, dataset_name="Testing Data")

    # Example single prediction
    example_input = (8, 126, 88, 36, 108, 38.5, 0.349, 49)
    result = predict_diabetes(example_input, scaler, classifier)
    print(f"Prediction for input {example_input}: {result}")

if __name__ == "__main__":
    main()

Training Data Accuracy: 0.79 (78.66%)
Training Data Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.89      0.84       400
           1       0.75      0.59      0.66       214

    accuracy                           0.79       614
   macro avg       0.77      0.74      0.75       614
weighted avg       0.78      0.79      0.78       614


Testing Data Accuracy: 0.77 (77.27%)
Testing Data Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.91      0.84       100
           1       0.76      0.52      0.62        54

    accuracy                           0.77       154
   macro avg       0.77      0.71      0.73       154
weighted avg       0.77      0.77      0.76       154


Prediction for input (8, 126, 88, 36, 108, 38.5, 0.349, 49): Diabetic


