In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# Load the dataset
diabetes_dataset = pd.read_csv('C:/Users/shaan/OneDrive/Desktop/diabetes.csv')

# Separate features and labels
X = diabetes_dataset.drop(columns='Outcome', axis=1)
Y = diabetes_dataset['Outcome']

# Standardizing the data
scaler = StandardScaler()
X = scaler.fit_transform(X)  # Apply transformation correctly

# Splitting the dataset
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Print dataset shapes
print("X shape:", X.shape, "X_train shape:", X_train.shape, "X_test shape:", X_test.shape)

# Train SVM model
classifier = svm.SVC(kernel='linear')
classifier.fit(X_train, Y_train)

# Evaluate model
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)
print('Training Accuracy:', training_data_accuracy)

X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
print('Test Accuracy:', test_data_accuracy)

# Input data for prediction
#input_data = (5, 166, 72, 19, 175, 25.8, 0.587, 51)
input_data = (13, 145, 82, 19, 110, 22.2, 0.245, 57)

# Convert input data to a NumPy array
input_data_as_numpy_array = np.asarray(input_data)

# Convert to a DataFrame with correct column names to avoid warnings
input_data_df = pd.DataFrame([input_data], columns=diabetes_dataset.columns[:-1])

# Standardize the input data
std_data = scaler.transform(input_data_df)  # Now it matches the format
print("Standardized input data:", std_data)

# Make prediction
prediction = classifier.predict(std_data)
print("Prediction:", prediction)

# Output result
if prediction[0] == 0:
    print('The person is NOT diabetic.')
else:
    print('The person IS diabetic.')


X shape: (768, 8) X_train shape: (614, 8) X_test shape: (154, 8)
Training Accuracy: 0.7866449511400652
Test Accuracy: 0.7727272727272727
Standardized input data: [[ 2.7187125   0.75443236  0.66661825 -0.09637905  0.26222798 -1.24286663
  -0.68519336  2.02160968]]
Prediction: [0]
The person is NOT diabetic.


In [7]:
import pickle

In [9]:
filename = 'trained_model.sav'
pickle.dump(classifier, open(filename, 'wb'))

In [11]:
# loading the saved model
loaded_model = pickle.load(open('trained_model.sav', 'rb'))

In [24]:
# ✅ Input data for prediction
input_data = (3, 126, 88, 41, 235, 39.3, 0.704, 27)  # Modify as needed

# ✅ Convert input_data into a DataFrame with correct column names
input_data_df = pd.DataFrame([input_data], columns=diabetes_dataset.columns[:-1])

# ✅ Standardize input data using the same scaler from training
input_data_scaled = scaler.transform(input_data_df)  # 🔥 FIXED: Using `scaler`

# ✅ Make prediction
prediction = loaded_model.predict(input_data_scaled)

# ✅ Output result
print("Prediction:", prediction)
print("The person is", "Diabetic" if prediction[0] == 1 else "Not Diabetic")


Prediction: [0]
The person is Not Diabetic
