In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.utils import resample

# Load the dataset
data = pd.read_csv('diabetes.csv')

# Separate the majority and minority classes
df_minority = data[data['Outcome'] == 1]
df_majority = data[data['Outcome'] == 0]

# Upsample minority class
df_minority_upsampled = resample(df_minority, replace=True,  # Sample with replacement
                                 n_samples=len(df_majority),  # Match majority class
                                 random_state=42)

# Combine majority class with upsampled minority class
data_upsampled = pd.concat([df_majority, df_minority_upsampled])

# Data preprocessing
X = data_upsampled[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']]
y = data_upsampled['Outcome']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model training with increased iterations to prevent convergence warnings
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

# Model accuracy
accuracy = accuracy_score(y_test, model.predict(X_test_scaled))
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Example prediction
def predict_diabetes(pregnancies, glucose, blood_pressure, skin_thickness, insulin, bmi, dpf, age):
    input_data = np.array([[pregnancies, glucose, blood_pressure, skin_thickness, insulin, bmi, dpf, age]])
    input_data_scaled = scaler.transform(input_data)
    prediction = model.predict(input_data_scaled)
    return "The patient is likely to have diabetes." if prediction[0] == 1 else "The patient is unlikely to have diabetes."

# Example usage
if __name__ == "__main__":
    # Example input data
    pregnancies = int(input("Enter number of pregnancies: "))
    glucose = float(input("Enter glucose level: "))
    blood_pressure = float(input("Enter blood pressure level: "))
    skin_thickness = float(input("Enter skin thickness level: "))
    insulin = float(input("Enter insulin level: "))
    bmi = float(input("Enter BMI: "))
    dpf = float(input("Enter diabetes pedigree function: "))
    age = int(input("Enter age: "))

    # Get prediction
    result = predict_diabetes(pregnancies, glucose, blood_pressure, skin_thickness, insulin, bmi, dpf, age)
    print(result)


Model Accuracy: 74.50%


In [4]:
prediction

array([0], dtype=int64)

prediction

In [7]:
input_data

array([[ 0. , 80. , 80. , 22. , 81. , 26. ,  0.5, 31. ]])

In [9]:
import pandas as pd
import numpy as np

In [16]:
data = pd.read_csv("diabetes.csv")

In [17]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [18]:
data.shape

(768, 9)

In [21]:
data['Outcome'].value_counts()

Outcome
0    500
1    268
Name: count, dtype: int64