In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

data = pd.read_csv('diabetes_prediction_dataset.csv')

# Preprocessing: Encoding categorical data
label_encoder_gender = LabelEncoder()
label_encoder_smoking = LabelEncoder()
data['gender'] = label_encoder_gender.fit_transform(data['gender'])
data['smoking_history'] = label_encoder_smoking.fit_transform(data['smoking_history'])

# Features and target
X = data.drop('diabetes', axis=1)
y = data['diabetes']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy:.2f}')

# Example user input for prediction
example_input = {
    'gender': 'Female',
    'age': 45,
    'hypertension': 0,
    'heart_disease': 0,
    'smoking_history': 'never',
    'bmi': 28.1,
    'HbA1c_level': 5.9,
    'blood_glucose_level': 120
}

# Convert user input into DataFrame
example_df = pd.DataFrame([example_input])
if example_df['gender'][0] in label_encoder_gender.classes_:
    example_df['gender'] = label_encoder_gender.transform(example_df['gender'])
else:
    example_df['gender'] = -1

if example_df['smoking_history'][0] in label_encoder_smoking.classes_:
    example_df['smoking_history'] = label_encoder_smoking.transform(example_df['smoking_history'])
else:
    example_df['smoking_history'] = -1

# Predict using the trained model
prediction = model.predict(example_df)[0]
print(f'Predicted Diabetes Status: {"Diabetic" if prediction == 1 else "Non-Diabetic"}')




Model Accuracy: 0.95
Predicted Diabetes Status: Non-Diabetic


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [19]:
# Function to take user input and make a prediction
def predict_from_user_input():
    user_input = {
        'gender': input("Enter gender (Male/Female/Other): "),
        'age': int(input("Enter age: ")),
        'hypertension': int(input("Enter hypertension (0 for No, 1 for Yes): ")),
        'heart_disease': int(input("Enter heart disease (0 for No, 1 for Yes): ")),
        'smoking_history': input("Enter smoking history (never, former, current, etc.): "),
        'bmi': float(input("Enter BMI: ")),
        'HbA1c_level': float(input("Enter HbA1c level: ")),
        'blood_glucose_level': int(input("Enter blood glucose level: "))
    }

    user_df = pd.DataFrame([user_input])
    if user_df['gender'][0] in label_encoder_gender.classes_:
        user_df['gender'] = label_encoder_gender.transform(user_df['gender'])
    else:
        user_df['gender'] = -1

    if user_df['smoking_history'][0] in label_encoder_smoking.classes_:
        user_df['smoking_history'] = label_encoder_smoking.transform(user_df['smoking_history'])
    else:
        user_df['smoking_history'] = -1

    prediction = model.predict(user_df)[0]
    print(f'Predicted Diabetes Status: {"Diabetic" if prediction == 1 else "Non-Diabetic"}')

predict_from_user_input()

Enter gender (Male/Female/Other): male
Enter age: 28
Enter hypertension (0 for No, 1 for Yes): 0
Enter heart disease (0 for No, 1 for Yes): 1
Enter smoking history (never, former, current, etc.): former
Enter BMI: 27
Enter HbA1c level: 6
Enter blood glucose level: 118
Predicted Diabetes Status: Non-Diabetic
