In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
def get_user_input_stroke():
    gender = input("Enter gender (Male=0, Female=1): ")
    age = input("Enter age: ")
    hypertension = input("Enter hypertension (0 for No, 1 for Yes): ")
    heart_disease = input("Enter heart disease (0 for No, 1 for Yes): ")
    ever_married = input("Enter ever married (0 for No, 1 for Yes): ")
    work_type = input("Enter work type (Private=0, Self-employed=1, Children=2, Govt_job=3, Never_worked=4): ")
    residence_type = input("Enter residence type (Urban=0, Rural=1): ")
    avg_glucose_level = input("Enter average glucose level: ")
    bmi = input("Enter BMI: ")
    smoking_status = input("Enter smoking status (Formerly smoked=0, Never smoked=1, Smokes=2, Unknown=3): ")
    return gender, age, hypertension, heart_disease, ever_married, work_type, residence_type, avg_glucose_level, bmi, smoking_status

In [3]:
def strokeml(gender, age, hypertension, heart_disease, ever_married, work_type, residence_type, avg_glucose_level, bmi, smoking_status):
    heart_data = pd.read_csv("Stroke.csv", sep=",")

    heart_data['bmi'].fillna(heart_data['bmi'].median(), inplace=True)
    heart_data.drop('id', axis=1, inplace=True)

    heart_data.gender[heart_data.gender == 'Male'] = 0
    heart_data.gender[heart_data.gender == 'Female'] = 1
    heart_data.ever_married[heart_data.ever_married == 'Yes'] = 1
    heart_data.ever_married[heart_data.ever_married == 'No'] = 0
    heart_data.work_type[heart_data.work_type == 'Private'] = 0
    heart_data.work_type[heart_data.work_type == 'Self-employed'] = 1
    heart_data.work_type[heart_data.work_type == 'children'] = 2
    heart_data.work_type[heart_data.work_type == 'Govt_job'] = 3
    heart_data.work_type[heart_data.work_type == 'Never_worked'] = 4
    heart_data.Residence_type[heart_data.Residence_type == 'Urban'] = 0
    heart_data.Residence_type[heart_data.Residence_type == 'Rural'] = 1
    heart_data.smoking_status[heart_data.smoking_status == 'formerly smoked'] = 0
    heart_data.smoking_status[heart_data.smoking_status == 'never smoked'] = 1
    heart_data.smoking_status[heart_data.smoking_status == 'smokes'] = 2
    heart_data.smoking_status[heart_data.smoking_status == 'Unknown'] = 3

    S = heart_data[heart_data.stroke == 0]
    NS = heart_data[heart_data.stroke == 1]
    S_sample = S.sample(n=251)
    nds = pd.concat([S_sample, NS], axis=0)

    X = nds.drop(columns='stroke', axis=1)
    Y = nds['stroke']

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, stratify=Y, random_state=99)

    model = LogisticRegression()
    model.fit(X_train, Y_train)

    gender = float(gender)
    age = float(age)
    hypertension = float(hypertension)
    heart_disease = float(heart_disease)
    ever_married = float(ever_married)
    work_type = float(work_type)
    residence_type = float(residence_type)
    avg_glucose_level = float(avg_glucose_level)
    bmi = float(bmi)
    smoking_status = float(smoking_status)
    input_data = (gender, age, hypertension, heart_disease, ever_married, work_type, residence_type, avg_glucose_level, bmi, smoking_status)

    input_data_as_numpy_array = np.asarray(input_data)
    input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)
    prediction = model.predict(input_data_reshaped)

    if prediction[0] == 0:
        return "NOT AT RISK"
    else:
        return "AT RISK"

In [4]:
result = strokeml(*get_user_input_stroke())
print(result)

Enter gender (Male=0, Female=1):  0
Enter age:  69
Enter hypertension (0 for No, 1 for Yes):  1
Enter heart disease (0 for No, 1 for Yes):  1
Enter ever married (0 for No, 1 for Yes):  1
Enter work type (Private=0, Self-employed=1, Children=2, Govt_job=3, Never_worked=4):  3
Enter residence type (Urban=0, Rural=1):  0
Enter average glucose level:  75
Enter BMI:  26
Enter smoking status (Formerly smoked=0, Never smoked=1, Smokes=2, Unknown=3):  2


AT RISK


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  heart_data['bmi'].fillna(heart_data['bmi'].median(), inplace=True)
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure th