In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('heart_attack_dataset.csv')

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [5]:
input_cols = list(train_df.columns)[:-1]
target_col = 'HeartDisease'

In [6]:
import numpy as np

In [7]:
numeric_cols = train_df.select_dtypes(include=np.number).columns.tolist()
categorical_cols = train_df.select_dtypes('object').columns.tolist()

In [8]:
from sklearn.preprocessing import MinMaxScaler

In [9]:
scaler_heart = MinMaxScaler()
scaler_heart.fit(train_df[numeric_cols])

In [10]:
train_df[numeric_cols] = scaler_heart.transform(train_df[numeric_cols])
val_df[numeric_cols] = scaler_heart.transform(val_df[numeric_cols])

In [11]:
from sklearn.preprocessing import OneHotEncoder

In [12]:
enc_heart = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
enc_heart.fit(train_df[categorical_cols])

In [13]:
train_one_hot = enc_heart.transform(train_df[categorical_cols])
val_one_hot = enc_heart.transform(val_df[categorical_cols])

In [14]:
encoded_cols = list(enc_heart.get_feature_names_out(categorical_cols))

In [15]:
train_df[encoded_cols] = train_one_hot
val_df[encoded_cols] = val_one_hot

In [16]:
numeric_input_cols = [col for col in input_cols if col in numeric_cols]

In [17]:
X_train = train_df[numeric_input_cols + encoded_cols]
train_targets = train_df[target_col]

X_val = val_df[numeric_input_cols + encoded_cols]
val_targets = val_df[target_col]

In [18]:
from sklearn.linear_model import LogisticRegression

In [19]:
model_heart = LogisticRegression(solver='liblinear')


In [20]:
model_heart.fit(X_train, train_targets)

In [21]:
train_preds = model_heart.predict(X_train)

In [22]:
from sklearn.metrics import accuracy_score

accuracy_score(train_targets, train_preds)

0.8692098092643051

In [23]:
val_preds = model_heart.predict(X_val)

In [24]:
accuracy_score(val_preds, val_targets)

0.8695652173913043

In [30]:
len(X_val.columns)

20

In [31]:
sample_data = {
    'Age': [55],
    'Sex': ['M'],
    'ChestPainType': ['ATA'],
    'RestingBP': [130],
    'Cholesterol': [250],
    'FastingBS': [0],
    'RestingECG': ['Normal'],
    'MaxHR': [150],
    'ExerciseAngina': ['N'],
    'Oldpeak': [1.2],
    'ST_Slope': ['Flat']
}

# Convert to DataFrame
sample_df = pd.DataFrame(sample_data)

# Step 1: Preprocess the input data

# Define the columns for numeric and categorical values
numeric_cols = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']
categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# Step 2: Encode categorical features using OneHotEncoder


In [32]:
encoded_categorical = enc_heart.transform(sample_df[categorical_cols])

# Step 3: Scale numeric features using MinMaxScaler
sample_df[numeric_cols] = scaler_heart.transform(sample_df[numeric_cols])

# Combine the numeric and encoded categorical columns into a final array for prediction
sample_preprocessed = np.concatenate([sample_df[numeric_cols], encoded_categorical], axis=1)



ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- HeartDisease


In [None]:
sample_preprocessed.columns

In [None]:
# Step 4: Make a prediction using the trained model
prediction = model_heart.predict(sample_preprocessed)

# Output the result
if prediction == 1:
    result = "The person is at risk of a heart attack."
else:
    result = "The person is not at risk of a heart attack."

print(result)