In [11]:
import pandas as pd

# Read the heart-disease records from the CSV file into a DataFrame.
df = pd.read_csv('Heart_Disease_Prediction.csv')

# Print three quick views of the data, in this order:
#   1. the first few rows, to see the column layout,
#   2. summary statistics,
#   3. the number of missing values in each column.
for summary in (df.head(), df.describe(), df.isnull().sum()):
    print(summary)


   Age  Sex  Chest pain type   BP  Cholesterol  FBS over 120  EKG results  \
0   70    1                4  130          322             0            2   
1   80    0                3  115          564             0            2   
2   55    1                2  124          261             0            0   
3   65    1                4  128          263             0            0   
4   45    0                2  120          269             0            2   

   Max HR  Exercise angina  ST depression  Slope of ST  \
0     109                0            2.4            2   
1     160                0            1.6            2   
2     141                0            0.3            1   
3     105                1            0.2            2   
4     121                1            0.2            1   

   Number of vessels fluro  Thallium Heart Disease  
0                        3         3      Presence  
1                        0         7       Absence  
2                        0   

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv('Heart_Disease_Prediction.csv')

# Decide the train/test membership of every row BEFORE computing any statistic
# from the data. Splitting the raw frame with the same test_size and
# random_state selects the same rows as splitting the encoded features would,
# because the shuffle depends only on the number of rows and the seed.
train_rows, test_rows = train_test_split(df, test_size=0.2, random_state=42)

# Handle missing values for numeric columns.
# The fill value for each column is its mean over the TRAINING rows only, so no
# information from the held-out test rows leaks into the features the model is
# trained on. The same training means are then applied to every row.
numeric_columns = df.select_dtypes(include=['number']).columns
train_means = train_rows[numeric_columns].mean()
df[numeric_columns] = df[numeric_columns].fillna(train_means)

# Encode categorical features (one indicator column per level, first level dropped)
categorical_columns = ['Sex', 'Chest pain type', 'FBS over 120', 'EKG results',
                       'Exercise angina', 'Slope of ST', 'Number of vessels fluro', 'Thallium']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# Separate features and target variable
X = df.drop('Heart Disease', axis=1)
y = df['Heart Disease']

# Materialise the split chosen above. Selecting by index label keeps the rows
# in the order train_test_split produced them.
# NOTE: this relies on the frame having unique index labels, which holds for
# the default index created by read_csv above.
X_train, X_test = X.loc[train_rows.index], X.loc[test_rows.index]
y_train, y_test = y.loc[train_rows.index], y.loc[test_rows.index]

# Feature scaling: fit the scaler on the training features only, then reuse the
# fitted scaler for the test features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Output the processed data (optional, for verification)
print(X_train[:5])  # Show the first 5 rows of the processed training data


[[-0.59218926  0.12509482  0.40639114  0.56612833 -0.9343038  -1.52416434
   2.27407752 -0.59160798 -1.         -0.39380225 -0.09667365 -1.02817453
  -0.7367884   1.06705851 -0.2632621  -0.4840307  -0.40935018 -0.27317918
  -0.24253563 -0.81335015]
 [ 0.48724432  2.14322204  1.47854898 -0.39777057  1.95963424  0.65609723
  -0.43973875 -0.59160798  1.         -0.39380225 -0.09667365  0.97259753
   1.35724179 -0.93715573  3.79849594 -0.4840307  -0.40935018 -0.27317918
  -0.24253563  1.22948278]
 [-0.16041582 -0.09914154 -0.08095333  1.04807779 -0.9343038   0.65609723
  -0.43973875  1.69030851 -1.          2.53934556 -0.09667365  0.97259753
  -0.7367884  -0.93715573 -0.2632621  -0.4840307  -0.40935018  3.66060104
  -0.24253563 -0.81335015]
 [-0.4842459   0.46144936 -0.33437246  0.60994192 -0.42360885  0.65609723
  -0.43973875  1.69030851 -1.         -0.39380225 -0.09667365 -1.02817453
  -0.7367884   1.06705851 -0.2632621   2.06598468 -0.40935018 -0.27317918
  -0.24253563  1.22948278]
 [-0

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Build a logistic-regression classifier with default settings and fit it on
# the training split in one step (fit returns the estimator itself).
model = LogisticRegression().fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

# Score the predictions against the true test labels: overall accuracy plus
# the per-class classification report.
accuracy, report = (
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
)

# Emit both results, one after the other.
print(
    f'Accuracy: {accuracy}',
    f'Classification Report: \n{report}',
    sep='\n',
)


Accuracy: 0.9074074074074074
Classification Report: 
              precision    recall  f1-score   support

     Absence       0.89      0.97      0.93        33
    Presence       0.94      0.81      0.87        21

    accuracy                           0.91        54
   macro avg       0.92      0.89      0.90        54
weighted avg       0.91      0.91      0.91        54



In [15]:
import pickle

# Serialize the trained model and the fitted StandardScaler to disk, each into
# its own pickle file. `model` and `scaler` are expected to already exist in
# the notebook namespace.
for path, artifact in (('model.pkl', model), ('scaler.pkl', scaler)):
    with open(path, 'wb') as file:
        pickle.dump(artifact, file)
