# Predicting Heart Disease-Related Readmissions Using Machine Learning Models

In [1]:
# Import necessary libraries and load the dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Load the dataset
data = pd.read_csv('heart_disease_dataset_with_new_features.csv')

# Display the first few rows of the dataset
print(data.head())

# Data preprocessing steps

# Rows without a label cannot be used for supervised learning. Drop them
# instead of imputing: a mean-imputed label would be a fractional value that
# is not a valid class.
labeled = data.dropna(subset=['target'])

# Split data into features and target variable
features_raw = labeled.drop(columns=['target'])
y = labeled['target']

# Split data into training and testing sets BEFORE fitting any preprocessing,
# so that no statistic computed from the test rows reaches the model.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features_raw, y, test_size=0.2, random_state=42
)

# Handle missing values (impute): column means are learned from the training
# features only and then applied unchanged to the test features.
# NOTE(review): mean imputation treats every column as numeric; columns that
# look like categorical codes (e.g. cp, thal) may be better served by
# strategy='most_frequent' or by one-hot/label encoding -- confirm.
# NOTE(review): SimpleImputer may drop a column that is entirely missing in the
# data it is fitted on, which would make `columns=` below mismatch -- confirm
# against the installed scikit-learn version if such columns can occur.
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train_raw),
    columns=features_raw.columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    imputer.transform(X_test_raw),
    columns=features_raw.columns,
    index=X_test_raw.index,
)

# Full imputed feature matrix / frame, kept under their existing names for any
# code that reads them after this point. They use the training-set means, so
# they carry no information learned from the test rows.
X = pd.DataFrame(
    imputer.transform(features_raw),
    columns=features_raw.columns,
    index=features_raw.index,
)
data_imputed = X.assign(target=y)[data.columns]

# Print training and testing sets for comparison
print("Training Set:")
print(X_train.head())
print("\nTesting Set:")
print(X_test.head())

# Initialize and train the Random Forest Classifier
# (random_state is fixed so repeated runs build the same forest)
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate the model on the held-out test set
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Print evaluation metrics
print('\nModel Evaluation Metrics:')
print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-score: {f1:.2f}')
print(f'Confusion Matrix:\n{conf_matrix}')

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   48    1   0       130   256    1        0      150      1      0.0      2   
1   61    1   0       148   203    0        1      161      0      0.0      2   
2   44    0   2       118   242    0        1      149      0      0.3      1   
3   47    1   0       110   275    0        0      118      1      1.0      1   
4   56    1   3       120   193    0        0      162      0      1.9      1   

   ca  thal  target  heart_health_score  chest_pain_level  risk_score  
0   2     3       0                 328                 0         284  
1   1     3       0                 255                 0         251  
2   1     2       1                 329                50         255  
3   1     2       0                 336                 0         314  
4   0     3       1                 279                75         207  
Training Set:
      age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang 