# Import Required Libraries
Import the necessary libraries, including pandas, scikit-learn, and joblib.

In [49]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the Dataset
Load the Heart Disease UCI Dataset using pandas.

In [50]:
# Read the heart-disease CSV (path is relative to the notebook's working directory)
DATASET_CSV = 'Heart Disease UCI Dataset.csv'
data = pd.read_csv(DATASET_CSV)

# Preview the first five rows to sanity-check the columns that were parsed
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


# Preprocess the Data
Handle missing values, cast the integer-coded categorical columns to `int`, and standardize the features.

In [51]:
# Integer-coded categorical columns; they are cast to int once the frame is clean
categorical_cols = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']

# Coerce every column to numeric so placeholder tokens such as '?' become NaN,
# then drop incomplete rows from the whole frame in one step.
# (Dropping NaNs from a single column and assigning the result back re-aligns on
# the frame index, which silently puts the NaNs back into that column.)
data = data.apply(pd.to_numeric, errors='coerce').dropna()

# Safe to cast now: no missing values remain in any column
data = data.astype({col: int for col in categorical_cols})

# Separate features and target variable
# NOTE(review): the preview of the loaded frame shows an 'Unnamed: 0' column, which
# looks like a saved row index rather than a measurement; any such column is left
# out of the features if present -- confirm against the CSV.
feature_cols = [
    col for col in data.columns
    if col != 'target' and not str(col).startswith('Unnamed')
]
X = data[feature_cols]
y = data['target']

# Guard against missing values reaching the model
assert not X.isna().any().any(), "features still contain missing values"

# Standardize the features (zero mean, unit variance); the scaler is fitted on every row of X
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the Data into Training and Testing Sets
Use train_test_split from scikit-learn to split the data into training and testing sets.

In [52]:
# Split the raw (unscaled) features first so the held-out rows play no part in
# preprocessing. Same test_size and random_state as before, so the same rows
# land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply that same transform to the
# test rows; fitting on all rows would leak test-set statistics into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

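# Train the Stacking Classifier
Initialize and train a stacking ensemble (SVC and logistic regression as base models, logistic regression as the meta-model) on the training data, then report its test accuracy and save it.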

In [None]:
# Define the base models.
# probability=True makes SVC expose predict_proba; those probability estimates are
# calibrated with internally shuffled cross-validation, so random_state is pinned
# to make the fitted model (and the reported accuracy) reproducible across runs.
base_models = [
    ('svc', SVC(probability=True, random_state=42)),
    ('lr', LogisticRegression())
]

# Define the meta-model that combines the base models' predictions
meta_model = LogisticRegression()

# Create the stacking classifier
stacking_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model)

# Train the stacking classifier
stacking_clf.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = stacking_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Stacking Model Accuracy: {accuracy}")

# Ensure the output directory exists before writing to it
os.makedirs('models', exist_ok=True)

# Save the model (joblib.dump returns the list of written file names, shown as the cell output)
joblib.dump(stacking_clf, 'models/stacking_model.pkl')

Stacking Model Accuracy: 0.8852459016393442


['model/stacking_model.pkl']

# Evaluate the Model
Calculate the accuracy of the model using the testing data.

In [54]:
# Predict labels for the held-out rows, then report the share predicted correctly
y_pred = stacking_clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Stacking Model Accuracy: {accuracy}")

Stacking Model Accuracy: 0.8852459016393442


# Save the Model
Save the trained model to a folder using joblib.

In [None]:
# Make sure the target folder exists so this cell does not depend on another
# cell having created it
os.makedirs('models', exist_ok=True)

# Save the model (joblib.dump returns the list of written file names, shown as the cell output)
joblib.dump(stacking_clf, 'models/stacking_model.pkl')

['model/stacking_model.pkl']