In [6]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import pickle

# Read the heart dataset from the CSV file next to the notebook
file_path = 'heart.csv'
heart_data = pd.read_csv(file_path)

# Categorical columns are replaced in place by integer codes; one fitted
# LabelEncoder is kept per column so the code -> category mapping stays available
categorical_columns = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
label_encoders = {column: LabelEncoder() for column in categorical_columns}

for column, le in label_encoders.items():
    heart_data[column] = le.fit_transform(heart_data[column])

# 'HeartDisease' is the prediction target; every remaining column is a feature
y = heart_data['HeartDisease']
X = heart_data.drop(columns=['HeartDisease'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same fitted
# transformation to both splits (column names are re-attached afterwards)
scaler = StandardScaler().fit(X_train)
X_train_scaled = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

# Train the support vector classifier; only random_state is set explicitly
svm_model = SVC(random_state=42).fit(X_train_scaled, y_train)

# Save the model and scaler to disk
filename = 'final_svm_model.sav'
scaler_filename = 'scaler.sav'
# Context managers guarantee each file is flushed and closed before it is
# re-opened for reading below, instead of relying on garbage collection.
with open(filename, 'wb') as model_file:
    pickle.dump(svm_model, model_file)
with open(scaler_filename, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Load the model and scaler from disk
# NOTE: pickle.load can execute arbitrary code. This is fine here because the
# files were written just above; never unpickle files from an untrusted source.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
with open(scaler_filename, 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

# Transform the test data using the loaded scaler and maintain column names
X_test_scaled_loaded = pd.DataFrame(loaded_scaler.transform(X_test), columns=X_test.columns)

# Make predictions once and reuse them for every metric below
# (calling loaded_model.score() would run a second, redundant prediction pass)
y_pred = loaded_model.predict(X_test_scaled_loaded)

# Evaluate the loaded model: fraction of test rows predicted correctly
result = accuracy_score(y_test, y_pred)
print(f"{result*100:.2f}% Accuracy")

# Print classification report
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


86.41% Accuracy
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.86      0.84        77
           1       0.89      0.87      0.88       107

    accuracy                           0.86       184
   macro avg       0.86      0.86      0.86       184
weighted avg       0.87      0.86      0.86       184

