In [1]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
# Loading the dataset
data = pd.read_csv('data/heart_attack_prediction_dataset.csv')

In [4]:
# Extracting numerical values from 'Blood Pressure' column and splitting it into 'Systolic BP' and 'Diastolic BP' columns
data[['Systolic BP', 'Diastolic BP']] = data['Blood Pressure'].str.split('/', expand=True)
data.drop('Blood Pressure', axis=1, inplace=True)

In [9]:
# Dropping non-numeric columns and target column
X = data.drop(['Patient ID', 'Heart Attack Risk', 'Country', 'Continent', 'Hemisphere', 'Diet'], axis=1)
y = data['Heart Attack Risk']

In [10]:
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
# Defining the columns to be one-hot encoded
categorical_cols = ['Sex']

In [12]:
# Defining the preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_cols)
    ], remainder='passthrough'
)

In [13]:
# Transforming the features
X_train_prep = preprocessor.fit_transform(X_train)
X_test_prep = preprocessor.transform(X_test)

In [14]:
# Standardizing features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_prep)
X_test_scaled = scaler.transform(X_test_prep)

In [15]:
# Initializing SVM model
svm_model = SVC(kernel='linear', random_state=42)

In [16]:
# Training the SVM model
svm_model.fit(X_train_scaled, y_train)

In [17]:
# Predicting on the testing set
y_pred = svm_model.predict(X_test_scaled)

In [18]:
# Accuracy Calculation
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6417569880205363


In [19]:
# Classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.64      1.00      0.78      1125
           1       0.00      0.00      0.00       628

    accuracy                           0.64      1753
   macro avg       0.32      0.50      0.39      1753
weighted avg       0.41      0.64      0.50      1753



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Confusion Matrix:
[[1125    0]
 [ 628    0]]


In [None]:
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False)
plt.xlabel("Predicted labels")
plt.ylabel("True labels")
plt.title("Confusion Matrix")
plt.show()