In [None]:
# Machine Learning for Cardiovascular Data | SVM MODEL | 

In [1]:
# Import Dependencies 

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
# Load the cardiovascular dataset from CSV and, in the same chained
# expression, discard the extra index column ("Unnamed: 0") that was
# written into the file.

df = pd.read_csv("cardio_df_ultimate.csv").drop(columns=["Unnamed: 0"])

# Preview the first three rows of the cleaned frame
df.head(3)

Unnamed: 0,age_years,gender,height,weight,systolic_bp,diastolic_bp,cholesterol,gluc,smoke,alco,active,cardio,BMI
0,50,2,168,62.0,110,80,1,1,0,0,1,0,21.97
1,55,1,156,85.0,140,90,3,1,0,0,1,1,34.93
2,51,1,165,64.0,130,70,3,1,0,0,0,1,23.51


In [3]:
# SVM | Support Vector |
# Separate the prediction target from the feature matrix:
#   y -> the "cardio" label column
#   X -> every remaining column

X, y = df.drop(columns=["cardio"]), df["cardio"]

In [4]:
# Partition the features/target into train and test sets.
#   test_size=0.25  -> spells out scikit-learn's default hold-out fraction
#   random_state=13 -> makes the shuffle reproducible
#   stratify=y      -> keeps the class balance of the target in both partitions

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=13, stratify=y
)

# Recorded run: (48234, 12) -> 48,234 rows, 12 feature columns
X_train.shape

(48234, 12)

In [5]:
# Shape of the held-out test split; recorded run: 16078 rows, 12 columns
X_test.shape

(16078, 12)

In [6]:
# Creating the SVM Model
# Instantiates a support-vector classifier with a linear kernel; no other
# hyperparameter is set here.
# NOTE(review): this notebook applies no feature scaling before the model is
# fitted. SVMs are generally sensitive to feature scale, so standardising the
# features may help accuracy and training time — confirm before changing,
# since the recorded results below were produced with unscaled features.
model= SVC(kernel="linear")

In [7]:
# Fit the data with the model
# Trains the classifier on the training split only; the test split is not
# touched until the prediction step. The fitted estimator's repr is what the
# cell displays as output.
model.fit(X_train, y_train)

SVC(kernel='linear')

In [8]:
# Predict labels for the held-out test features
y_prediction = model.predict(X_test)

# Pair each prediction with its true label, row by row. Pulling the raw values
# out of y_test gives the frame a fresh 0..n-1 index directly, so no separate
# index reset is required.
results = pd.DataFrame(
    {"Prediction": y_prediction, "Actual": y_test.to_numpy()}
)
results.head()

Unnamed: 0,Prediction,Actual
0,0,0
1,1,1
2,0,1
3,1,1
4,0,0


In [9]:
# Accuracy Score - .723
# Share of test samples whose predicted label matches the true label.

acc_score = accuracy_score(y_true=y_test, y_pred=y_prediction)

print("The Accuracy Score is : ", acc_score)

The Accuracy Score is :  0.7237218559522328


In [11]:
# Confusion matrix
# Raw counts of actual vs. predicted classes on the test split.

cm = confusion_matrix(y_true=y_test, y_pred=y_prediction)

# CM DataFrame: wrap the raw counts with readable labels
# (rows = actual class, columns = predicted class)

cm_df = pd.DataFrame(
    data=cm,
    columns=["Predicted No CVD (0)", "Predicted CVD (1)"],
    index=["No CVD (0)", "Has CVD (1)"],
)
cm_df

Unnamed: 0,Predicted No CVD (0),Predicted CVD (1)
No CVD (0),6278,1767
Has CVD (1),2675,5358


In [12]:
# CM Information
#
# Every figure below is derived from the confusion matrix `cm` rather than
# typed in by hand, so the summary stays correct if the data, the split seed,
# or the model changes.

# For a binary problem the matrix is laid out as [[TN, FP], [FN, TP]]
# (rows = actual class, columns = predicted class), so flattening it
# row by row yields the four counts in that order.
tn, fp, fn, tp = (int(count) for count in cm.ravel())

# Total number of evaluated test samples
Total = tn + fp + fn + tp

print("n = ", Total)

# Each cell's count and its share of all test samples.
# Recorded run (n = 16,078): TN = 6,278 | FP = 1,767 | FN = 2,675 | TP = 5,358
for cell_name, cell_count in (("TN", tn), ("FP", fp), ("FN", fn), ("TP", tp)):
    print(f"{cell_name} = {cell_count:,} | {cell_count / Total:.3f}")

n =  16078


In [13]:
# Generate the classification report | SVM MODEL |
# Prints per-class precision, recall, f1-score and support, together with the
# overall accuracy and the macro / weighted averages, comparing the true test
# labels against the model's predictions.
print(classification_report(y_test, y_prediction))

              precision    recall  f1-score   support

           0       0.70      0.78      0.74      8045
           1       0.75      0.67      0.71      8033

    accuracy                           0.72     16078
   macro avg       0.73      0.72      0.72     16078
weighted avg       0.73      0.72      0.72     16078



In [15]:

# Consolidated summary of the SVM evaluation: confusion matrix, accuracy
# score and per-class report gathered in one cell for easy reference.
# (Heading typo "Martix" corrected to "Matrix".)

print("SVM Confusion Matrix")
display(cm_df)

print(f"Accuracy Score :  {acc_score}")
print("Classification Report")

print(classification_report(y_test, y_prediction))

SVM Confusion Martix


Unnamed: 0,Predicted No CVD (0),Predicted CVD (1)
No CVD (0),6278,1767
Has CVD (1),2675,5358


Accuracy Score :  0.7237218559522328
Classification Report
              precision    recall  f1-score   support

           0       0.70      0.78      0.74      8045
           1       0.75      0.67      0.71      8033

    accuracy                           0.72     16078
   macro avg       0.73      0.72      0.72     16078
weighted avg       0.73      0.72      0.72     16078

