## HEART DISEASE PREDICTION-SVM

In [2]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [3]:
#Loading the data
data=pd.read_csv('heart.csv')
data=pd.DataFrame(data)
data

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


# Data cleanliness check


In [4]:
data.isnull().any()

Age               False
Sex               False
ChestPainType     False
RestingBP         False
Cholesterol       False
FastingBS         False
RestingECG        False
MaxHR             False
ExerciseAngina    False
Oldpeak           False
ST_Slope          False
HeartDisease      False
dtype: bool

No null values!

In [5]:
# checking duplicated values
data.duplicated().any()

False

No duplicate values!

In [6]:
# Now lets get some information and description of the data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


### Descriptive statistics

In [7]:
data.describe()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


In [9]:
data.groupby('Sex').mean()

  data.groupby('Sex').mean()


Unnamed: 0_level_0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F,52.492228,132.212435,241.196891,0.134715,146.139896,0.668912,0.259067
M,53.782069,132.445517,187.513103,0.25931,134.325517,0.945517,0.631724


### Handling categorical variables



In [11]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder=LabelEncoder()
data['ChestPainType']=labelencoder.fit_transform(data['ChestPainType'])
data['Sex']=labelencoder.fit_transform(data['Sex'])
data['RestingECG']=labelencoder.fit_transform(data['RestingECG'])
data['ExerciseAngina']=labelencoder.fit_transform(data['ExerciseAngina'])
data['ST_Slope']=labelencoder.fit_transform(data['ST_Slope'])




### Separating the dataset into dependent and independent variables

In [13]:
# separating the data
x=data.iloc[:,:-1] (independent variables)
x
y=data.iloc[:,-1] (dependent variable)
y

0      0
1      1
2      0
3      1
4      0
      ..
913    1
914    1
915    1
916    1
917    0
Name: HeartDisease, Length: 918, dtype: int64

In [30]:
x

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,1,1,140,289,0,1,172,0,0.0,2
1,49,0,2,160,180,0,1,156,0,1.0,1
2,37,1,1,130,283,0,2,98,0,0.0,2
3,48,0,0,138,214,0,1,108,1,1.5,1
4,54,1,2,150,195,0,1,122,0,0.0,2
...,...,...,...,...,...,...,...,...,...,...,...
913,45,1,3,110,264,0,1,132,0,1.2,1
914,68,1,0,144,193,1,1,141,0,3.4,1
915,57,1,0,130,131,0,1,115,1,1.2,1
916,57,0,1,130,236,0,0,174,0,0.0,1


### Standardizing the data (settingng the values in a range of (0-1))

In [31]:
from sklearn.preprocessing import StandardScaler
standardscaler=StandardScaler()
x=standardscaler.fit_transform(x)

### Splitting up the data

In [49]:
# splitting the data into train set and test set
from sklearn.model_selection import train_test_split
x_test,x_train,y_test,y_train=train_test_split(x,y,test_size=0.2,random_state=0)


### setting the svm model

In [33]:
# setting up the model
from sklearn.svm import SVC 
model=SVC()
model.fit(x_train,y_train)

### Making predictions on the unknown data(test set)

In [34]:
#making the predictions
x_test_predictions=model.predict(x_test)
x_test_predictions




array([1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,

### Evaluating model performance

In [35]:
#calculating accuracy score of the model on test set
from sklearn.metrics import accuracy_score
accuracy_score(x_test_predictions,y_test)

0.8487738419618529

In [36]:
#calculating accuracy score of the model on training set
from sklearn.metrics import accuracy_score
x_train_predictions=model.predict(x_train)
accuracy_score(x_train_predictions,y_train)

0.8967391304347826

### Creating a prediction system

In [45]:
# here we'll just pick a person's information randomly from our data set
input_data=(40,1,1,140,289,0,1,172,0,0,2)

# making input data as an array
input_data_as_array=np.asarray(input_data)

#reshaping input data
input_data_reshaped=input_data_as_array.reshape(1,-1)

#standardize input data
std_data=standardscaler.transform(input_data_reshaped)

#make predictions
predictions=model.predict(std_data)
print(predictions)

if (predictions[0]==0):
    print("A person has heart disease")
else:
    print("A person does not have heart disease")

[0]
A person has heart disease




In [46]:
#More predictions
input_data=(68,1,0,144,193,1,1,141,0,3.4,1)

# making input data as an array
input_data_as_array=np.asarray(input_data)

#reshaping input data
input_data_reshaped=input_data_as_array.reshape(1,-1)

#standardize input data
std_data=standardscaler.transform(input_data_reshaped)

#make predictions
predictions=model.predict(std_data)
print(predictions)

if (predictions[0]==0):
    print("A person has heart disease")
else:
    print("A person does not have heart disease")

[1]
A person does not have heart disease




In [51]:
input_data=(60,0,1,100,93,1,0,120,0,3.4,2) (random made data from nowhere )

# making input data as an array
input_data_as_array=np.asarray(input_data)

#reshaping input data
input_data_reshaped=input_data_as_array.reshape(1,-1)

#standardize input data
std_data=standardscaler.transform(input_data_reshaped)

#make predictions
predictions=model.predict(std_data)
print(predictions)

if (predictions[0]==0):
    print("A person has heart disease")
else:
    print("A person does not have heart disease")

[1]
A person does not have heart disease


