# Heart Disease analysis

### Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

: 

### Data collection

In [None]:
# Load the heart-disease CSV (path is relative to the working directory) into a pandas DataFrame
heart_data = pd.read_csv('heart.csv')

## Data Analysis

In [None]:
# Preview the first five rows of the dataset
heart_data.head()

In [None]:
# Preview the last five rows of the dataset
heart_data.tail()

In [None]:
# Number of rows and columns in the dataset, as a (rows, columns) tuple
heart_data.shape

In [None]:
# Print a summary of the dataset: column names, non-null counts and dtypes
heart_data.info()

In [None]:
# Count the missing values in each column
heart_data.isnull().sum()

In [None]:
# Check whether the dataset contains any duplicated rows (True if at least one exists)
heart_data.duplicated().any()

In [None]:
# Drop duplicated rows (the first occurrence of each is kept), rebind heart_data
# to the de-duplicated frame, and display it
heart_data=heart_data.drop_duplicates()
heart_data

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
heart_data.describe()

### Exploring Relations: Heatmaps with Python for data visualization

In [None]:
# Heatmap of the pairwise correlation between the dataset's columns
sns.heatmap(heart_data.corr())

In [None]:
# Plot how many records fall into each class of the target variable.
# countplot counts the categories of ONE variable, so that variable has to be
# named explicitly; passing only the whole DataFrame does not yield class counts.
sns.countplot(x='target', data=heart_data)

In [None]:
# Check the distribution of the target variable (number of rows per class)
heart_data['target'].value_counts()

##### 1--> Defective heart
##### 0--> Healthy heart

### Data preprocessing

In [None]:
# Partition the columns by cardinality: a column with at most 10 distinct
# values is treated as categorical, every other column as numerical.
cate_val = [
    column for column in heart_data.columns
    if heart_data[column].nunique() <= 10
]
num_val = [
    column for column in heart_data.columns
    if heart_data[column].nunique() > 10
]

In [None]:
# Columns classified as categorical (at most 10 distinct values)
cate_val

In [None]:
# Columns classified as numerical (more than 10 distinct values)
num_val

### Encoding Categorical Data

In [None]:
# Inspect the distinct values present in the 'cp' column
heart_data['cp'].unique()

In [None]:
#cate_val.remove('sex')
#cate_val.remove('target')
#heart_data

### Feature Scaling

In [None]:
# Preview the data before the numeric columns are scaled
heart_data.head()

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise the numeric columns (those listed in num_val) and write the
# scaled values back into heart_data, overwriting the raw values.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split performed later, so test-set statistics leak into the scaling —
# consider fitting on the training split only and transforming the test split.
st = StandardScaler()
heart_data[num_val] = st.fit_transform(heart_data[num_val])

In [None]:
# Preview the data after the numeric columns have been scaled
heart_data.head()

### Splitting the features and target

In [None]:
# Separate the label from the features: Y is the 'target' column,
# X is every remaining column.
Y = heart_data['target']
X = heart_data.drop(columns='target')

In [None]:
# Display the feature matrix
X

In [None]:
# Display the target labels
Y

### Splitting the data into train data and test data

In [None]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=42,
)
# Shapes of the full, training and test feature matrices
print(X.shape, X_train.shape, X_test.shape)

## Model Training
### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with the lbfgs solver, regularisation strength C=1.0,
# up to 1000 solver iterations and a fixed seed.
lr = LogisticRegression(
    C=1.0,
    solver='lbfgs',
    max_iter=1000,
    random_state=42,
)
# Fit the model on the training split only
lr.fit(X_train, Y_train)

In [None]:
# Predict labels for the held-out test features with the logistic regression model
Y_pred1=lr.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the logistic regression predictions against the true test labels
accuracy_score(Y_test,Y_pred1)

### SVC

In [None]:
from sklearn.svm import SVC
# Support vector classifier with default hyper-parameters, fitted on the
# training split. The fitted estimator is bound to the name `svm`.
svm = SVC()
svm.fit(X_train, Y_train)

In [None]:
# Predict on the test features with the SVC and compute its accuracy
Y_pred2=svm.predict(X_test)
accuracy_score(Y_test,Y_pred2)

### KNeighbors Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier using the 2 closest training samples, fitted on the training split.
# NOTE(review): with two target classes an even k can produce tied votes —
# consider an odd n_neighbors; confirm the choice of 2 is intentional.
knn=KNeighborsClassifier(n_neighbors=2)
knn.fit(X_train,Y_train)

In [None]:
# Predict on the test features with the KNN classifier and compute its accuracy
Y_pred3=knn.predict(X_test)
accuracy_score(Y_test,Y_pred3)

### Non-Linear ML Algorithms

In [None]:
# Re-load the CSV into a separate frame, so `data` holds the values as stored
# in the file (heart_data had its numeric columns overwritten by scaling)
data=pd.read_csv('heart.csv')
data.head()

In [None]:
# Drop duplicated rows from the re-loaded data and show the resulting (rows, columns)
data=data.drop_duplicates()
data.shape

In [None]:
# Separate the label column from the features in the re-loaded data.
Y = data['target']
X = data.drop(columns='target')

In [None]:
# Split the re-loaded data 80/20, stratified on Y, with the seed fixed at 42.
# This rebinds X_train, X_test, Y_train and Y_test, replacing the earlier split.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,stratify=Y,random_state=42)
# Shapes of the full, training and test feature matrices
print(X.shape,X_train.shape,X_test.shape)

### DecisionTreeClassifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree fitted on the training split. The seed is fixed so the fitted
# tree, and therefore the reported accuracy, is reproducible between runs
# (same random_state=42 as the train/test split and the logistic regression).
dt=DecisionTreeClassifier(random_state=42)
dt.fit(X_train,Y_train)

In [None]:
# Predict on the test features with the decision tree and compute its accuracy
Y_pred4=dt.predict(X_test)
accuracy_score(Y_test,Y_pred4)

### RandomForestClassifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest fitted on the training split. The seed is fixed so the
# bootstrap samples and feature choices, and therefore the reported accuracy,
# are reproducible between runs (same random_state=42 as the train/test split).
rf=RandomForestClassifier(random_state=42)
rf.fit(X_train,Y_train)

In [None]:
# Predict on the test features with the random forest and compute its accuracy
Y_pred5=rf.predict(X_test)
accuracy_score(Y_test,Y_pred5)

In [None]:
# Collect the test-set accuracy of every model into one table for comparison.
# Row order: lr -> Y_pred1, svm -> Y_pred2, knn -> Y_pred3, dt -> Y_pred4, rf -> Y_pred5.
final_data = pd.DataFrame({
    'Models': ['lr', 'svm', 'knn', 'dt', 'rf'],
    'ACC': [
        accuracy_score(Y_test, Y_pred1),
        accuracy_score(Y_test, Y_pred2),
        accuracy_score(Y_test, Y_pred3),
        accuracy_score(Y_test, Y_pred4),
        # Random forest predictions; the decision tree's Y_pred4 was
        # previously scored a second time here.
        accuracy_score(Y_test, Y_pred5),
    ]
})
final_data

In [None]:
# Bar chart comparing the accuracy (ACC) of each model
sns.barplot(x=final_data['Models'], y=final_data['ACC'])

In [None]:
# Rebuild the full feature matrix and labels from `data` and show the feature matrix shape
X=data.drop('target',axis=1)
Y=data['target']
X.shape

In [None]:
# Create a fresh logistic regression model; this rebinds `lr`, replacing the model fitted earlier.
lr=LogisticRegression(max_iter=1000)
# Train on the current X_train/Y_train, i.e. the split taken from the re-loaded
# `data` frame, which was not passed through the StandardScaler.
# NOTE(review): the X and Y rebuilt in the preceding cell are not used here —
# confirm whether the final model was meant to be fitted on the full dataset.
lr.fit(X_train,Y_train)

# Model Evaluation

### Prediction on new Data

In [130]:
# One patient record wrapped in a single-row DataFrame (row label 0) so it can
# be handed to a fitted model's predict(); displayed afterwards for inspection.
new_data = pd.DataFrame(
    dict(
        age=52,
        sex=1,
        cp=0,
        trestbps=125,
        chol=212,
        fbs=0,
        restecg=1,
        thalach=168,
        exang=0,
        oldpeak=1.0,
        slope=2,
        ca=2,
        thal=3,
    ),
    index=[0],
)
new_data


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3


In [131]:
# Classify the new record; new_data holds a single row, so the first (only)
# predicted label decides which message is printed. Label 0 is reported as
# "no heart disease", any other label as "has heart disease".
prediction = lr.predict(new_data)
if prediction[0] == 0:
    print("The Person does not have a Heart Disease")
else:
    print("The Person has Heart Disease")

The Person does not have a Heart Disease
