### 1. Importing the Libraries

In [1]:
import pandas as pd

### 2. Importing the Dataset

In [2]:
data  = pd.read_csv('heart.csv')
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


### 3. Taking Care of Missing Values

In [3]:
data_dup = data.duplicated().any()
data_dup

True

In [4]:
data = data.drop_duplicates()
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,68,0,2,120,211,0,0,115,0,1.5,1,0,2,1
733,44,0,2,108,141,0,1,175,0,0.6,1,0,2,1
739,52,1,0,128,255,0,1,161,1,0.0,2,1,3,0
843,59,1,3,160,273,0,0,125,0,0.0,2,0,2,0


In [5]:
data_dup = data.duplicated().any()
data_dup

False

### 4. Taking Care of Duplicate Values

In [6]:
data.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

### 5. Data Processing

In [7]:
data.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [8]:
cate_val =[]
cont_val = []

for column in data.columns:
    if data[column].nunique() <=10:
        cate_val.append(column)
    else:
        cont_val.append(column)

In [9]:
cate_val

['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal', 'target']

In [10]:
cont_val

['age', 'trestbps', 'chol', 'thalach', 'oldpeak']

### 6. Encoding Categorical Data

In [11]:
data['cp'].unique()

array([0, 1, 2, 3], dtype=int64)

In [12]:
cate_val

['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal', 'target']

In [13]:
cate_val.remove('target')
cate_val.remove('sex')
data = pd.get_dummies(data, columns= cate_val,drop_first=True)

In [14]:
data.head()

Unnamed: 0,age,sex,trestbps,chol,thalach,oldpeak,target,cp_1,cp_2,cp_3,...,exang_1,slope_1,slope_2,ca_1,ca_2,ca_3,ca_4,thal_1,thal_2,thal_3
0,52,1,125,212,168,1.0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,1
1,53,1,140,203,155,3.1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
2,70,1,145,174,125,2.6,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
3,61,1,148,203,161,0.0,0,0,0,0,...,0,0,1,1,0,0,0,0,0,1
4,62,0,138,294,106,1.9,0,0,0,0,...,0,1,0,0,0,1,0,0,1,0


### 7. Feature Scaling

In [17]:
from sklearn.preprocessing import StandardScaler

In [18]:
sc = StandardScaler()
data[cont_val] = sc.fit_transform(data[cont_val])

In [19]:
data.head()

Unnamed: 0,age,sex,trestbps,chol,thalach,oldpeak,target,cp_1,cp_2,cp_3,...,exang_1,slope_1,slope_2,ca_1,ca_2,ca_3,ca_4,thal_1,thal_2,thal_3
0,-0.267966,1,-0.376556,-0.667728,0.806035,-0.037124,0,0,0,0,...,0,0,1,0,1,0,0,0,0,1
1,-0.15726,1,0.47891,-0.841918,0.237495,1.773958,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
2,1.724733,1,0.764066,-1.403197,-1.074521,1.342748,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
3,0.728383,1,0.935159,-0.841918,0.499898,-0.899544,0,0,0,0,...,0,0,1,1,0,0,0,0,0,1
4,0.839089,0,0.364848,0.919336,-1.905464,0.739054,0,0,0,0,...,0,1,0,0,0,1,0,0,1,0


### 8. Splitting The Dataset Into The Training Set And Test Set

In [20]:
X= data.drop("target",axis=1)
y= data["target"]

In [21]:
from sklearn.model_selection import train_test_split

In [22]:
X_train,X_test,y_train,y_test =train_test_split(X,y,test_size=0.2,random_state=1)

### 9. Logistic Regression

In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [24]:
log = LogisticRegression()

In [25]:
log.fit(X_train,y_train)

y_pred1 = log.predict(X_test)
accuracy_score(y_test,y_pred1)

0.8360655737704918

### 10. SVC

In [26]:
from sklearn import svm

In [27]:
svm = svm.SVC()
svm.fit(X_train,y_train)

SVC()

In [28]:
y_pred2=svm.predict(X_test) 

In [29]:
accuracy_score(y_test,y_pred2)

0.8032786885245902

### 11. KNeighbors Classifier

In [30]:
from sklearn.neighbors import KNeighborsClassifier

In [31]:
knn = KNeighborsClassifier()

In [32]:
knn.fit(X_train,y_train)

KNeighborsClassifier()

In [33]:
y_pred3 = knn.predict(X_test)

In [34]:
accuracy_score(y_test,y_pred3)

0.7868852459016393

### Non-Linear ML Algorithms

In [52]:
data = pd.read_csv('heart.csv')

In [53]:
data = data.drop_duplicates()

In [54]:
data.shape

(302, 14)

In [55]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [56]:
X = data.drop("target",axis=1)
y = data["target"]

In [57]:
X_train,X_test,y_train,y_test =train_test_split(X,y,test_size=0.2,random_state=42)

### 12. Decision Tree Classifier

In [58]:
from sklearn.tree import DecisionTreeClassifier

In [59]:
dt =DecisionTreeClassifier()

In [60]:
dt.fit(X_train,y_train)

DecisionTreeClassifier()

In [61]:
y_pred4 =dt.predict(X_test)

In [62]:
accuracy_score(y_test,y_pred3)

0.5245901639344263

### 13. Random Forest Classifier

In [63]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf.fit(X_train,y_train)

y_pred5 =rf.predict(X_test)
accuracy_score(y_test,y_pred5)

0.8524590163934426

### 14. Gradient Boosting Classifier

In [64]:
from sklearn.ensemble import GradientBoostingClassifier

In [65]:
gdc = GradientBoostingClassifier()

In [66]:
gdc.fit(X_train,y_train)

GradientBoostingClassifier()

In [67]:
y_pred6 = gdc.predict(X_test)

In [68]:
accuracy_score(y_test,y_pred6)

0.8032786885245902

### 15. Prediction on New Data

In [69]:
import pandas as pd

In [None]:
new_data = pd.DataFrame({
    'age':52,
    'sex':1,
    'cp':0,
    'trestbps':125,
    'chol':212,
    'fbs':0,
    'restecg':1,
    'thalach':168,
    'exang':0,
    'oldpeak':1.0,
     'slope':2,
    'ca':2,
    'thal':3,    
},index=[0])

In [None]:
new_data

### 16. Save Model Usign Joblib

### GUI