<a href="https://colab.research.google.com/github/RitoDas1923/ML/blob/main/Hearts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
# Source of the heart-disease table: a CSV hosted on GitHub (fetched over the network).
HEART_CSV_URL = 'https://raw.githubusercontent.com/RitoDas1923/ML_Datasets/main/heart.csv'
df = pd.read_csv(HEART_CSV_URL)





# Exploratory Data Analysis and Feature Engineering


In [None]:
# Preview the first rows of the loaded frame (pandas shows 5 rows by default).
df.head()

In [None]:
# Print the frame summary: column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Per-column flag: True if the column contains at least one missing value.
df.isna().any()

In [None]:
# Correlation of every feature with the label, sorted ascending.
# The label's self-correlation (always 1.0) is removed *by name*; slicing off the
# last row by position would silently drop the wrong entry if `target` were not
# the final column of the frame.
corr_target = df.corr()['target'].drop('target').sort_values()
print(corr_target)

# Bar chart of the same values so weak and strong features stand out at a glance.
ax = corr_target.plot(kind='bar')
ax.set_title('Feature correlation with target')
ax.set_ylabel('Pearson correlation')
plt.show()

In [None]:
# Class balance: number of rows for each value of the label column.
sns.countplot(data=df, x='target')


# Train - Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the label and `fbs`.
# Author's note: `fbs` has an almost negligible correlation with the target,
# and removing it was observed to increase accuracy.
x = df.drop(columns=['target', 'fbs'])
y = df['target']

# 70 % train / 30 % test, seeded so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    test_size=0.3,
    random_state=42,
)

# Preprocessing Using Quantile Transformation

In [None]:
from sklearn import preprocessing

# Learn the quantile mapping from the training features only, then apply the
# same fitted mapping to both splits so the test set never influences the fit.
quantile_transformer = preprocessing.QuantileTransformer(random_state=42, n_quantiles=212)
quantile_transformer.fit(x_train)
x_train_trans = quantile_transformer.transform(x_train)
x_test_trans = quantile_transformer.transform(x_test)

# Logistic Regression


In [None]:
from sklearn.linear_model import LogisticRegression

# Default-hyperparameter logistic regression trained on the transformed features.
# fit() returns the estimator itself, so construction and training can be chained.
lg = LogisticRegression().fit(x_train_trans, y_train)
lg  # show the fitted estimator as the cell output

LogisticRegression()

In [None]:
# Predict class labels for the quantile-transformed test features.
pred_lg = lg.predict(x_test_trans)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Score the logistic-regression predictions against the held-out labels.
lg_confusion = confusion_matrix(y_test, pred_lg)
lg_report = classification_report(y_test, pred_lg)
lg_accuracy = accuracy_score(y_test, pred_lg)

print(lg_confusion)
print('\n')
print(lg_report)
print("Accuracy : " + str(lg_accuracy))

# Overlay true and predicted labels, indexed by position in the test split.
fig, ax = plt.subplots()
ax.plot(y_test.tolist())
ax.plot(pred_lg)
plt.show()


# K-Nearest Neighbours

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# List the hyperparameter names accepted by KNeighborsClassifier (candidates for tuning).
KNeighborsClassifier().get_params().keys()

In [None]:
from sklearn.model_selection import GridSearchCV

# Tune KNN with 10-fold cross-validation on the quantile-transformed training set.
#
# `leaf_size` is intentionally NOT part of the grid: the scikit-learn docs describe
# it as a knob for the speed and memory of the BallTree/KDTree neighbour search,
# not for the classifier's predictive behaviour. Sweeping its 49 values multiplied
# the number of fits by 49 without giving the search anything useful to choose from.
knn = KNeighborsClassifier()
n_neighbors = list(range(1, 30))  # candidate k: 1..29
p = [1, 2]                        # Minkowski power: 1 = Manhattan, 2 = Euclidean
param_grid = dict(n_neighbors=n_neighbors, p=p)

grid_search = GridSearchCV(knn, param_grid, verbose=1, n_jobs=-1, cv=10)
grid_search.fit(x_train_trans, y_train)

# GridSearchCV refits the best configuration on the whole training set by default,
# so predict() below uses that refit estimator.
knn_pred = grid_search.predict(x_test_trans)


In [None]:
# NOTE: `grid_search` is rebound by every model's tuning cell, so run this cell
# directly after the KNN search or the estimator printed here will be another model's.
print(grid_search.best_estimator_)

knn_confusion = confusion_matrix(y_test, knn_pred)
knn_report = classification_report(y_test, knn_pred)
knn_accuracy = accuracy_score(y_test, knn_pred)

print(knn_confusion)
print('\n')
print(knn_report)
print("Accuracy : ", knn_accuracy)

# Overlay true and predicted labels, indexed by position in the test split.
fig, ax = plt.subplots()
ax.plot(y_test.tolist())
ax.plot(knn_pred)
plt.show()

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
# List the hyperparameter names accepted by DecisionTreeClassifier (candidates for tuning).
DecisionTreeClassifier().get_params().keys()

In [None]:
# Tune a decision tree with 10-fold cross-validation on the transformed training set.
#
# The estimator is seeded: scikit-learn permutes features at each split (and breaks
# ties between equally good splits at random), so an unseeded tree can produce a
# different "best" model and test score on every run. 42 matches the seed used for
# the train/test split and the quantile transformer.
dtrees = DecisionTreeClassifier(random_state=42)

param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': list(range(1, 10)),          # 1..9
    'min_samples_split': list(range(2, 10)),  # 2..9
    'min_samples_leaf': list(range(1, 5)),    # 1..4
}

grid_search = GridSearchCV(dtrees, param_grid, verbose=1, n_jobs=-1, cv=10)
grid_search.fit(x_train_trans, y_train)

# predict() uses the best configuration refit on the full training set.
pred_dt = grid_search.predict(x_test_trans)

In [None]:
# NOTE: `grid_search` is rebound by every model's tuning cell, so run this cell
# directly after the decision-tree search.
print(grid_search.best_estimator_)

dt_confusion = confusion_matrix(y_test, pred_dt)
dt_report = classification_report(y_test, pred_dt)
dt_accuracy = accuracy_score(y_test, pred_dt)

print(dt_confusion)
print('\n')
print(dt_report)
print("Accuracy : ", dt_accuracy)

# Overlay true and predicted labels, indexed by position in the test split.
fig, ax = plt.subplots()
ax.plot(y_test.tolist())
ax.plot(pred_dt)
plt.show()


# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
# List the hyperparameter names accepted by RandomForestClassifier (candidates for tuning).
RandomForestClassifier().get_params().keys()

In [None]:
# Tune a random forest with 3-fold cross-validation on the transformed training set.
#
# The forest is seeded: bootstrap sampling and per-split feature sub-sampling are
# random, so without a fixed `random_state` the selected hyperparameters and the
# reported test accuracy change from run to run. 42 matches the seed used for the
# train/test split and the quantile transformer.
rf = RandomForestClassifier(random_state=42)
param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300]
}
grid_search = GridSearchCV(rf, param_grid, verbose=3, n_jobs=-1, cv=3)
grid_search.fit(x_train_trans, y_train)

# predict() uses the best configuration refit on the full training set.
pred_rf = grid_search.predict(x_test_trans)

In [None]:
# NOTE: `grid_search` is rebound by every model's tuning cell, so run this cell
# directly after the random-forest search.
print(grid_search.best_estimator_)

rf_confusion = confusion_matrix(y_test, pred_rf)
rf_report = classification_report(y_test, pred_rf)
rf_accuracy = accuracy_score(y_test, pred_rf)

print(rf_confusion)
print('\n')
print(rf_report)
print("Accuracy : ", rf_accuracy)

# Overlay true and predicted labels, indexed by position in the test split.
fig, ax = plt.subplots()
ax.plot(y_test.tolist())
ax.plot(pred_rf)
plt.show()

# Support Vector Machine

In [None]:
from sklearn.svm import SVC
# List the hyperparameter names accepted by SVC (candidates for tuning).
SVC().get_params().keys()

In [None]:
# Tune an RBF-kernel SVM with 10-fold cross-validation: sweep the regularisation
# strength C and the kernel width gamma over a logarithmic grid.
svm = SVC()
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
    kernel=['rbf'],
)
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=10,
    n_jobs=-1,
    verbose=3,
)
grid_search.fit(x_train_trans, y_train)

# predict() uses the best configuration refit on the full training set.
pred_svm = grid_search.predict(x_test_trans)


In [None]:
# NOTE: `grid_search` is rebound by every model's tuning cell, so run this cell
# directly after the SVM search.
print(grid_search.best_estimator_)

svm_confusion = confusion_matrix(y_test, pred_svm)
svm_report = classification_report(y_test, pred_svm)
svm_accuracy = accuracy_score(y_test, pred_svm)

print(svm_confusion)
print('\n')
print(svm_report)
print("Accuracy : ", svm_accuracy)

# Overlay true and predicted labels, indexed by position in the test split.
fig, ax = plt.subplots()
ax.plot(y_test.tolist())
ax.plot(pred_svm)
plt.show()

# K Means Clustering

In [None]:
from sklearn.cluster import KMeans
# List the hyperparameter names accepted by KMeans.
KMeans().get_params().keys()

dict_keys(['algorithm', 'copy_x', 'init', 'max_iter', 'n_clusters', 'n_init', 'random_state', 'tol', 'verbose'])

In [None]:
# Two-cluster K-Means used as a classifier.
#
# K-Means numbers its clusters arbitrarily, so cluster id 0 is not guaranteed to
# correspond to class 0; scoring raw cluster ids against the labels can report an
# inverted accuracy. The clusters are therefore learned on the training features,
# each cluster is mapped to the majority training class inside it, and the test
# rows receive the class of the cluster they fall into.
# `n_init` is pinned explicitly so the result does not depend on the installed
# scikit-learn version's default.
kmm = KMeans(n_clusters=2, n_init=10, random_state=42)
train_clusters = kmm.fit_predict(x_train_trans)

# cluster id -> most frequent class label among the training rows in that cluster
y_train_values = np.asarray(y_train)
cluster_to_label = np.array([
    np.bincount(y_train_values[train_clusters == cluster_id], minlength=2).argmax()
    for cluster_id in range(kmm.n_clusters)
])

# Class predictions (0/1) for the held-out rows.
pred_kmm = cluster_to_label[kmm.predict(x_test_trans)]

In [None]:
# Score the K-Means assignments against the held-out labels.
kmm_confusion = confusion_matrix(y_test, pred_kmm)
kmm_report = classification_report(y_test, pred_kmm)
kmm_accuracy = accuracy_score(y_test, pred_kmm)

print(kmm_confusion)
print('\n')
print(kmm_report)
print("Accuracy : ", kmm_accuracy)

# Overlay true labels and assigned labels, indexed by position in the test split.
fig, ax = plt.subplots()
ax.plot(y_test.tolist())
ax.plot(pred_kmm)
plt.show()

# Artificial Neural Network (ANN)

In [None]:
import tensorflow as tf

In [None]:
# Feed-forward binary classifier: two ReLU hidden layers (8 and 14 units) followed
# by a single sigmoid unit that outputs the probability of the positive class.
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dense(units=14, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Adam optimiser with binary cross-entropy loss; accuracy is tracked during training.
ann.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train, predict and score the network on the quantile-transformed features.
# Training, validation and final prediction must all see the SAME feature scale:
# fitting on the raw frame while validating on transformed data makes the
# validation metrics meaningless, and predicting on raw data afterwards evaluates
# a different input distribution again.
ann.fit(
    x=x_train_trans,
    y=y_train,
    epochs=220,
    batch_size=16,
    validation_data=(x_test_trans, y_test),
)
predictions = ann.predict(x_test_trans)

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels. Done as a single
# vectorised comparison (shape is preserved as (n_samples, 1)); this also avoids a
# loop variable named `x`, which would overwrite the feature DataFrame `x`.
final_predictions = (predictions > 0.5).astype(int)

print(confusion_matrix(y_test, final_predictions))
print('\n')
print(classification_report(y_test, final_predictions))
print("Accuracy : ", accuracy_score(y_test, final_predictions))

# Overlay true and predicted labels, indexed by position in the test split.
plt.plot(y_test.tolist())
plt.plot(final_predictions)
plt.show()

Thus, from these observations, we can conclude that the Artificial Neural Network (ANN) and K-Nearest Neighbours (KNN) models have the highest accuracy on this heart dataset.

