In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import accuracy_score, classification_report

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.neighbors import KNeighborsClassifier as KNN
import xgboost as XGB

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for entry in names:
        print(os.path.join(folder, entry))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/heartbeat/ptbdb_abnormal.csv
/kaggle/input/heartbeat/ptbdb_normal.csv
/kaggle/input/heartbeat/mitbih_test.csv
/kaggle/input/heartbeat/mitbih_train.csv


In [2]:
# Print NumPy floats with four digits of precision from here on.
np.set_printoptions(
    precision=4,
)

In [3]:
# Read the two PTBDB CSV files; header=None keeps the first row as data.
ptbdb_normal = pd.read_csv(
    '/kaggle/input/heartbeat/ptbdb_normal.csv',
    header=None,
)
ptbdb_abnormal = pd.read_csv(
    '/kaggle/input/heartbeat/ptbdb_abnormal.csv',
    header=None,
)

# Stack both frames row-wise and renumber the index from zero.
ptbdb_combined = pd.concat(
    objs=[ptbdb_normal, ptbdb_abnormal],
    axis=0,
    ignore_index=True,
)

# Shuffle every row (frac=1) with a fixed seed so the order is repeatable.
ptbdb_combined_shuffled = ptbdb_combined.sample(frac=1, random_state=42)

# The last column is used as the target; all preceding columns are features.
y = ptbdb_combined_shuffled.iloc[:, -1]
X = ptbdb_combined_shuffled.iloc[:, :-1]

# Seeded 80/20 hold-out split. Note: no `stratify` argument is passed.
train, test, train_target, test_target = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [4]:
class Config:
    """Switches that select how the training split is rebalanced."""

    # Checked only when `oversample` is False: use the under-sampler instead.
    undersample = False
    # Checked first: resample the training split with the over-sampler.
    oversample = True

In [5]:
# Both resamplers are stochastic (SMOTE synthesises minority rows, the
# under-sampler drops majority rows at random). Seed them so the resampled
# training set is identical on every Restart & Run All; 42 matches the seed
# already used for the shuffle and the train/test split.
oversampler = SMOTE(random_state=42)
undersampler = RandomUnderSampler(random_state=42)

In [6]:
# Rebalance the training split only; `test` / `test_target` are not touched here.
# Oversampling wins if both Config flags are set, because it is checked first.
# NOTE: `train` and `train_target` are overwritten, so re-running this cell
# resamples the already-resampled data; re-run the split cell first.
if Config.oversample:
    train, train_target = oversampler.fit_resample(train, train_target)
elif Config.undersample:
    train, train_target = undersampler.fit_resample(train, train_target)
else:
    # The notebook loads the PTBDB files, so the message names that dataset
    # (it previously said "mitbih", which is never loaded).
    print("Using the original ptbdb dataset")

In [7]:
# Show (rows, columns) of the training features after the resampling cell has run.
train.shape

(16800, 187)

# **SVM**

In [8]:
# Support-vector classifier with default hyper-parameters; only the kernel
# cache size is raised to 500.
model = SVC(
    cache_size=500,
)

In [9]:
# Train the SVM on the (resampled) training split.
model.fit(X=train, y=train_target)

In [10]:
#model.score(train,train_target)

In [11]:
# Predict labels for the held-out test split with the fitted SVM.
predictions = model.predict(X=test)

In [12]:
# Per-class precision / recall / F1 on the test split, printed to 4 decimals.
report = classification_report(
    y_true=test_target,
    y_pred=predictions,
    digits=4,
)
print(report)

              precision    recall  f1-score   support

         0.0     0.7714    0.9304    0.8435       805
         1.0     0.9711    0.8946    0.9313      2106

    accuracy                         0.9045      2911
   macro avg     0.8713    0.9125    0.8874      2911
weighted avg     0.9159    0.9045    0.9070      2911



# **KNN**

In [13]:
# k-nearest-neighbours classifier with default settings; n_jobs=-1 lets the
# neighbour search use all available cores.
model = KNN(
    n_jobs=-1,
)

In [14]:
# Fit the KNN model on the (resampled) training split.
model.fit(X=train, y=train_target)

In [15]:
# Mean accuracy of the KNN model on the data it was trained on.
model.score(X=train, y=train_target)

0.967202380952381

In [16]:
# Predict labels for the held-out test split with the fitted KNN model.
predictions = model.predict(X=test)

In [17]:
# Per-class precision / recall / F1 for KNN on the test split, 4 decimals.
report = classification_report(
    y_true=test_target,
    y_pred=predictions,
    digits=4,
)
print(report)

              precision    recall  f1-score   support

         0.0     0.7929    0.9652    0.8706       805
         1.0     0.9855    0.9036    0.9428      2106

    accuracy                         0.9206      2911
   macro avg     0.8892    0.9344    0.9067      2911
weighted avg     0.9322    0.9206    0.9228      2911



# **Decision Tree**

In [18]:
# Decision tree with default hyper-parameters. The tree builder permutes
# features at random when evaluating splits, so an unseeded tree can differ
# between runs; fix the seed (42, as used for the data split) so the reported
# metrics are reproducible.
model = DTC(random_state=42)

In [19]:
# Fit the decision tree on the (resampled) training split.
model.fit(X=train, y=train_target)

In [20]:
# Predict labels for the held-out test split with the fitted decision tree.
predictions = model.predict(X=test)

In [21]:
# Per-class precision / recall / F1 for the decision tree, 4 decimals.
report = classification_report(
    y_true=test_target,
    y_pred=predictions,
    digits=4,
)
print(report)

              precision    recall  f1-score   support

         0.0     0.8389    0.8795    0.8587       805
         1.0     0.9531    0.9354    0.9442      2106

    accuracy                         0.9200      2911
   macro avg     0.8960    0.9075    0.9014      2911
weighted avg     0.9215    0.9200    0.9205      2911



# **Random Forest**

In [22]:
# Random forest with default hyper-parameters, trained on all available cores.
# Bootstrapping and per-split feature sampling are random, so seed the forest
# (42, as used for the data split) to make the reported metrics reproducible.
model = RFC(n_jobs=-1, random_state=42)

In [23]:
# Fit the random forest on the (resampled) training split.
model.fit(X=train, y=train_target)

In [24]:
# Mean accuracy of the random forest on the data it was trained on.
model.score(X=train, y=train_target)

1.0

In [25]:
# Predict labels for the held-out test split with the fitted random forest.
predictions = model.predict(X=test)

In [26]:
# Per-class precision / recall / F1 for the random forest, 4 decimals.
report = classification_report(
    y_true=test_target,
    y_pred=predictions,
    digits=4,
)
print(report)

              precision    recall  f1-score   support

         0.0     0.9599    0.9528    0.9564       805
         1.0     0.9820    0.9848    0.9834      2106

    accuracy                         0.9760      2911
   macro avg     0.9710    0.9688    0.9699      2911
weighted avg     0.9759    0.9760    0.9759      2911



# **XGBoost**

In [27]:
# Gradient-boosted trees with default hyper-parameters and a logistic
# objective for the two-class target.
model = XGB.XGBClassifier(
    objective='binary:logistic',
)

In [28]:
# Fit the XGBoost classifier on the (resampled) training split.
model.fit(X=train, y=train_target)

In [29]:
# Mean accuracy of the XGBoost model on the data it was trained on.
model.score(X=train, y=train_target)

1.0

In [30]:
# Predict labels for the held-out test split with the fitted XGBoost model.
predictions = model.predict(X=test)

In [31]:
# Per-class precision / recall / F1 for XGBoost on the test split, 4 decimals.
report = classification_report(
    y_true=test_target,
    y_pred=predictions,
    digits=4,
)
print(report)

              precision    recall  f1-score   support

         0.0     0.9577    0.9565    0.9571       805
         1.0     0.9834    0.9839    0.9836      2106

    accuracy                         0.9763      2911
   macro avg     0.9706    0.9702    0.9704      2911
weighted avg     0.9763    0.9763    0.9763      2911

