In [3]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

%matplotlib inline

In [None]:
df = pd.read_csv('heart.csv')
df

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df.shape

In [None]:
df.dtypes

In [None]:
# Columns whose dtype is switched to `object`; the cells that follow swap
# several of their integer codes for text labels.
categorical_columns = ['sex', 'cp', 'fbs', 'restecg',
                       'exang', 'slope', 'ca', 'thal']
for column_name in categorical_columns:
    df[column_name] = df[column_name].astype('object')
df.dtypes

In [None]:
# Integer code -> text label, one mapping per column.
# Series.replace leaves any value that is not a key of the mapping unchanged.
code_to_label = {
    'target': {1: "Disease", 0: "No_disease"},
    'sex': {1: "Male", 0: "Female"},
    'cp': {0: "typical_angina", 1: "atypical_angina",
           2: "non-anginal pain", 3: "asymtomatic"},
    'exang': {1: "Yes", 0: "No"},
    'fbs': {1: "True", 0: "False"},
    'slope': {0: "upsloping", 1: "flat", 2: "downsloping"},
    'thal': {1: "fixed_defect", 2: "reversable_defect", 3: "normal"},
}
for column_name, mapping in code_to_label.items():
    df[column_name] = df[column_name].replace(mapping)



In [None]:
# Summary statistics of the frame in its current state.
df.describe()

In [None]:
# Box plot of `oldpeak`, one box per `target` value.
sns.boxplot(x='target', y='oldpeak', data=df)

In [None]:
# Columns that are passed to the outlier scan further down in this notebook.
continous_features = ['age','trestbps','chol','thalach','oldpeak'] 
def outliers(df_out, drop = False, target = None):
    """Report or remove IQR outliers, one column of ``df_out`` at a time.

    A value counts as an outlier when it lies more than 1.5 * IQR below the
    25th percentile or above the 75th percentile of its own column. The
    percentiles are always computed on ``df_out`` as passed in, so rows
    removed for one column do not shift the bounds of the next column.

    Parameters
    ----------
    df_out : pandas.DataFrame
        Columns to scan. This frame itself is never modified.
    drop : bool, default False
        If False, print the number of outliers found in each column.
        If True, drop the outlier rows (by index label) from ``target``.
    target : pandas.DataFrame, optional
        Frame the rows are dropped from, in place, when ``drop`` is True.
        When omitted, the module-level ``df`` is used, which is the frame
        this function has always dropped from.

    Returns
    -------
    None
    """
    if drop and target is None:
        # Backward-compatible default: fall back to the notebook-level frame.
        target = df

    for each_feature in df_out.columns:
        feature_data = df_out[each_feature]
        Q1 = np.percentile(feature_data, 25.)
        Q3 = np.percentile(feature_data, 75.)
        outlier_step = (Q3 - Q1) * 1.5
        lower_bound = Q1 - outlier_step
        upper_bound = Q3 + outlier_step

        # Named `outlier_index` so the function's own name is not shadowed.
        within_bounds = (feature_data >= lower_bound) & (feature_data <= upper_bound)
        outlier_index = feature_data[~within_bounds].index.tolist()

        if drop:
            # errors='ignore': a row flagged by an earlier column may already be gone.
            target.drop(outlier_index, inplace = True, errors = 'ignore')
            print('Outliers from {} feature removed'.format(each_feature))
        else:
            print('For the feature {}, No of Outliers is {}'.format(each_feature,
                                                                    len(outlier_index)))

# Report pass (drop defaults to False): prints the outlier count for each listed column.
outliers(df[continous_features])


In [None]:
# Removal pass: with drop=True, outliers() drops the flagged rows from `df` in place.
outliers(df[continous_features],drop=True) 

In [None]:
duplicated=df.duplicated().sum()
if duplicated:
    print("Duplicated rows :{}".format(duplicated)) 
else:
    print("No duplicates")


In [None]:
duplicates=df[df.duplicated(keep=False)] 
duplicates.head()

In [None]:
# drop_duplicates() returns a new frame; assign it back, otherwise the
# duplicate rows stay in `df` for every later cell.
df = df.drop_duplicates()
df.shape

In [None]:
# Distinct values currently present in `ca`.
df['ca'].unique()

In [None]:
# Distinct values currently present in `thal`.
df['thal'].unique()

In [None]:
# Map the `thal` labels back to their integer codes.
# The result is assigned back rather than using replace(inplace=True) on
# df['thal']: an in-place call on a column selection is deprecated and does
# not modify `df` when pandas Copy-on-Write is active.
df['thal'] = df['thal'].replace({"fixed_defect":1, "reversable_defect":2,
                                 "normal":3})

In [None]:
df[df['ca']==4]

In [None]:
df.loc[df['ca']==4,'ca']=np.NaN
df.loc[df['thal']==0,'thal']=np.NaN

In [None]:
df.isna().sum()

In [None]:
df = df.fillna(df.median()) 
df.isnull().sum()

In [None]:
df.info()

In [None]:
# Text label -> integer code, one mapping per column.
# Series.replace leaves any value that is not a key of the mapping unchanged.
label_to_code = {
    'target': {"Disease":1,"No_disease":0},
    'sex': {"Male":1,"Female":0},
    'cp': {"typical_angina":0,"atypical_angina":1,
           "non-anginal pain":2, "asymtomatic":3},
    'exang': {"Yes":1,"No":0},
    'fbs': {"True":1,"False":0},
    'slope': {"upsloping":0,"flat":1,"downsloping":2},
    'thal': {"fixed_defect":1, "reversable_defect":2, "normal":3},
}
for column_name, mapping in label_to_code.items():
    df[column_name] = df[column_name].replace(mapping)

In [None]:
# Column dtypes and non-null counts after the labels were mapped back to codes.
df.info()

In [None]:
# Number of rows per `target` value.
df['target'].value_counts()

In [None]:
df.head()

In [None]:
# Label vector is the `target` column; the feature matrix is every other column.
y = df['target']
x = df.drop(columns = 'target')

In [None]:
normal_scaler = MinMaxScaler(feature_range = (0,1))
x_scaled = normal_scaler.fit_transform(x)
x_scaled_df = pd.DataFrame(x_scaled, columns = x.columns)
x_scaled_df

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x_scaled_df,y,
                    test_size = 0.2,random_state = 1,stratify = y)

In [None]:
# Logistic regression classifier with scikit-learn's default settings.
log_reg = LogisticRegression()

In [None]:
# Fit on the training partition.
log_reg.fit(x_train, y_train)

In [None]:
# Predicted class for every row of the test partition.
y_pred = log_reg.predict(x_test)

In [None]:
# First five predictions ...
y_pred[0:5]

In [None]:
# ... next to the first five true labels.
y_test[0:5]

In [None]:
# Fraction of test rows classified correctly.
accuracy_score(y_test,y_pred)

In [None]:
# Rows are true classes, columns are predicted classes.
confusion_matrix(y_test,y_pred)

In [None]:
# Per-class precision, recall and F1 as a text table.
clf_report = classification_report(y_test,y_pred)
print(clf_report)