# **Loading Dataset as Dataframe using Pandas**

In [1]:
import pandas as pd
import numpy as np
import sklearn

volunteer = pd.read_csv('./heart disease classification dataset.csv')
volunteer.head()

Unnamed: 0.1,Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0,63,male,3,145.0,233.0,1,0,150.0,0,2.3,0,0,1,yes
1,1,37,male,2,130.0,250.0,0,1,187.0,0,3.5,0,0,2,yes
2,2,41,female,1,130.0,204.0,0,0,172.0,0,1.4,2,0,2,yes
3,3,56,male,1,120.0,236.0,0,1,178.0,0,0.8,2,0,2,yes
4,4,57,female,0,,354.0,0,1,163.0,1,0.6,2,0,2,yes


# **Handling missing values**

In [2]:
volunteer.shape

(303, 15)

In [3]:
volunteer.isnull().sum()

Unnamed: 0    0
age           0
sex           0
cp            0
trestbps      4
chol          1
fbs           0
restecg       0
thalach       5
exang         0
oldpeak       0
slope         0
ca            0
thal          0
target        0
dtype: int64

In [4]:
volunteer=volunteer.dropna(axis=0,subset=['chol'])
volunteer.shape

(302, 15)

In [5]:
from sklearn.impute import SimpleImputer

impute = SimpleImputer(missing_values=np.nan, strategy='mean')

impute.fit(volunteer[['trestbps']])

volunteer['trestbps'] = impute.transform(volunteer[['trestbps']])

In [6]:
from sklearn.impute import SimpleImputer

impute = SimpleImputer(missing_values=np.nan, strategy='mean')

impute.fit(volunteer[['thalach']])

volunteer['thalach'] = impute.transform(volunteer[['thalach']])

In [7]:
volunteer.isnull().sum()

Unnamed: 0    0
age           0
sex           0
cp            0
trestbps      0
chol          0
fbs           0
restecg       0
thalach       0
exang         0
oldpeak       0
slope         0
ca            0
thal          0
target        0
dtype: int64

# **Encoding Categorical Features**

In [8]:
volunteer.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 302 entries, 0 to 302
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  302 non-null    int64  
 1   age         302 non-null    int64  
 2   sex         302 non-null    object 
 3   cp          302 non-null    int64  
 4   trestbps    302 non-null    float64
 5   chol        302 non-null    float64
 6   fbs         302 non-null    int64  
 7   restecg     302 non-null    int64  
 8   thalach     302 non-null    float64
 9   exang       302 non-null    int64  
 10  oldpeak     302 non-null    float64
 11  slope       302 non-null    int64  
 12  ca          302 non-null    int64  
 13  thal        302 non-null    int64  
 14  target      302 non-null    object 
dtypes: float64(4), int64(9), object(2)
memory usage: 37.8+ KB


In [9]:
volunteer['sex'].unique()

array(['male', 'female'], dtype=object)

In [10]:
from sklearn.preprocessing import LabelEncoder

enc=LabelEncoder()

volunteer['sex']=enc.fit_transform(volunteer['sex'])

print(volunteer[['sex']].head())

   sex
0    1
1    1
2    0
3    1
4    0


In [11]:
volunteer['target'].unique()

array(['yes', 'no'], dtype=object)

In [12]:
from sklearn.preprocessing import LabelEncoder

enc=LabelEncoder()

volunteer['target']=enc.fit_transform(volunteer['target'])

print(volunteer[['target']].head())

   target
0       1
1       1
2       1
3       1
4       1


# **Scaling**

In [13]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

scale=MinMaxScaler()
scale.fit(volunteer)

MinMaxScaler()

In [14]:
volunteer=scale.transform(volunteer)

In [15]:
volunteer=pd.DataFrame(volunteer)
volunteer

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,0.000000,0.708333,1.0,1.000000,0.481132,0.244292,1.0,0.0,0.603053,0.0,0.370968,0.0,0.00,0.333333,1.0
1,0.003311,0.166667,1.0,0.666667,0.339623,0.283105,0.0,0.5,0.885496,0.0,0.564516,0.0,0.00,0.666667,1.0
2,0.006623,0.250000,0.0,0.333333,0.339623,0.178082,0.0,0.0,0.770992,0.0,0.225806,1.0,0.00,0.666667,1.0
3,0.009934,0.562500,1.0,0.333333,0.245283,0.251142,0.0,0.5,0.816794,0.0,0.129032,1.0,0.00,0.666667,1.0
4,0.013245,0.583333,0.0,0.000000,0.356401,0.520548,0.0,0.5,0.702290,1.0,0.096774,1.0,0.00,0.666667,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297,0.986755,0.583333,0.0,0.000000,0.433962,0.262557,0.0,0.5,0.396947,1.0,0.032258,0.5,0.00,1.000000,0.0
298,0.990066,0.333333,1.0,1.000000,0.150943,0.315068,0.0,0.5,0.465649,0.0,0.193548,0.5,0.00,1.000000,0.0
299,0.993377,0.812500,1.0,0.000000,0.471698,0.152968,1.0,0.5,0.534351,0.0,0.548387,0.5,0.50,1.000000,0.0
300,0.996689,0.583333,1.0,0.000000,0.356401,0.011416,0.0,0.5,0.335878,1.0,0.193548,0.5,0.25,1.000000,0.0


In [16]:
print(volunteer.min(axis=0))
print(volunteer.max(axis=0))

0     0.0
1     0.0
2     0.0
3     0.0
4     0.0
5     0.0
6     0.0
7     0.0
8     0.0
9     0.0
10    0.0
11    0.0
12    0.0
13    0.0
14    0.0
dtype: float64
0     1.0
1     1.0
2     1.0
3     1.0
4     1.0
5     1.0
6     1.0
7     1.0
8     1.0
9     1.0
10    1.0
11    1.0
12    1.0
13    1.0
14    1.0
dtype: float64


# **Splitting**

In [17]:
volunteers=volunteer.drop(0,axis=1)


In [18]:
X=volunteers.drop(1,axis=1)
X

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,12,13,14
0,1.0,1.000000,0.481132,0.244292,1.0,0.0,0.603053,0.0,0.370968,0.0,0.00,0.333333,1.0
1,1.0,0.666667,0.339623,0.283105,0.0,0.5,0.885496,0.0,0.564516,0.0,0.00,0.666667,1.0
2,0.0,0.333333,0.339623,0.178082,0.0,0.0,0.770992,0.0,0.225806,1.0,0.00,0.666667,1.0
3,1.0,0.333333,0.245283,0.251142,0.0,0.5,0.816794,0.0,0.129032,1.0,0.00,0.666667,1.0
4,0.0,0.000000,0.356401,0.520548,0.0,0.5,0.702290,1.0,0.096774,1.0,0.00,0.666667,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
297,0.0,0.000000,0.433962,0.262557,0.0,0.5,0.396947,1.0,0.032258,0.5,0.00,1.000000,0.0
298,1.0,1.000000,0.150943,0.315068,0.0,0.5,0.465649,0.0,0.193548,0.5,0.00,1.000000,0.0
299,1.0,0.000000,0.471698,0.152968,1.0,0.5,0.534351,0.0,0.548387,0.5,0.50,1.000000,0.0
300,1.0,0.000000,0.356401,0.011416,0.0,0.5,0.335878,1.0,0.193548,0.5,0.25,1.000000,0.0


In [19]:
Y=volunteers[1]
Y

0      0.708333
1      0.166667
2      0.250000
3      0.562500
4      0.583333
         ...   
297    0.583333
298    0.333333
299    0.812500
300    0.583333
301    0.583333
Name: 1, Length: 302, dtype: float64

In [20]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X,Y, random_state=1)

In [21]:
X_train.shape


(226, 13)

In [22]:
X_test.shape

(76, 13)

In [25]:
# Training our model
from sklearn.linear_model import LogisticRegression




In [26]:
model = LogisticRegression()
model.fit(X_train, Y_train)


ValueError: Unknown label type: 'continuous'

In [27]:
Y_hat = model.predict(X_test)
print("Predicted values are", Y_hat)

AttributeError: 'LogisticRegression' object has no attribute 'coef_'

In [28]:
from sklearn.metrics import accuracy_score
lr_accuracy=accuracy_score(Y_test,Y_hat)
print("LR accuracy:",lr_accuracy)


NameError: name 'Y_hat' is not defined