# Native Accessors

In [3]:
# Access a column as an attribute of the DataFrame (dot notation).
# Dot notation needs a column name that is a valid Python identifier,
# so names containing spaces must be accessed with [] instead.
import pandas as pd
# Load the dataset from a path relative to the notebook's working directory.
data = pd.read_csv("Dataset/Heart_Disease_Prediction.csv")
# The last expression of a cell is displayed: here, the "Cholesterol" column as a Series.
data.Cholesterol

0      322
1      564
2      261
3      263
4      269
      ... 
265    199
266    263
267    294
268    192
269    286
Name: Cholesterol, Length: 270, dtype: int64

In [4]:
# Access the same column with bracket notation; the result is the same Series
# as data.Cholesterol, and this form also works for column names containing spaces.
data["Cholesterol"]

0      322
1      564
2      261
3      263
4      269
      ... 
265    199
266    263
267    294
268    192
269    286
Name: Cholesterol, Length: 270, dtype: int64

In [6]:
# Restrict the selected column to a range of rows with a slice.
# [0:200] keeps the first 200 rows (labels 0 through 199 here); the slice end is excluded.
data["Cholesterol"][0:200]

0      322
1      564
2      261
3      263
4      269
      ... 
195    250
196    248
197    214
198    239
199    304
Name: Cholesterol, Length: 200, dtype: int64

# Indexing in Pandas

In [23]:
# Index-based selection: iloc selects values by their integer position.
# It follows a row-first, column-second convention: data.iloc[rows, columns].

print(data.iloc[1]) # the second row, all columns
print(data.iloc[:6,:3]) # the first 6 rows and the first 3 columns
print(data.iloc[:,1:5]) # all rows, second through fifth columns
print(data.iloc[[0,1,2,5,6],:]) # the rows at positions 0, 1, 2, 5 and 6, all columns
print(data.iloc[-5:]) # the last five rows

Age                             67
Sex                              0
Chest pain type                  3
BP                             115
Cholesterol                    564
FBS over 120                     0
EKG results                      2
Max HR                         160
Exercise angina                  0
ST depression                  1.6
Slope of ST                      2
Number of vessels fluro          0
Thallium                         7
Heart Disease              Absence
Name: 1, dtype: object
   Age  Sex  Chest pain type
0   70    1                4
1   67    0                3
2   57    1                2
3   64    1                4
4   74    0                2
5   65    1                4
     Sex  Chest pain type   BP  Cholesterol
0      1                4  130          322
1      0                3  115          564
2      1                2  124          261
3      1                4  128          263
4      0                2  120          269
..   ...            

In [25]:
# Label-based selection: loc locates values by their index/column labels instead of positions.
# Like iloc, it follows the row-first, column-second convention.
# One important difference: a loc slice INCLUDES its end label.
# e.g. data.loc[0:100] returns the rows labelled 0 through 100,
# whereas data.iloc[0:100] stops at position 99.
print(data.loc[1:100,"ST depression"])

1      1.6
2      0.3
3      0.2
4      0.2
5      0.4
      ... 
96     0.4
97     0.1
98     0.2
99     1.1
100    0.6
Name: ST depression, Length: 100, dtype: float64

# Manipulating Index

In [29]:
# The index of a DataFrame can be replaced with one of its columns.
# Work on a freshly loaded copy so this demonstration does not depend on `data`.
data2 = pd.read_csv("Dataset/Heart_Disease_Prediction.csv")
# set_index returns a NEW DataFrame that uses "Age" as its index; data2 itself is
# left unchanged, so assign the result to a name if the re-indexed frame is needed later.
data2.set_index("Age")

Unnamed: 0_level_0,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence
...,...,...,...,...,...,...,...,...,...,...,...,...,...
52,1,3,172,199,1,0,162,0,0.5,1,0,7,Absence
44,1,2,120,263,0,0,173,0,0.0,1,0,7,Absence
56,0,2,140,294,0,2,153,0,1.3,2,0,3,Absence
57,1,4,140,192,0,0,148,0,0.4,2,0,6,Absence


# Conditional Selection

In [34]:
# Conditional selection: build a boolean mask from a comparison and pass it to loc,
# which keeps only the rows where the mask is True.

no_heart_disease = data["Heart Disease"] == "Absence"  # True where the label is "Absence"
data.loc[no_heart_disease]

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence
5,65,1,4,120,177,0,0,140,0,0.4,1,0,7,Absence
10,59,1,4,135,234,0,0,161,0,0.5,2,0,7,Absence
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,49,1,2,130,266,0,0,171,0,0.6,1,0,3,Absence
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,Absence
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,Absence
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,Absence


In [36]:
# Combining conditions: create one boolean mask per condition and join them with `&`
# so that only the rows satisfying both are selected.
st_depression_above_one = data["ST depression"] > 1
older_than_sixty = data.Age > 60
data.loc[st_depression_above_one & older_than_sixty]

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
9,63,0,4,150,407,0,2,154,0,4.0,2,3,7,Presence
13,61,1,1,134,234,0,0,145,0,2.6,2,2,3,Presence
15,71,0,4,112,149,0,0,125,0,1.6,2,0,3,Absence
18,64,1,1,110,211,0,2,144,1,1.8,2,0,3,Absence
20,67,1,4,120,229,0,2,129,1,2.6,2,2,7,Presence
31,66,1,4,160,228,0,2,138,0,2.3,1,0,6,Absence
36,61,1,4,140,207,0,2,138,1,1.9,1,1,7,Presence
56,61,1,4,138,166,0,2,125,1,3.6,2,1,3,Presence


# Built-in Conditional Selectors

In [41]:
# isin() is a built-in selector: it marks the rows whose value appears in a given list.
# Here it keeps the rows whose Age is 60, 61, 62, 63 or 64.
target_ages = [60, 61, 62, 63, 64]
data.loc[data.Age.isin(target_ages)]

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
8,60,1,4,140,293,0,2,170,0,1.2,2,2,7,Presence
9,63,0,4,150,407,0,2,154,0,4.0,2,3,7,Presence
13,61,1,1,134,234,0,0,145,0,2.6,2,2,3,Presence
18,64,1,1,110,211,0,2,144,1,1.8,2,0,3,Absence
36,61,1,4,140,207,0,2,138,1,1.9,1,1,7,Presence
41,62,0,4,124,209,0,0,163,0,0.0,1,0,3,Absence
53,63,0,2,140,195,0,0,179,0,0.0,1,2,3,Absence
56,61,1,4,138,166,0,2,125,1,3.6,2,1,3,Presence
57,60,0,3,120,178,1,0,96,0,0.0,1,0,3,Absence


In [42]:
# notnull() marks the entries that are NOT missing.
# Selecting with this mask keeps only the rows that have a BP value.
bp_is_present = data.BP.notnull()
data.loc[bp_is_present]

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,Absence
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,Absence
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,Absence
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,Absence


In [43]:
# isnull() is the opposite of notnull(): it marks the entries that ARE missing.
# An empty result means the Cholesterol column contains no null values.
cholesterol_is_missing = data.Cholesterol.isnull()
data.loc[cholesterol_is_missing]

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease


# Assigning Data

In [44]:
# Add a new column by assigning to a column name that does not exist yet.
# Assigning a single scalar broadcasts it to every row, so there is no need to
# build a list with one entry per row. Note that this modifies `data` in place.
data["ishuman"] = "Yes"
data

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,ishuman
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,Yes
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,Yes
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,Yes
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,Yes
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,Absence,Yes
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,Absence,Yes
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,Absence,Yes
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,Absence,Yes


Coded by Maulana Zulfikar Aziz