6. Rows & Columns - Selection

In [2]:
import pandas as pd
import numpy as np

In [None]:
# | Method    | Use                              |
# | --------- | -------------------------------- |
# | df["col"] | single column                    |
# | .loc      | label-based selection            |
# | .iloc     | integer-position-based selection |
# .loc  = label-based indexing
# .iloc = integer-position-based indexing

In [1]:
import pandas as pd

# Sample student table used by the examples that follow: one dict key per
# column; the rows get the default integer index 0-4, which serves as the
# row labels for .loc.
df = pd.DataFrame(
    data={
        "Name": ["arman", "akansha", "zoya", "pavitra", "mehak"],
        "Age": [20, 21, 19, 22, 20],
        "Marks": [80, 90, 85, 70, 88],
    }
)

print(df)


      Name  Age  Marks
0    arman   20     80
1  akansha   21     90
2     zoya   19     85
3  pavitra   22     70
4    mehak   20     88


1. .loc[ ] (label based)

In [2]:
# Select a single column.
# Double brackets pass a *list* of labels, so the result is a one-column
# DataFrame; single brackets (df["Name"]) would return a Series instead.
print(df[['Name']])

      Name
0    arman
1  akansha
2     zoya
3  pavitra
4    mehak


In [3]:
# 1. .loc[] (label based), format = df.loc[row_label, column_label]
# (A) Single row: the row whose index label is 0, returned as a Series
#     whose index is the column names.
df.loc[0]

Name     arman
Age         20
Marks       80
Name: 0, dtype: object

In [None]:
# (B) Multiple rows: a label slice includes BOTH endpoints,
#     so 0:2 returns the rows labelled 0, 1 and 2.
df.loc[0:2] #  0 to 2 (2 included!)

Unnamed: 0,Name,Age,Marks
0,arman,20,80
1,akansha,21,90
2,zoya,19,85


In [6]:
# (C) Specific rows: pass a list of index labels to pick exactly those rows
df.loc[[0,2,4]]

Unnamed: 0,Name,Age,Marks
0,arman,20,80
2,zoya,19,85
4,mehak,20,88


In [9]:
# (D) Single column: ":" keeps every row; one column label returns a Series
df.loc[:,"Marks"]

0    80
1    90
2    85
3    70
4    88
Name: Marks, dtype: int64

In [10]:
# (E) Multiple columns: ":" keeps every row; a list of column labels
#     returns a DataFrame with just those columns
df.loc[:,["Name","Marks"]]

Unnamed: 0,Name,Marks
0,arman,80
1,akansha,90
2,zoya,85
3,pavitra,70
4,mehak,88


In [13]:
# (F) Rows + columns together: rows labelled 0 through 3 (3 included),
#     restricted to the Name and Age columns
df.loc[0:3,["Name","Age"]]

Unnamed: 0,Name,Age
0,arman,20
1,akansha,21
2,zoya,19
3,pavitra,22


In [17]:
# (G) Conditional selection (MOST IMPORTANT)
#     The boolean mask df["Marks"] > 80 chooses the rows;
#     the list chooses which columns to show for them.
df.loc[df["Marks"] >80,["Name","Marks"]]

Unnamed: 0,Name,Marks
1,akansha,90
2,zoya,85
4,mehak,88


2. .iloc[ ] (position based)

In [None]:
# Format:
# df.iloc[row_position, column_position]

In [19]:
# (A) Single value: df.iloc[row_position, column_position], both counted from 0
# e.g. df.iloc[0,2] would be the Marks of the first row
df.iloc[3,1] # row position 3, column position 1 -> age of pavitra

np.int64(22)

In [20]:
# (B) Single row: the row at position 1 (the second row), returned as a Series
df.iloc[1]

Name     akansha
Age           21
Marks         90
Name: 1, dtype: object

In [22]:
# (C) Multiple rows: a position slice EXCLUDES the stop value, so 0:2 returns
#     positions 0 and 1 only (contrast with .loc, where the end label is included)
df.iloc[0:2]

Unnamed: 0,Name,Age,Marks
0,arman,20,80
1,akansha,21,90


In [23]:
# (D) Specific rows: pass a list of integer positions (1st, 3rd and 5th rows)
df.iloc[[0,2,4]]

Unnamed: 0,Name,Age,Marks
0,arman,20,80
2,zoya,19,85
4,mehak,20,88


In [24]:
# (E) Single column: every row, column at position 2 (Marks), as a Series
df.iloc[:,2]

0    80
1    90
2    85
3    70
4    88
Name: Marks, dtype: int64

In [25]:
# (F) Multiple columns: every row, columns at positions 0 and 2 (Name, Marks)
df.iloc[:,[0,2]]

Unnamed: 0,Name,Marks
0,arman,80
1,akansha,90
2,zoya,85
3,pavitra,70
4,mehak,88


In [None]:
# (G) Rows + columns: rows at positions 0 and 1; the column slice 0:3:2
#     is start:stop:step, i.e. positions 0 and 2 (Name, Marks)
df.iloc[0:2,0:3:2] 

Unnamed: 0,Name,Marks
0,arman,80
1,akansha,90


Update values using loc/iloc 

In [None]:
# Set Marks to 85 for every row where Marks <= 80
# (the mask selects the rows, ["Marks"] the column that gets overwritten).
# This modifies df in place.
df.loc[df["Marks"]<=80,["Marks"]] = 85
print(df)

      Name  Age  Marks
0    arman   20     85
1  akansha   21     90
2     zoya   19     85
3  pavitra   22     85
4    mehak   20     88


In [38]:
# Change a particular value: row position 0, column position 2
# (the Marks of the first row) is overwritten in place
df.iloc[0,2] = 86
df

Unnamed: 0,Name,Age,Marks
0,arman,20,86
1,akansha,21,90
2,zoya,19,85
3,pavitra,22,85
4,mehak,20,88


In [44]:
# Drop rows / columns (selection based)
# drop() returns a new DataFrame and leaves df itself unchanged, which is why
# the row removed by the first call is still present in the second printout.
print(df.drop(3,axis=0))  # row with index label 3 (the 4th row, pavitra)
print(df.drop("Age",axis=1)) # the Age column


      Name  Age  Marks
0    arman   20     86
1  akansha   21     90
2     zoya   19     85
4    mehak   20     88
      Name  Marks
0    arman     86
1  akansha     90
2     zoya     85
3  pavitra     85
4    mehak     88


In [None]:
# Real-life style example
# Students who scored more than 85 (only their Name and Marks)
top = df.loc[df["Marks"] > 85, ["Name","Marks"]]

# Increase age by one for all.
# NOTE: this updates df in place, so every re-run of this cell adds another
# year to Age; restart the kernel and run all cells to get back to a clean state.
df.loc[:, "Age"]= df["Age"]+ 1

# A cell only displays the value of its last expression, so end with `top`
# to actually show the high scorers computed above.
top


Unnamed: 0,Name,Marks
0,arman,86
1,akansha,90
4,mehak,88


In [None]:
# ":" on both axes selects every row and every column, i.e. the whole frame
df.iloc[:, :]

Unnamed: 0,Name,Age,Marks
0,arman,23,86
1,akansha,24,90
2,zoya,22,85
3,pavitra,25,85
4,mehak,23,88


In [53]:
# A row slice with step -1 returns the rows in reverse order
# (the original index labels travel with their rows)
df.iloc[::-1]

Unnamed: 0,Name,Age,Marks
4,mehak,23,88
3,pavitra,25,85
2,zoya,22,85
1,akansha,24,90
0,arman,23,86


In [81]:
# Practice questions: a second, slightly larger student table (7 rows)
# so the exercises below do not depend on the edits made to df above.
df2 = pd.DataFrame(
    data={
        "Name": ["arman", "akansha", "zoya", "pavitra", "mehak", "abhishek", "manav"],
        "Age": [20, 21, 19, 22, 20, 21, 23],
        "Marks": [80, 90, 85, 70, 88, 70, 72],
    }
)
df2

Unnamed: 0,Name,Age,Marks
0,arman,20,80
1,akansha,21,90
2,zoya,19,85
3,pavitra,22,70
4,mehak,20,88
5,abhishek,21,70
6,manav,23,72


In [82]:
# Print only Name & Marks of students with marks > 80
# (boolean mask for the rows, list of labels for the columns)
print(df2.loc[df2["Marks"] > 80,["Name","Marks"]])

      Name  Marks
1  akansha     90
2     zoya     85
4    mehak     88


In [83]:
# Change all marks < 75 to 75 (in-place assignment through a boolean mask)
df2.loc[df2["Marks"]<75,["Marks"]] = 75
df2

Unnamed: 0,Name,Age,Marks
0,arman,20,80
1,akansha,21,90
2,zoya,19,85
3,pavitra,22,75
4,mehak,20,88
5,abhishek,21,75
6,manav,23,75


In [94]:
# Print 1st, 3rd, 5th row using iloc (positions are 0-based: 0, 2, 4)
print(df2.iloc[[0,2,4]])

    Name  Age  Marks
0  arman   20     80
2   zoya   19     85
4  mehak   20     88


In [96]:
# Print last 2 rows: a negative start counts from the end of the frame
# (alternative: df2.tail(2))
df2.iloc[-2:]

Unnamed: 0,Name,Age,Marks
5,abhishek,21,75
6,manav,23,75


In [90]:
# Change 2nd row Age to 23: row position 1, column position 1 (Age)
df2.iloc[1,1] = 23
df2

Unnamed: 0,Name,Age,Marks
0,arman,20,80
1,akansha,23,90
2,zoya,19,85
3,pavitra,22,75
4,mehak,20,88
5,abhishek,21,75
6,manav,23,75
