# Pandas `loc` and `iloc` for selecting data


In [2]:
import pandas as pd

In [4]:
# Load the weather report and use the 'Day' column as the row index, so rows can be
# selected by day label with `loc`.
# NOTE(review): this is an absolute, machine-specific Windows path — the notebook will
# not run on another machine as-is; consider a path relative to the notebook.
df = pd.read_csv(r"C:\Users\Arjun Walunj\Desktop\Python\weather_report.csv", index_col=['Day'])
df

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Thu,Cloudy,14.44,11,22
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62
Sun,Sunny,17.5,20,10


## 1. Differences between loc and iloc

The main distinction between `loc` and `iloc` is:
* `loc` is label-based, which means that you have to specify rows and columns based on their row and column labels. 
* `iloc` is integer position-based, so you have to specify rows and columns by their integer position values (0-based integer position).

## 2. Selecting via a single value 

To get Friday's temperature:

In [3]:
# Redisplay the frame so the row/column labels and their positions can be read off
df

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Thu,Cloudy,14.44,11,22
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62
Sun,Sunny,17.5,20,10


In [4]:
# Pass the row label and the column label to `loc`; a single label in each slot
# returns the scalar at their intersection
df.loc['Fri', 'Temperature']

10.51

In [5]:
# The equivalent `iloc` statement uses 0-based positions: 'Fri' is row position 4 and
# 'Temperature' is column position 1 (the 'Day' index is not counted as a column)
df.iloc[4, 1]

10.51

Use `:` in the row or column position to return all rows or all columns:

In [6]:
# To get all rows: `:` in the row slot selects every row, so this returns the whole
# 'Temperature' column as a Series
df.loc[:, 'Temperature']

Day
Mon    12.79
Tue    19.67
Wed    17.51
Thu    14.44
Fri    10.51
Sat    11.07
Sun    17.50
Name: Temperature, dtype: float64

In [7]:
# The equivalent `iloc` statement: every row, column position 1 ('Temperature')
df.iloc[:,1]

Day
Mon    12.79
Tue    19.67
Wed    17.51
Thu    14.44
Fri    10.51
Sat    11.07
Sun    17.50
Name: Temperature, dtype: float64

In [8]:
# To get all columns: `:` in the column slot selects every column, so this returns the
# whole 'Fri' row as a Series
df.loc['Fri', :]

Weather        Shower
Temperature     10.51
Wind               26
Humidity           79
Name: Fri, dtype: object

In [9]:
# The equivalent `iloc` statement: row position 4 ('Fri'), every column
df.iloc[4, :]

Weather        Shower
Temperature     10.51
Wind               26
Humidity           79
Name: Fri, dtype: object

## 3. Selecting via a list of values

In [10]:
# Multiple rows: a list of row labels with a single column label returns a Series
df.loc[['Thu', 'Fri'], 'Temperature']

Day
Thu    14.44
Fri    10.51
Name: Temperature, dtype: float64

In [11]:
# The equivalent `iloc` statement: row positions 3 and 4 ('Thu', 'Fri'), column position 1
df.iloc[[3,4],1]

Day
Thu    14.44
Fri    10.51
Name: Temperature, dtype: float64

In [12]:
# Multiple rows and multiple columns: a list of labels in each slot returns a DataFrame
df.loc[['Fri','Sat','Sun'], ['Temperature', 'Wind']]

Unnamed: 0_level_0,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Fri,10.51,26
Sat,11.07,27
Sun,17.5,20


In [13]:
# The equivalent `iloc` statement: row positions 4-6 ('Fri'..'Sun'), column positions 1 and 2
df.iloc[[4,5,6] , [1,2]]

Unnamed: 0_level_0,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Fri,10.51,26
Sat,11.07,27
Sun,17.5,20


In [14]:
# Multiple columns using iloc: a single row position with a list of column positions
# returns that row's selected values as a Series
df.iloc[4, [1, 2]]

Temperature    10.51
Wind              26
Name: Fri, dtype: object

In [15]:
# Redisplay the full frame to look up the positions used in the next cell
df

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Thu,Cloudy,14.44,11,22
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62
Sun,Sunny,17.5,20,10


In [16]:
# Positions in a list do not have to be adjacent: rows 4 and 5 ('Fri', 'Sat') with
# columns 1 and 3 ('Temperature', 'Humidity')
df.iloc[ [4,5] , [1,3]]

Unnamed: 0_level_0,Temperature,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Fri,10.51,79
Sat,11.07,62


In [17]:
# Multiple rows and columns: the label lists can be built first and passed in as variables
rows = ['Thu', 'Fri']
cols=['Temperature','Wind']

df.loc[rows, cols]

Unnamed: 0_level_0,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Thu,14.44,11
Fri,10.51,26


In [18]:
# The equivalent `iloc` statement: the same selection expressed as 0-based positions
# (rows 3-4 are 'Thu'/'Fri', columns 1-2 are 'Temperature'/'Wind')
rows = [3, 4]
cols = [1, 2]
df.iloc[rows, cols]

Unnamed: 0_level_0,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Thu,14.44,11
Fri,10.51,26


## 4. Selecting a range of data via slice

For loc, we can use the syntax `A:B` to select data from label `A` to label `B` (Both `A` and `B` are included):

In [19]:
# Slicing column labels: 'Temperature':'Humidity' includes both endpoints and every
# column between them ('Temperature', 'Wind', 'Humidity')
rows=['Thu', 'Fri']
df.loc[rows, 'Temperature':'Humidity' ]

Unnamed: 0_level_0,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Thu,14.44,11,22
Fri,10.51,26,79


In [20]:
# Slicing row labels: 'Mon':'Thu' returns the rows from 'Mon' through 'Thu' inclusive
cols = ['Temperature', 'Wind']
df.loc['Mon':'Thu', cols]

Unnamed: 0_level_0,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,12.79,13
Tue,19.67,28
Wed,17.51,16
Thu,14.44,11


We can use the syntax `A:B:S` to select data from label `A` to label `B` with step size `S` (`A` is always included; `B` is included only if the step lands on it):

In [21]:
# Redisplay the full frame to compare against the stepped slice below
df

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Thu,Cloudy,14.44,11,22
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62
Sun,Sunny,17.5,20,10


In [6]:
# Slicing with step: take every 3rd row between 'Mon' and 'Fri'.
# The end label is only returned when the step lands on it — here the result is
# 'Mon' and 'Thu', so 'Fri' itself is not part of the output.
df.loc['Mon':'Fri':3, :]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Thu,Cloudy,14.44,11,22


With iloc, we can also use the syntax `n:m` to select data from position `n` (included) to position `m` (excluded).

In [16]:
# Row positions 1 and 2 ('Tue', 'Wed'); column positions 0, 1 and 2 — the end position 3 is excluded
df.iloc[[1, 2], 0 : 3]

Unnamed: 0_level_0,Weather,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tue,Sunny,19.67,28
Wed,Sunny,17.51,16


In [24]:
# Redisplay the full frame to compare against the positional stepped slice below
df

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Thu,Cloudy,14.44,11,22
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62
Sun,Sunny,17.5,20,10


In [21]:
# `n:m:s` with iloc: positions 0 up to (but not including) 4 in steps of 2,
# i.e. row positions 0 and 2 ('Mon', 'Wed'), with all columns
df.iloc[0:4:2, :]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Wed,Sunny,17.51,16,20


## 5. Selecting via conditions and callable

### 5.1 Conditions

In [26]:
# Redisplay the full frame to check the filtered results below against it
df

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Thu,Cloudy,14.44,11,22
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62
Sun,Sunny,17.5,20,10


In [22]:
# One condition: the comparison produces a boolean Series, and `loc` keeps the rows where it is True
df.loc[df.Weather == 'Sunny', :]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Sun,Sunny,17.5,20,10


In [30]:
# One condition on a numeric column: keep the rows whose Temperature is above 15
df.loc[df.Temperature > 15, :]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Sun,Sunny,17.5,20,10


In [34]:
# Multiple conditions: wrap each comparison in parentheses and combine them with `&`.
# The column list in the second slot also sets the order of the columns in the result.
df.loc[
    (df.Humidity > 50) & (df.Weather == 'Shower')  , 
    ['Humidity','Weather','Temperature','Wind'],
]

Unnamed: 0_level_0,Humidity,Weather,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,79,Shower,10.51,26
Sat,62,Shower,11.07,27


In [24]:
# A boolean Series can be passed straight to `loc` as a row mask — this cell runs without error.
# NOTE: it is the `iloc` form of this statement (`df.iloc[df.Humidity > 50, :]`) that raises
# ValueError, because `iloc` does not accept a label-indexed boolean Series as a mask;
# the following cell shows the `list(...)` workaround.
df.loc[df.Humidity > 50, :]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tue,Sunny,19.67,28,96
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62


In [25]:
# Single condition with `iloc`: convert the boolean Series to a plain list of booleans
# first, since `iloc` works on positions and does not take the Series itself as a mask
df.iloc[list(df.Humidity > 50),:]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tue,Sunny,19.67,28,96
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62


In [38]:
# Multiple conditions with `iloc`: combine the boolean Series with `&` first, then
# convert the combined mask to a list before passing it in
df.iloc[
    list((df.Humidity > 50) & (df.Weather == 'Shower')),:]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62


# Finished