In [1]:
# %load command1.py
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'

%config InlineBackend.figure_format='svg'
plt.rcParams['figure.dpi']=120

pd.options.display.float_format='{:,.2f}'.format
pd.set_option('display.max_colwidth', None)


In [2]:
%%writefile ./pandasData/weather.csv

Day,Weather,Temperature,Wind,Humidity
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Thu,Cloudy,14.44,11,22
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62
Sun,Sunny,17.5,20,10

Overwriting ./pandasData/weather.csv


In [3]:
# Load the weather table and use the 'Day' column as the row index, so rows
# can be addressed by day name (label) as well as by position.
df=pd.read_csv('./pandasData/weather.csv', index_col=['Day'])
df

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96
Wed,Sunny,17.51,16,20
Thu,Cloudy,14.44,11,22
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62
Sun,Sunny,17.5,20,10


### Differences between loc and iloc

The main distinction between `loc` and `iloc` is:
* `loc` is label-based, which means that you have to specify rows and columns based on their row and column labels. 
* `iloc` is integer position-based, so you have to specify rows and columns by their 0-based integer positions.

### Selecting via a single value 

In [4]:
# `loc` takes labels: the row labelled 'Fri' and the column labelled 'Temperature'.
df.loc['Fri', 'Temperature']
# `iloc` takes 0-based positions: 'Fri' is row 4 and 'Temperature' is column 1
# ('Day' is the index, so it is not counted among the columns).
df.iloc[4, 1]

10.51

10.51

In [5]:
# To get all rows of the Temperature column (`:` selects every row)
df.loc[:, 'Temperature']

# The equivalent `iloc` statement ('Temperature' is column position 1)
df.iloc[:, 1]

Day
Mon   12.79
Tue   19.67
Wed   17.51
Thu   14.44
Fri   10.51
Sat   11.07
Sun   17.50
Name: Temperature, dtype: float64

Day
Mon   12.79
Tue   19.67
Wed   17.51
Thu   14.44
Fri   10.51
Sat   11.07
Sun   17.50
Name: Temperature, dtype: float64

In [6]:
# To get all columns of the 'Fri' row (`:` selects every column)
df.loc['Fri', :]

# The equivalent `iloc` statement ('Fri' is row position 4)
df.iloc[4, :]

Weather        Shower
Temperature     10.51
Wind               26
Humidity           79
Name: Fri, dtype: object

Weather        Shower
Temperature     10.51
Wind               26
Humidity           79
Name: Fri, dtype: object

### Selecting via a list of values

In [7]:
# Multiple rows: a list of row labels with a single column label gives a Series
df.loc[['Thu', 'Fri'], 'Temperature']

# Multiple columns: a single row label with a list of column labels gives a Series
df.loc['Fri', ['Temperature', 'Wind']]

# Multiple rows using iloc (positions 3 and 4 are 'Thu' and 'Fri')
df.iloc[[3, 4], 1]

# Multiple columns using iloc (positions 1 and 2 are 'Temperature' and 'Wind')
df.iloc[4, [1, 2]]

# Multiple rows and columns: lists on both axes give a DataFrame
rows = ['Thu', 'Fri']
cols=['Temperature','Wind']
df.loc[rows, cols]

# The equivalent iloc statement; `rows` and `cols` are rebound from labels to positions
rows = [3, 4]
cols = [1, 2]
df.iloc[rows, cols]

Day
Thu   14.44
Fri   10.51
Name: Temperature, dtype: float64

Temperature   10.51
Wind             26
Name: Fri, dtype: object

Day
Thu   14.44
Fri   10.51
Name: Temperature, dtype: float64

Temperature   10.51
Wind             26
Name: Fri, dtype: object

Unnamed: 0_level_0,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Thu,14.44,11
Fri,10.51,26


Unnamed: 0_level_0,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Thu,14.44,11
Fri,10.51,26


### Selecting via conditions and callable

**Conditions**

In [8]:
# One condition: the boolean mask keeps the rows where Humidity exceeds 50
df.loc[df.Humidity > 50, :]

# Multiple conditions: masks are combined with `&`, and each comparison is
# wrapped in its own parentheses. The second argument limits the result to
# the two listed columns.
df.loc[
    (df.Humidity > 50) & (df.Weather == 'Shower'), 
    ['Temperature','Wind'],
]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tue,Sunny,19.67,28,96
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62


Unnamed: 0_level_0,Temperature,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Fri,10.51,26
Sat,11.07,27


In [9]:
# The boolean mask as a plain Python list (one True/False per row)
list(df.Humidity > 50)

# Single condition: `iloc` is given the mask as a list
df.iloc[list(df.Humidity > 50)]

# Passing the boolean Series itself, i.e. df.iloc[df.Humidity > 50, :],
# raises ValueError -- hence the list() conversion used in this cell.

# Multiple conditions: combine the masks first, then convert to a list
df.iloc[
    list((df.Humidity > 50) & (df.Weather == 'Shower')), 
    :,
]

[False, True, False, False, True, True, False]

Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tue,Sunny,19.67,28,96
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62


Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62


**Callable**

In [10]:
# A callable indexer is handed the DataFrame being indexed and returns
# whatever `loc` / `iloc` would otherwise be given directly.

# `loc`: callable returning a list of column labels
df.loc[:, lambda frame: ['Humidity', 'Wind']]

# `loc`: callable returning a boolean condition on the rows
df.loc[lambda frame: frame.Humidity > 50, :]

# `iloc`: callable returning row positions
df.iloc[lambda frame: [0, 1], :]

# `iloc`: callable returning the boolean condition as a list
df.iloc[lambda frame: list(frame.Humidity > 50), :]

Unnamed: 0_level_0,Humidity,Wind
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,30,13
Tue,96,28
Wed,20,16
Thu,22,11
Fri,79,26
Sat,62,27
Sun,10,20


Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tue,Sunny,19.67,28,96
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62


Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mon,Sunny,12.79,13,30
Tue,Sunny,19.67,28,96


Unnamed: 0_level_0,Weather,Temperature,Wind,Humidity
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tue,Sunny,19.67,28,96
Fri,Shower,10.51,26,79
Sat,Shower,11.07,27,62


### `loc` and `iloc` are interchangeable when labels are 0-based integers

In [11]:
# First attempt at integer labels on both axes: read without a header and
# skip file line 0. Line 0 of weather.csv is the blank line at the top of the
# file, so the real header line is not skipped and shows up as data row 0.
df = pd.read_csv('./pandasData/weather.csv', header=None, skiprows=[0])
df

Unnamed: 0,0,1,2,3,4
0,Day,Weather,Temperature,Wind,Humidity
1,Mon,Sunny,12.79,13,30
2,Tue,Sunny,19.67,28,96
3,Wed,Sunny,17.51,16,20
4,Thu,Cloudy,14.44,11,22
5,Fri,Shower,10.51,26,79
6,Sat,Shower,11.07,27,62
7,Sun,Sunny,17.5,20,10


In [12]:
# Default read for comparison: the header line supplies the column labels
# and the rows get a 0-based integer index.
df = pd.read_csv('./pandasData/weather.csv')
df

Unnamed: 0,Day,Weather,Temperature,Wind,Humidity
0,Mon,Sunny,12.79,13,30
1,Tue,Sunny,19.67,28,96
2,Wed,Sunny,17.51,16,20
3,Thu,Cloudy,14.44,11,22
4,Fri,Shower,10.51,26,79
5,Sat,Shower,11.07,27,62
6,Sun,Sunny,17.5,20,10


In [13]:
# `skiprows` takes 0-based line numbers of the file itself. In weather.csv
# line 0 is the blank line at the top and line 1 is the header, so line 4 is
# the 'Wed' record -- the row that is missing from the resulting frame.
df = pd.read_csv('./pandasData/weather.csv', skiprows=[4])
df

Unnamed: 0,Day,Weather,Temperature,Wind,Humidity
0,Mon,Sunny,12.79,13,30
1,Tue,Sunny,19.67,28,96
2,Thu,Cloudy,14.44,11,22
3,Fri,Shower,10.51,26,79
4,Sat,Shower,11.07,27,62
5,Sun,Sunny,17.5,20,10


In [14]:
# Build the frame used by the remaining cells: row labels AND column labels
# are 0-based integers, so labels and positions coincide.
#
# `header=0` consumes the header line (the first non-blank line) and `names`
# replaces its text labels with integers. Unlike `skiprows=[...]`, which
# counts physical file lines and is thrown off by the blank first line of
# weather.csv, this does not depend on where the header sits in the file.
df = pd.read_csv(
    './pandasData/weather.csv',
    header=0,
    names=[0, 1, 2, 3, 4],
)
df

Unnamed: 0,0,1,2,3,4
0,Mon,Sunny,12.79,13,30
1,Tue,Sunny,19.67,28,96
2,Wed,Sunny,17.51,16,20
3,Thu,Cloudy,14.44,11,22
4,Fri,Shower,10.51,26,79
5,Sat,Shower,11.07,27,62
6,Sun,Sunny,17.5,20,10


In [15]:
# Row and column labels are integers now, so `loc` is given integers as labels.
# Row label 1, column label 2
df.loc[1, 2]
# Row label 1, column labels 1 and 2
df.loc[1, [1, 2]]

19.67

1    Sunny
2    19.67
Name: 1, dtype: object

In [16]:
# `loc` and `iloc` are interchangeable when selecting via a single value or a list of values,
# because in this frame every integer label equals its position.

# Single value: both forms address the same cell
df.loc[1, 2] == df.iloc[1, 2]
# List of values: element-wise comparison of the two selections
df.loc[1, [1, 2]] == df.iloc[1, [1, 2]]

True

1    True
2    True
Name: 1, dtype: bool