## Slicing and Dicing Pandas DataFrames

In [1]:
import numpy as np
import pandas as pd

### Our Bogus Data

In [3]:
# Build the demo frame column-by-column; the dict's insertion order fixes the
# column order, and each list holds that column's values for rows 0..6.
df = pd.DataFrame({
    'State': ['HR', 'DL', 'MH', 'AS', 'GJ', 'KL', 'PB'],
    'Color': ['Orange', 'Purple', 'Red', 'Black', 'Blue', 'Green', 'Magenta'],
    'Food': ['Wheat', 'Flour', 'Mango', 'Apple', 'Milk', 'Melon', 'Beans'],
    'Average Age': [30, 2, 12, 4, 32, 33, 69],
    'Average Height': [165, 70, 120, 80, 180, 172, 150],
    'Score': [4.6, 8.3, 9.0, 3.3, 1.8, 9.5, 2.2],
})

df

Unnamed: 0,State,Color,Food,Average Age,Average Height,Score
0,HR,Orange,Wheat,30,165,4.6
1,DL,Purple,Flour,2,70,8.3
2,MH,Red,Mango,12,120,9.0
3,AS,Black,Apple,4,80,3.3
4,GJ,Blue,Milk,32,180,1.8
5,KL,Green,Melon,33,172,9.5
6,PB,Magenta,Beans,69,150,2.2


In [23]:
# Print a structural summary: index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   State           7 non-null      object 
 1   Color           7 non-null      object 
 2   Food            7 non-null      object 
 3   Average Age     7 non-null      int64  
 4   Average Height  7 non-null      int64  
 5   Score           7 non-null      float64
dtypes: float64(1), int64(2), object(3)
memory usage: 464.0+ bytes


In [24]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns only.
df.describe()

Unnamed: 0,Average Age,Average Height,Score
count,7.0,7.0,7.0
mean,26.0,133.857143,5.528571
std,23.115651,44.693134,3.324512
min,2.0,70.0,1.8
25%,8.0,100.0,2.75
50%,30.0,150.0,4.6
75%,32.5,168.5,8.65
max,69.0,180.0,9.5


In [25]:
# The dtype of every column, returned as a Series keyed by column name.
df.dtypes

State              object
Color              object
Food               object
Average Age         int64
Average Height      int64
Score             float64
dtype: object

### Basic Column Selection

In [4]:
# Indexing with a list of column names (note the double brackets) returns a DataFrame holding just those columns.
df[['Color', 'Score']]

Unnamed: 0,Color,Score
0,Orange,4.6
1,Purple,8.3
2,Red,9.0
3,Black,3.3
4,Blue,1.8
5,Green,9.5
6,Magenta,2.2


### Column Selection By Slicing

Last two columns only:

In [6]:
# Slice the column labels themselves: [-2:] yields the last two labels, which are then used to index the frame.
df[df.columns[-2:]]

Unnamed: 0,Average Height,Score
0,165,4.6
1,70,8.3
2,120,9.0
3,80,3.3
4,180,1.8
5,172,9.5
6,150,2.2


Everything **BUT** the last two columns:

In [7]:
# [:-2] keeps every column label except the final two.
df[df.columns[:-2]]

Unnamed: 0,State,Color,Food,Average Age
0,HR,Orange,Wheat,30
1,DL,Purple,Flour,2
2,MH,Red,Mango,12
3,AS,Black,Apple,4
4,GJ,Blue,Milk,32
5,KL,Green,Melon,33
6,PB,Magenta,Beans,69


### Row Selection by a List of Values

In [14]:
# isin() builds a boolean mask that is True where State is 'HR' or 'DL'; indexing with the mask keeps only those rows.
df[df['State'].isin(['HR', 'DL'])]

Unnamed: 0,State,Color,Food,Average Age,Average Height,Score
0,HR,Orange,Wheat,30,165,4.6
1,DL,Purple,Flour,2,70,8.3


### Using .loc (by label) and .iloc (by integer position):

By label...

In [15]:
# ':' selects every row; a single column label returns that column as a Series.
df.loc[:, 'Food']

0    Wheat
1    Flour
2    Mango
3    Apple
4     Milk
5    Melon
6    Beans
Name: Food, dtype: object

In [16]:
# A list of labels in the column position returns a DataFrame with those columns, for every row.
df.loc[:, ['State', 'Food']]

Unnamed: 0,State,Food
0,HR,Wheat
1,DL,Flour
2,MH,Mango
3,AS,Apple
4,GJ,Milk
5,KL,Melon
6,PB,Beans


Combo!! Filter rows with a boolean mask and pick columns by label in a single `.loc` call:

In [17]:
# Row mask and column list in one .loc call: keep rows whose State is 'HR' or 'DL', and only the State and Food columns.
df.loc[df['State'].isin(['HR', 'DL']), ['State', 'Food']]

Unnamed: 0,State,Food
0,HR,Wheat
1,DL,Flour


By integer position...

In [18]:
# The column at integer position 0 (State) for every row, returned as a Series.
df.iloc[:, 0]

0    HR
1    DL
2    MH
3    AS
4    GJ
5    KL
6    PB
Name: State, dtype: object

In [19]:
# Columns at integer positions 0, 2 and 3 (State, Food, Average Age) for every row.
df.iloc[:, [0, 2, 3]]

Unnamed: 0,State,Food,Average Age
0,HR,Wheat,30
1,DL,Flour,2
2,MH,Mango,12
3,AS,Apple,4
4,GJ,Milk,32
5,KL,Melon,33
6,PB,Beans,69


In [20]:
# Rows at positions 0 and 3, restricted to the columns at positions 0 and 2 (State, Food).
df.iloc[[0, 3], [0, 2]]

Unnamed: 0,State,Food
0,HR,Wheat
3,AS,Apple


### Using `filter` (Selecting Columns by Label Pattern):

In [21]:
# like='Average' keeps the columns whose label contains the substring 'Average'.
df.filter(like='Average')

Unnamed: 0,Average Age,Average Height
0,30,165
1,2,70
2,12,120
3,4,80
4,32,180
5,33,172
6,69,150


### By Data Type:

In [26]:
# 'number' matches every numeric dtype, so the selection does not depend on the
# exact bit widths (int64, float64) the columns happen to be stored with.
df.select_dtypes(include='number')

Unnamed: 0,Average Age,Average Height,Score
0,30,165,4.6
1,2,70,8.3
2,12,120,9.0
3,4,80,3.3
4,32,180,1.8
5,33,172,9.5
6,69,150,2.2
