## How to select multiple rows and cols with pandas

In [28]:
import pandas as pd

In [29]:
#load the two example datasets (CSV files behind short URLs)
#ufo reports
ufo_path = 'http://bit.ly/uforeports'
ufo = pd.read_csv(ufo_path)
#drinks by country (read again further down with index_col='country')
drinks_path = 'http://bit.ly/drinksbycountry'
drinks = pd.read_csv(drinks_path)

In [30]:
#preview the first 3 rows of the ufo data
ufo.head(n=3)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


### How to use the loc indexer

In [31]:
#IMPORTANT: PANDAS
#loc #select #pandas

#loc selects rows and/or cols by label
#label slices in loc include both the start and the stop label

#select the row labelled 0 together with every col
ufo.loc[0, :] #format is [rows, cols]; a bare ':' means all values

City                       Ithaca
Colors Reported               NaN
Shape Reported           TRIANGLE
State                          NY
Time               6/1/1930 22:00
Name: 0, dtype: object

In [32]:
#pass a list of row labels to pick the first 3 rows, keep all cols
ufo.loc[[0, 1, 2], :]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [33]:
#same 3 rows via a label slice: loc includes the stop label 2
ufo.loc[0:2, :]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [34]:
#every row, only the City col (the result is a Series)
ufo.loc[:, 'City']

0                      Ithaca
1                 Willingboro
2                     Holyoke
3                     Abilene
4        New York Worlds Fair
                 ...         
18236              Grant Park
18237             Spirit Lake
18238             Eagle River
18239             Eagle River
18240                    Ybor
Name: City, Length: 18241, dtype: object

In [35]:
#rows 0 through 2, cols from Shape Reported through State (both ends included)
ufo.loc[0:2, 'Shape Reported':'State']

Unnamed: 0,Shape Reported,State
0,TRIANGLE,NY
1,OTHER,NJ
2,OVAL,CO


In [36]:
#use drop to cut off the Time col (returns a new frame, ufo itself is unchanged)
ufo.head(3).drop(columns='Time')

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO


In [37]:
#select rows with a condition (boolean mask): keep rows whose City is Oakland
ufo[ufo['City'] == 'Oakland']

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
1694,Oakland,,CIGAR,CA,7/21/1968 14:00
2144,Oakland,,DISK,CA,8/19/1971 0:00
4686,Oakland,,LIGHT,MD,6/1/1982 0:00
7293,Oakland,,LIGHT,CA,3/28/1994 17:00
8488,Oakland,,,CA,8/10/1995 21:45
8768,Oakland,,,CA,10/10/1995 22:40
10816,Oakland,,LIGHT,OR,10/1/1997 21:30
10948,Oakland,,DISK,CA,11/14/1997 19:55
11045,Oakland,,TRIANGLE,CA,12/10/1997 1:30
12322,Oakland,,FIREBALL,CA,10/9/1998 19:40


### Avoid Chained Indexing

In [38]:
#Chained Indexing (anti-pattern, shown here only for comparison)
#this code needs two internal operations within pandas:
#first .loc filters the rows, then .State selects the col from that result
#can cause problems in certain scenarios, e.g. assigning through a chained
#selection can raise SettingWithCopyWarning
#best practice: a single .loc[rows, cols] call (see the next cell)
ufo.loc[ufo.City=='Oakland'].State

1694     CA
2144     CA
4686     MD
7293     CA
8488     CA
8768     CA
10816    OR
10948    CA
11045    CA
12322    CA
12941    CA
16803    MD
17322    CA
Name: State, dtype: object

In [39]:
#best practice: one loc call taking the row condition and the col label
ufo.loc[ufo['City'] == 'Oakland', 'State']

1694     CA
2144     CA
4686     MD
7293     CA
8488     CA
8768     CA
10816    OR
10948    CA
11045    CA
12322    CA
12941    CA
16803    MD
17322    CA
Name: State, dtype: object

### How to use the iloc indexer

In [40]:
#IMPORTANT: PANDAS
#iloc #select #pandas

#iloc selects by integer position
#iloc slices include the first number and exclude the last number
ufo.iloc[:, 0:4].head(3)

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO


In [41]:
#rows at positions 0, 1 and 2; the stop position 3 is excluded
ufo.iloc[0:3, :]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [42]:
#loc also accepts variables as slice bounds
row = 2
ufo.loc[0:row, ['City', 'State']]

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO


### ix: deprecated, but loc and iloc provide the same functionality

In [43]:
#IMPORTANT: PANDAS
#index_col #index #select #read #pandas
#reload drinks, this time using the country col as the row index
drinks = pd.read_csv(drinks_path, index_col='country')
drinks.head(n=5)

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [44]:
#IMPORTANT: PANDAS
#ix #loc #iloc #select #pandas
#ix has been deprecated since version 0.20.0
#ix allowed mixing label-based (loc) and position-based (iloc) selection
#alternative with loc: translate the col position into its label
drinks.loc['Angola', drinks.columns[1]]

57

In [45]:
#IMPORTANT: PANDAS
#ix #loc #iloc #select #pandas
#ix has been deprecated since version 0.20.0
#ix allowed mixing label-based (loc) and position-based (iloc) selection
#alternative with iloc: translate the row label into its position
drinks.iloc[drinks.index.get_loc('Angola'), 1]

57