In [1]:
import pandas as pd

In [2]:
# Source data: siena2018-pres.csv from the mattharrison/datasets GitHub repo.
# read_csv fetches it over HTTP, so this cell needs network access.
url = 'https://raw.githubusercontent.com/mattharrison/datasets/master/data/siena2018-pres.csv'
df = pd.read_csv(url)

In [3]:
def tweak_sienna(df):
    """Return a cleaned copy of the raw presidents frame.

    Steps, in order:

    1. Rename ``'Seq.'`` to ``'Seq'``.
    2. Apply the abbreviation -> description rename table defined below
       (spaces in each description are replaced with underscores first).
    3. Cast ``Party`` to ``category`` and every ``int64`` column to ``uint8``.
    4. Add ``Average_rank``: the dense rank of each row's sum over *all*
       ``uint8`` columns, stored as ``uint8``.
    5. Add ``Quartile``: ``Average_rank`` split by ``pd.cut`` into four
       equal-width bins labelled ``1st`` .. ``4th``.

    Every step returns a new frame; the argument itself is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame. It is expected to contain a ``Party`` column.

    Returns
    -------
    pandas.DataFrame
        The transformed frame with ``Average_rank`` and ``Quartile`` appended.
    """
    # NOTE(review): every key in this table ends with a space, and
    # DataFrame.rename silently skips keys that are not column labels. The
    # notebook's displayed output still shows the short headers (Bg, Im, ...),
    # so this table currently renames nothing. If a key did match, the result
    # would also carry stray underscores (e.g. '_Background_'). The literals
    # are kept exactly as they were so behaviour is unchanged -- confirm intent
    # before stripping the whitespace, since that would change column names.
    descriptions = {
        'Bg ': ' Background ',
        'PL ': ' Party leadership ',
        'CAb ': ' Communication ability ',
        'RC ': ' Relations with Congress ',
        'CAp ': ' Court appointments ',
        'HE ': ' Handling of economy ',
        'L ': 'Luck ',
        'AC ': ' Ability to compromise ',
        'WR ': ' Willing to take risks ',
        'EAp ': ' Executive appointments ',
        'OA ': ' Overall ability ',
        'Im ': ' Imagination ',
        'DA ': ' Domestic accomplishments ',
        'Int ': ' Integrity ',
        'EAb ': ' Executive ability ',
        'FPA ': ' Foreign policy accomplishments ',
        'LA ': ' Leadership ability ',
        'IQ ': ' Intelligence ',
        'AM ': ' Avoid crucial mistakes ',
        'EV ': " Experts ' view ",
        'O ': ' Overall ',
    }
    column_map = {}
    for abbr, text in descriptions.items():
        column_map[abbr] = text.replace(' ', '_')

    def downcast_ints(frame):
        """Cast every int64 column of ``frame`` to uint8."""
        targets = {name: 'uint8' for name in frame.select_dtypes('int64').columns}
        return frame.astype(targets)

    def average_rank(frame):
        """Dense rank (as uint8) of the per-row sum of all uint8 columns."""
        # NOTE(review): the sum covers every uint8 column, so any integer
        # identifier column that was downcast above is counted too -- confirm.
        row_totals = frame.select_dtypes('uint8').sum(axis=1)
        return row_totals.rank(method='dense').astype('uint8')

    def quartile(frame):
        """Bucket Average_rank into four labelled bins."""
        # pd.cut with an integer bin count yields equal-width bins over the
        # value range, not equal-sized groups (that would be pd.qcut).
        return pd.cut(frame.Average_rank, 4, labels='1st 2nd 3rd 4th'.split())

    renamed = df.rename(columns={'Seq.': 'Seq'}).rename(columns=column_map)
    typed = downcast_ints(renamed.astype({'Party': 'category'}))
    # assign evaluates keyword callables in order, so quartile() can read the
    # Average_rank column created just before it.
    return typed.assign(Average_rank=average_rank, Quartile=quartile)

In [4]:
# Clean the raw frame, then discard the 'Unnamed: 0' column.
# NOTE: this rebinds `df`, so the cell is not re-runnable on its own -- a second
# run fails because 'Unnamed: 0' is already gone; re-run the load cell first.
# NOTE(review): the drop happens after tweak_sienna, so if 'Unnamed: 0' is an
# int64 column it has already been counted in the Average_rank sum -- confirm
# that is intended.
df = tweak_sienna(df).drop(columns=['Unnamed: 0'])

In [5]:
# Preview the first five rows of the cleaned frame (head() defaults to 5).
df.head()

Unnamed: 0,Seq,President,Party,Bg,Im,Int,IQ,L,WR,AC,...,CAp,HE,EAp,DA,FPA,AM,EV,O,Average_rank,Quartile
0,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,1,2,2,1,2,1,1,1st
1,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,12,2nd
2,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,7,20,4,6,9,7,5,5,5,1st
3,4,James Madison,Democratic-Republican,4,6,7,3,16,15,6,...,6,14,7,11,19,11,8,7,6,1st
4,5,James Monroe,Democratic-Republican,9,14,11,18,6,16,7,...,11,9,9,10,5,6,9,8,7,1st


### Filtering

In [6]:
# Boolean-mask filtering: build a True/False Series (one entry per row), then
# index the frame with it to keep only the rows where the mask is True.
mask = df['Average_rank'] < 10
tmp_df = df[mask]

In [7]:
# Show the first five rows that passed the filter; note the gaps in the index.
tmp_df.head()

Unnamed: 0,Seq,President,Party,Bg,Im,Int,IQ,L,WR,AC,...,CAp,HE,EAp,DA,FPA,AM,EV,O,Average_rank,Quartile
0,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,1,2,2,1,2,1,1,1st
2,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,7,20,4,6,9,7,5,5,5,1st
3,4,James Madison,Democratic-Republican,4,6,7,3,16,15,6,...,6,14,7,11,19,11,8,7,6,1st
4,5,James Monroe,Democratic-Republican,9,14,11,18,6,16,7,...,11,9,9,10,5,6,9,8,7,1st
15,16,Abraham Lincoln,Republican,28,1,2,2,18,1,1,...,3,4,2,1,6,2,1,3,2,1st


In [8]:
# Combine two conditions with & (element-wise AND). The comparison needs its
# own parentheses because & binds more tightly than ==.
# `mask` is the Average_rank < 10 Series built in the earlier filtering cell.
tmp_df = df[mask & (df['Party'] == 'Republican')]

In [9]:
# Show the filtered rows (head() returns at most the first five).
tmp_df.head()

Unnamed: 0,Seq,President,Party,Bg,Im,Int,IQ,L,WR,AC,...,CAp,HE,EAp,DA,FPA,AM,EV,O,Average_rank,Quartile
15,16,Abraham Lincoln,Republican,28,1,2,2,18,1,1,...,3,4,2,1,6,2,1,3,2,1st
24,26,Theodore Roosevelt,Republican,5,4,8,6,2,2,15,...,9,3,5,4,3,5,4,4,4,1st
32,34,Dwight D. Eisenhower,Republican,11,18,5,17,7,21,5,...,5,6,11,8,7,3,6,6,8,1st


### Filtering using query

In [10]:
# NOTE: inside a query string, conditions are joined with Python's lowercase
# boolean keywords (`and`, `or`, `not`). Column names are written exactly as
# they appear in the frame, including capitals (e.g. `Average_rank`, `Party`).
tmp_df = df.query('Average_rank < 10 and Party == "Republican"')

In [11]:
# Show the rows selected by query() (head() returns at most the first five).
tmp_df.head()

Unnamed: 0,Seq,President,Party,Bg,Im,Int,IQ,L,WR,AC,...,CAp,HE,EAp,DA,FPA,AM,EV,O,Average_rank,Quartile
15,16,Abraham Lincoln,Republican,28,1,2,2,18,1,1,...,3,4,2,1,6,2,1,3,2,1st
24,26,Theodore Roosevelt,Republican,5,4,8,6,2,2,15,...,9,3,5,4,3,5,4,4,4,1st
32,34,Dwight D. Eisenhower,Republican,11,18,5,17,7,21,5,...,5,6,11,8,7,3,6,6,8,1st


In [16]:
# NOTE: prefix a name with @ inside a query string to refer to a Python
# variable from the surrounding scope instead of a column. Here `var` is a
# precomputed boolean Series that is AND-ed with the Party condition.
var = df['Average_rank'] < 10
tmp_df = df.query('@var and Party == "Republican"')

In [17]:
# Show the rows selected via the @var query (head() returns at most five).
tmp_df.head()

Unnamed: 0,Seq,President,Party,Bg,Im,Int,IQ,L,WR,AC,...,CAp,HE,EAp,DA,FPA,AM,EV,O,Average_rank,Quartile
15,16,Abraham Lincoln,Republican,28,1,2,2,18,1,1,...,3,4,2,1,6,2,1,3,2,1st
24,26,Theodore Roosevelt,Republican,5,4,8,6,2,2,15,...,9,3,5,4,3,5,4,4,4,1st
32,34,Dwight D. Eisenhower,Republican,11,18,5,17,7,21,5,...,5,6,11,8,7,3,6,6,8,1st


### Indexing by position and column name

In [21]:
# iloc is purely positional: row at position 1, column at position 1
# (both zero-based), which returns a single scalar value.
df.iloc[1, 1]

'John Adams'

In [23]:
# loc is label-based: the row whose index label is 1 and the column labelled
# 'President'. It returns the same cell as the iloc lookup above.
df.loc[1, 'President']

'John Adams'

In [25]:
# Lists of positions select several rows and columns at once and return a
# DataFrame: rows at positions 1, 10, 21 and columns at positions 1 and 2.
df.iloc[[1, 10, 21], [1, 2]]

Unnamed: 0,President,Party
1,John Adams,Federalist
10,James K. Polk,Democratic
21,Grover Cleveland,Democratic


In [26]:
# Label-based equivalent: rows with index labels 1, 10, 21 and the columns
# named 'President' and 'Party'.
df.loc[[1, 10, 21], ['President', 'Party']]

Unnamed: 0,President,Party
1,John Adams,Federalist
10,James K. Polk,Democratic
21,Grover Cleveland,Democratic
