### Set-up

In [1]:
import pandas as pd
import numpy as np

In [4]:
# Load the CSV and give every column a readable name in one chained expression.
# NOTE(review): the keys of this mapping are data values ("39", " State-gov", ...),
# i.e. the first line of the file is being consumed as the header row. It looks
# like the CSV has no header, in which case that first record never reaches the
# DataFrame -- confirm, and consider header=None with names=[...] if so.
adult_income = pd.read_csv("adult-income.csv").rename(
    columns={
        "39": "Age",
        " State-gov": "Workclass",
        " 77516": "Final_weight",
        " Bachelors": "Education",
        " 13": "Education_num",
        " Never-married": "Marital_status",
        " Adm-clerical": "Occupation",
        " Not-in-family": "Relationship",
        " White": "Race",
        " Male": "Gender",
        " 2174": "Capital_gain",
        " 0": "Capital_loss",
        " 40": "hours-per-week",
        " United-States": "Native_country",
        " <=50K": "Income_bracket",
    }
)
# preview the first five rows
adult_income.head()

Unnamed: 0,Age,Workclass,Final_weight,Education,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week,Native_country,Income_bracket
0,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
1,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
2,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
3,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
4,37,Private,284582,Masters,14,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,40,United-States,<=50K


### Indexing and Slicing

In [9]:
# Single-bracket indexing with one column label returns that column as a
# Series (the output carries a Name and a dtype instead of a table header).
selected_col = adult_income["Income_bracket"]
 
# display first few elements
selected_col.head()

0     <=50K
1     <=50K
2     <=50K
3     <=50K
4     <=50K
Name: Income_bracket, dtype: object

In [11]:
# form a list of column labels 
cols = ["Education", "Gender", "hours-per-week", "Income_bracket"]
 
# form the subset: indexing with a list of labels returns a DataFrame that
# holds only those columns, in the order they are listed
subset = adult_income[cols]
 
# display first few rows
subset.head()

Unnamed: 0,Education,Gender,hours-per-week,Income_bracket
0,Bachelors,Male,13,<=50K
1,HS-grad,Male,40,<=50K
2,11th,Male,40,<=50K
3,Bachelors,Female,40,<=50K
4,Masters,Female,40,<=50K


In [13]:
# A one-element list still selects a DataFrame (a one-column table), whereas
# indexing with the bare string "Education" would give a Series.
cols = ["Education"]
subset = adult_income[cols]
subset.head()

Unnamed: 0,Education
0,Bachelors
1,HS-grad
2,11th
3,Bachelors
4,Masters


In [14]:
# Attribute-style access to a column. This works because "Education" is a
# valid Python identifier; a label such as "hours-per-week" can only be
# reached with bracket indexing.
selected_col = adult_income.Education
 
# display first few elements
selected_col.head()

0     Bachelors
1       HS-grad
2          11th
3     Bachelors
4       Masters
Name: Education, dtype: object

In [16]:
# subset of DataFrame from row 5 to row 10
# (integer slicing with [] is positional and excludes the stop value, hence 11)
adult_income[5:11]

Unnamed: 0,Age,Workclass,Final_weight,Education,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week,Native_country,Income_bracket
5,49,Private,160187,9th,5,Married-spouse-absent,Other-service,Not-in-family,Black,Female,0,0,16,Jamaica,<=50K
6,52,Self-emp-not-inc,209642,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-States,>50K
7,31,Private,45781,Masters,14,Never-married,Prof-specialty,Not-in-family,White,Female,14084,0,50,United-States,>50K
8,42,Private,159449,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,5178,0,40,United-States,>50K
9,37,Private,280464,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,Black,Male,0,0,80,United-States,>50K
10,30,State-gov,141297,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,40,India,>50K


In [18]:
# first six rows of the DataFrame: positions 0 through 5
# (the start defaults to 0 and the stop value 6 is excluded)
adult_income[:6]

Unnamed: 0,Age,Workclass,Final_weight,Education,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week,Native_country,Income_bracket
0,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
1,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
2,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
3,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
4,37,Private,284582,Masters,14,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,40,United-States,<=50K
5,49,Private,160187,9th,5,Married-spouse-absent,Other-service,Not-in-family,Black,Female,0,0,16,Jamaica,<=50K


In [20]:
# the last row; using the slice -1: (rather than a scalar) keeps the result
# a one-row DataFrame
adult_income[-1:]

Unnamed: 0,Age,Workclass,Final_weight,Education,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week,Native_country,Income_bracket
32559,52,Self-emp-inc,287927,HS-grad,9,Married-civ-spouse,Exec-managerial,Wife,White,Female,15024,0,40,United-States,>50K


### Indexing and slicing with iloc

In [21]:
# iloc with a list of integer positions picks exactly those rows;
# the ":" in the column slot keeps every column
subset = adult_income.iloc[[0,1,2,5], :]
subset

Unnamed: 0,Age,Workclass,Final_weight,Education,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week,Native_country,Income_bracket
0,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
1,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
2,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
5,49,Private,160187,9th,5,Married-spouse-absent,Other-service,Not-in-family,Black,Female,0,0,16,Jamaica,<=50K


In [22]:
# iloc column slice 0:3 is positional and end-exclusive: it selects the
# columns at positions 0, 1 and 2 (Age, Workclass, Final_weight)
subset = adult_income.iloc[:, 0:3] 
subset.head()

Unnamed: 0,Age,Workclass,Final_weight
0,50,Self-emp-not-inc,83311
1,38,Private,215646
2,53,Private,234721
3,28,Private,338409
4,37,Private,284582


In [23]:
# A scalar column position returns that column as a Series
# (position 2 is Final_weight)
column = adult_income.iloc[:, 2]
column.head()

0     83311
1    215646
2    234721
3    338409
4    284582
Name: Final_weight, dtype: int64

In [24]:
# A scalar row position returns that row as a Series whose index is the
# column labels; its Name is the row's index label
row = adult_income.iloc[4, :]
row

Age                                37
Workclass                     Private
Final_weight                   284582
Education                     Masters
Education_num                      14
Marital_status     Married-civ-spouse
Occupation            Exec-managerial
Relationship                     Wife
Race                            White
Gender                         Female
Capital_gain                        0
Capital_loss                        0
hours-per-week                     40
Native_country          United-States
Income_bracket                  <=50K
Name: 4, dtype: object

In [25]:
# Scalar row and column positions together return a single value
# (row position 4, column position 2 -> Final_weight of that row)
value = adult_income.iloc[4, 2]
value

284582

### Indexing and slicing with loc

In [26]:
# loc slices by label and includes BOTH endpoints: rows labelled 10 through 15
# and every column from 'Education_num' through 'hours-per-week'
subset = adult_income.loc[10:15, 'Education_num':'hours-per-week']
subset

Unnamed: 0,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week
10,13,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,40
11,13,Never-married,Adm-clerical,Own-child,White,Female,0,0,30
12,12,Never-married,Sales,Not-in-family,Black,Male,0,0,50
13,11,Married-civ-spouse,Craft-repair,Husband,Asian-Pac-Islander,Male,0,0,40
14,4,Married-civ-spouse,Transport-moving,Husband,Amer-Indian-Eskimo,Male,0,0,45
15,9,Never-married,Farming-fishing,Own-child,White,Male,0,0,35


In [29]:
# loc with a list of row labels returns exactly those rows;
# the ":" in the column slot keeps every column
subset = adult_income.loc[[2, 4, 6, 8], :]
subset

Unnamed: 0,Age,Workclass,Final_weight,Education,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week,Native_country,Income_bracket
2,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,37,Private,284582,Masters,14,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,40,United-States,<=50K
6,52,Self-emp-not-inc,209642,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-States,>50K
8,42,Private,159449,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,5178,0,40,United-States,>50K


In [30]:
# Drop the first 100 rows by position. The index is not reset, so the
# remaining rows keep their original labels (starting at 100): from here on
# a row's label and its position are no longer the same number.
df_100 = adult_income.iloc[100:, :]
# display first few rows
df_100.head()

Unnamed: 0,Age,Workclass,Final_weight,Education,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week,Native_country,Income_bracket
100,44,Private,198282,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,15024,0,60,United-States,>50K
101,47,Self-emp-not-inc,149116,Masters,14,Never-married,Prof-specialty,Not-in-family,White,Female,0,0,50,United-States,<=50K
102,20,Private,188300,Some-college,10,Never-married,Tech-support,Own-child,White,Female,0,0,40,United-States,<=50K
103,29,Private,103432,HS-grad,9,Never-married,Craft-repair,Not-in-family,White,Male,0,0,40,United-States,<=50K
104,32,Self-emp-inc,317660,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,7688,0,40,United-States,>50K


In [31]:
# df_100 starts at label 100, so label-based and position-based selection
# return different rows for the same numbers.

# loc is label-based and includes the stop label: rows labelled 105..110.
print('.loc[105:110, :] -- gives rows matching that *index*')
print(df_100.loc[105:110, :])
print('') 
# Plain integer slicing is positional and excludes the stop position:
# positions 105..109 of df_100.
print('[105:110] -- (without loc) gives rows from *position* 105 to 110')
print(df_100[105:110])
print('')
# iloc is positional as well, with the stop position likewise excluded.
print('iloc[105:110, :] -- also gives rows from *position* 105 to 110')
print(df_100.iloc[105:110, :])

.loc[105:110, :] -- gives rows matching that *index*
     Age   Workclass  Final_weight     Education  Education_num  \
105   17           ?        304873          10th              6   
106   30     Private        194901          11th              7   
107   31   Local-gov        189265       HS-grad              9   
108   42     Private        124692       HS-grad              9   
109   24     Private        432376     Bachelors             13   
110   38     Private         65324   Prof-school             15   

          Marital_status          Occupation     Relationship    Race  \
105        Never-married                   ?        Own-child   White   
106        Never-married   Handlers-cleaners        Own-child   White   
107        Never-married        Adm-clerical    Not-in-family   White   
108   Married-civ-spouse   Handlers-cleaners          Husband   White   
109        Never-married               Sales   Other-relative   White   
110   Married-civ-spouse      Prof-spec

### Selecting a column, a row, and a single value with loc

In [33]:
# ":" for the rows plus a single column label returns that column as a Series
column = adult_income.loc[:, "Gender"]
column.head()

0       Male
1       Male
2       Male
3     Female
4     Female
Name: Gender, dtype: object

In [34]:
# A single row label plus ":" for the columns returns that row as a Series
# indexed by the column labels
row = adult_income.loc[2, :] 
row

Age                                53
Workclass                     Private
Final_weight                   234721
Education                        11th
Education_num                       7
Marital_status     Married-civ-spouse
Occupation          Handlers-cleaners
Relationship                  Husband
Race                            Black
Gender                           Male
Capital_gain                        0
Capital_loss                        0
hours-per-week                     40
Native_country          United-States
Income_bracket                  <=50K
Name: 2, dtype: object

In [37]:
# A row label and a column label together return one scalar value.
# NOTE(review): the values printed below start with a space (" Male",
# " Masters"), so the string columns appear to hold unstripped text --
# confirm, and consider stripping at load time before comparing strings.
print('sex of person at row index 2')
print(adult_income.loc[2, 'Gender'])
print('education of person at row index 4')
print(adult_income.loc[4, 'Education'])

sex of person at row index 2
 Male
education of person at row index 4
 Masters


### Chained Indexing

In [38]:
# Chained indexing: each indexer is applied to the result of the previous one.
# The second iloc counts positions inside the already-sliced frame, so the
# rows returned carry labels 205-209, not 105-109.
subset = adult_income.iloc[100:, :].iloc[105:110, :]
subset

Unnamed: 0,Age,Workclass,Final_weight,Education,Education_num,Marital_status,Occupation,Relationship,Race,Gender,Capital_gain,Capital_loss,hours-per-week,Native_country,Income_bracket
205,36,Private,128757,Bachelors,13,Married-civ-spouse,Other-service,Husband,Black,Male,7298,0,36,United-States,>50K
206,35,Private,36270,HS-grad,9,Divorced,Craft-repair,Not-in-family,White,Male,0,0,60,United-States,<=50K
207,58,Self-emp-inc,210563,HS-grad,9,Married-civ-spouse,Sales,Wife,White,Female,15024,0,35,United-States,>50K
208,17,Private,65368,11th,7,Never-married,Sales,Own-child,White,Female,0,0,12,United-States,<=50K
209,44,Local-gov,160943,HS-grad,9,Married-civ-spouse,Transport-moving,Husband,Black,Male,0,0,40,United-States,<=50K


In [40]:
# Row slice first (positions 25 through 29), then a column-list selection
# applied to that intermediate result
subset = adult_income[25:30][['Education', 'Income_bracket']]
subset

Unnamed: 0,Education,Income_bracket
25,HS-grad,<=50K
26,Some-college,>50K
27,HS-grad,<=50K
28,HS-grad,<=50K
29,Assoc-acdm,<=50K


In [41]:
# Intentional failure, shown for teaching purposes: adult_income.iloc[10, :]
# is a one-dimensional Series, so the two-axis indexer .loc[:, label] supplies
# one indexer too many. The error is caught and printed so that the notebook
# still runs to completion under "Restart & Run All".
try:
    print(adult_income.iloc[10, :].loc[:, 'hours-per-week'])
except Exception as err:  # pandas raises IndexingError; its import path varies by version
    print('{}: {}'.format(type(err).__name__, err))

IndexingError: Too many indexers

In [43]:
# Working form of the chained lookup: iloc[10, :] yields a Series, so the
# following loc takes a single label. Note the actual column label is
# 'hours-per-week' (hyphens), even though the message spells it with underscores.
print('selecting the hours_per_week value from row at position 10')
print(adult_income.iloc[10, :].loc['hours-per-week'])

selecting the hours_per_week value from row at position 10
40
