## <span style="color:darkred"> Subsetting Dataframes </span>

In [3]:
# Load the pandas package
import pandas as pd

In [9]:
# Load the country.csv data
country = pd.read_csv('country_subset.csv')

In [11]:
# Display the country dataframe
country

Unnamed: 0,Name,Continent,Population
0,Afghanistan,Asia,22720000
1,Albania,Europe,3401200
2,Algeria,Africa,31471000
3,American Samoa,Oceania,68000
4,Andorra,Europe,78000
...,...,...,...
234,Western Sahara,Africa,293000
235,Yemen,Asia,18112000
236,Yugoslavia,Europe,10640000
237,Zambia,Africa,9169000


In [23]:
# Display the country dataframe
country.head()
country.tail()

Unnamed: 0,Name,Continent,Population
234,Western Sahara,Africa,293000
235,Yemen,Asia,18112000
236,Yugoslavia,Europe,10640000
237,Zambia,Africa,9169000
238,Zimbabwe,Africa,11669000


In [15]:
# Select the 'Name' column
country['Name']

0         Afghanistan
1             Albania
2             Algeria
3      American Samoa
4             Andorra
            ...      
234    Western Sahara
235             Yemen
236        Yugoslavia
237            Zambia
238          Zimbabwe
Name: Name, Length: 239, dtype: object

### <span style="color:orangered">Show the type of *country['Name']* </span>

In [17]:
# Put an extra bracket around the column label to return a dataframe
country[['Name']]

Unnamed: 0,Name
0,Afghanistan
1,Albania
2,Algeria
3,American Samoa
4,Andorra
...,...
234,Western Sahara
235,Yemen
236,Yugoslavia
237,Zambia


### <span style="color:orangered">Show the type of *country[['Name']]* </span>

In [33]:
#country[['Name'].type] ❌
type(country[['Name','Population']])

pandas.core.frame.DataFrame

## <span style = "color:darkred"> Subsetting a Dataframe using either the `iloc()` or `loc()` method. </span>
- use iloc() when using integer indices to subset the dataframe
- use loc() when using names to subset the dataframe.

### `pandas.DataFrame.iloc()`
https://pandas.pydata.org/pandas-docs/version/2.0/reference/api/pandas.DataFrame.iloc.html 


`.iloc[]` is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a boolean array.

Allowed inputs are:

- An integer, e.g. 5.
- A list or array of integers, e.g. [4, 3, 0].
- A slice object with ints, e.g. 1:7.
- ...

###  Use iloc() when using integer indices to subset the dataframe.

In [35]:
# Select the element in row 0 and column 1
country.iloc[0, 1] # row 0, column 1
#在 iloc 中，切片是 不包含结束索引 的。这与 loc 不同，loc 会包括结束标签

'Asia'

In [37]:
# Select rows 0 and 1 and column 1
country.iloc[0:2, 1] # rows 0 and 1, column 1

0      Asia
1    Europe
Name: Continent, dtype: object

In [39]:
# Select the 'Name' and 'Continent' columns. 
country.iloc[[2,3,4], :] # row 2, 3, and 4, all columns

Unnamed: 0,Name,Continent,Population
2,Algeria,Africa,31471000
3,American Samoa,Oceania,68000
4,Andorra,Europe,78000


In [None]:
# Select all rows before row 7 and columns 1 thru 2
country.iloc[:7, 1:3] # rows 0 through 6, columns 1 and 2

Unnamed: 0,Continent,Population
0,Asia,22720000
1,Europe,3401200
2,Africa,31471000
3,Oceania,68000
4,Europe,78000
5,Africa,12878000
6,North America,8000


In [None]:
# Select rows 10 thru 20 and all columns from column 1 onwards
country.iloc[10:15, 1:] # rows 10 through 14, columns 1 through the end (col1 and col2)

Unnamed: 0,Continent,Population
10,Asia,3520000
11,North America,103000
12,Oceania,18886000
13,Europe,8091800
14,Asia,7734000


### <span style="color:orangered"> Practice 1 </span>
Show all rows and the "country" and "Polulation" columns.

In [49]:
country.iloc[:, [0,2]]


Unnamed: 0,Name,Population
0,Afghanistan,22720000
1,Albania,3401200
2,Algeria,31471000
3,American Samoa,68000
4,Andorra,78000
...,...,...
234,Western Sahara,293000
235,Yemen,18112000
236,Yugoslavia,10640000
237,Zambia,9169000


In [51]:
country.iloc[2:5, :]

Unnamed: 0,Name,Continent,Population
2,Algeria,Africa,31471000
3,American Samoa,Oceania,68000
4,Andorra,Europe,78000


### `pandas.DataFrame.loc()`
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.loc.html

.loc[] is primarily label based, but may also be used with a boolean array.

Allowed inputs are:

- A single label, e.g. 5 or 'a', (note that 5 is interpreted as a label of the index, and never as an integer position along the index).
- A list or array of labels, e.g. ['a', 'b', 'c'].
- A slice object with labels, e.g. 'a':'f'.
- ...

###  Use loc() when using names to subset the dataframe.

In [None]:
# Select rows 10 thru 20 and the Continent and Population columns
country.loc[10:20, ['Continent', 'Population']] # rows 10 through 20, columns 'Continent' and 'Population'

Unnamed: 0,Continent,Population
10,Asia,3520000
11,North America,103000
12,Oceania,18886000
13,Europe,8091800
14,Asia,7734000
15,North America,307000
16,Asia,617000
17,Asia,129155000
18,North America,270000
19,Europe,10236000


In [None]:
country.iloc[-5:,[0,2]] # last 5 rows, first and third columns

Unnamed: 0,Name,Population
234,Western Sahara,293000
235,Yemen,18112000
236,Yugoslavia,10640000
237,Zambia,9169000
238,Zimbabwe,11669000


### <span style="color:orangered">Practice 2:  </span>
Use loc() to show all rows and the "country" and "Polulation" columns.

In [61]:
country.loc[:, ['Name','Continent', 'Population']]
#country.loc[:, 1:3]❌

Unnamed: 0,Name,Continent,Population
0,Afghanistan,Asia,22720000
1,Albania,Europe,3401200
2,Algeria,Africa,31471000
3,American Samoa,Oceania,68000
4,Andorra,Europe,78000
...,...,...,...
234,Western Sahara,Africa,293000
235,Yemen,Asia,18112000
236,Yugoslavia,Europe,10640000
237,Zambia,Africa,9169000


### Subsetting only columns. 
- use [[ ]] to get one or more columns
- use pandas.DataFrame.loc()
- use pandas.DataFrame.iloc()

In [11]:
# Select the 'Name' and 'Continent' columns. 
country[['Name', 'Continent']] # Note: The column labels are passed as a list.

Unnamed: 0,Name,Continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,American Samoa,Oceania
4,Andorra,Europe
...,...,...
234,Western Sahara,Africa
235,Yemen,Asia
236,Yugoslavia,Europe
237,Zambia,Africa


### To return a dataframe with only one column, use `[[]]`.

In [18]:
# Select the 'Name' and 'Continent' columns. 
country[['Name']].head() #The list has only one element, so it returns a dataframe.

Unnamed: 0,Name
0,Afghanistan
1,Albania
2,Algeria
3,American Samoa
4,Andorra


### To return a series with only one column, use `[]`

In [19]:
# Select the 'Name' and 'Continent' columns. 
country['Name'].head() #The list has only one element, so it returns a series.

0       Afghanistan
1           Albania
2           Algeria
3    American Samoa
4           Andorra
Name: Name, dtype: object

In [37]:
# Select the 'Name' and 'Continent' columns. 
country.loc[:,['Name', 'Continent']] #all rows, columns 'Name' and 'Continent'

Unnamed: 0,Name,Continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,American Samoa,Oceania
4,Andorra,Europe
...,...,...
234,Western Sahara,Africa
235,Yemen,Asia
236,Yugoslavia,Europe
237,Zambia,Africa


In [38]:
# Select the 'Name' and 'Continent' columns. 
country.iloc[:,:2] # all rows, first two columns

Unnamed: 0,Name,Continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,American Samoa,Oceania
4,Andorra,Europe
...,...,...
234,Western Sahara,Africa
235,Yemen,Asia
236,Yugoslavia,Europe
237,Zambia,Africa


### Subsetting only rows. 
- use single [ ]
- use pandas.DataFrame.iloc()
- use pandas.DataFrame.loc()

In [None]:
# Select the first 5 rows. 
country[0:5]

Unnamed: 0,Name,Continent,Population
0,Afghanistan,Asia,22720000
1,Albania,Europe,3401200
2,Algeria,Africa,31471000
3,American Samoa,Oceania,68000
4,Andorra,Europe,78000


In [None]:
# Select the first 5 rows. 
country.iloc[0:5]

Unnamed: 0,Name,Continent,Population
0,Afghanistan,Asia,22720000
1,Albania,Europe,3401200
2,Algeria,Africa,31471000
3,American Samoa,Oceania,68000
4,Andorra,Europe,78000


In [None]:
# Select the first 5 rows. 
country.loc[0:5] # Note: This will return the first 6 rows, as index values are considered labels.

Unnamed: 0,Name,Continent,Population
0,Afghanistan,Asia,22720000
1,Albania,Europe,3401200
2,Algeria,Africa,31471000
3,American Samoa,Oceania,68000
4,Andorra,Europe,78000
5,Angola,Africa,12878000
