In [76]:
import pandas as pd
import numpy as np

In [77]:
# Two parallel lists: the value at position i of `pop` belongs to
# the country at position i of `countries`
countries = [
    'afghanistan',
    'albania',
    'algeria',
]
pop = [
    30.55,
    2.77,
    39.21,
]

In [78]:
# Extract the population of Albania from the parallel lists:
# find the position of 'albania' in `countries`, then read the same position in `pop`
alb = countries.index('albania')
pop[alb]

2.77

Extracting values this way does not scale well because it is:
- Not convenient (two lists must be kept in sync, and every lookup needs an index search first)
- Not intuitive

The solution is **dictionaries**.

In [79]:
# Map each country name (key) to its population figure (value)
world = {
    'afghanistan': 30.55,
    'albania': 2.77,
    'algeria': 39.21,
}
world

{'afghanistan': 30.55, 'albania': 2.77, 'algeria': 39.21}

In [80]:
# Extract the population of Albania by looking up its key directly
world['albania']

2.77

In [81]:
# Keys in one dictionary may be of different types (here: int, bool and str)
multi_dict = {
    0: 'hello',
    True: 'dear',
    'two': 'world',
}
multi_dict

{0: 'hello', True: 'dear', 'two': 'world'}

In [75]:
# Dictionary keys must be hashable (immutable); a list is mutable, so using one
# as a key raises TypeError.
# The error is the point of this cell, so it is caught and printed rather than
# left uncaught: an uncaught exception would stop "Restart & Run All" here.
try:
    dict_err = {['how', 'is', 'it']: 'possible'}
except TypeError as exc:
    # dict_err is never bound because building the dictionary failed
    print(f'{type(exc).__name__}: {exc}')

TypeError: unhashable type: 'list'

In [None]:
# A list is fine as a *value*; only the key has to be hashable
dict_right = {
    'possible': ['this', 'is', 'it'],
}
dict_right

{'possible': ['this', 'is', 'it']}

In [None]:
# Add a new key-value pair to an existing dictionary by assigning to a new key.
# This mutates `world` in place, so every later cell sees the extra entry.
world['turkey'] = 19.60
world

{'afghanistan': 30.55, 'albania': 2.77, 'algeria': 39.21, 'turkey': 19.6}

In [None]:
# Check whether a key exists: `in` tests the dictionary's keys
'turkey' in world

True

In [None]:
# Remove a key-value pair. `del` is a statement, so it takes no call parentheses.
# Re-running this cell without re-creating `world` raises KeyError, because the
# key is already gone.
del world['afghanistan']
world


{'albania': 2.77, 'algeria': 39.21, 'turkey': 19.6}

## Pandas (part1)

- high-level data manipulation
- created by Wes McKinney
- built on NumPy
- core structure: the DataFrame
- Series: the one-dimensional building block (a single DataFrame column)

In [None]:
# Create a dictionary of lists: each key is a column name and each list holds
# that column's values, one entry per country.
# NOTE(review): the key 'Captial' and the value 'Brazilia' look like misspellings of
# 'Capital' and 'Brasilia' (the CSV loaded later uses 'capital' / 'Brasilia') — confirm
# before renaming, since the key becomes the DataFrame column label.
bricks = {'Country': ['Brazil', 'Russia', 'India', 'China', 'South Africa'],
            'Captial': ['Brazilia', 'Moscow', 'New Delhi', 'Beijing', 'Pretoria'],
            'Area': [8.516, 17.100, 3.286, 9.597, 1.221],
            'Population': [200.40, 143.50, 1252.00, 1357.00, 52.98]}

In [None]:
# Convert the dictionary into a DataFrame: the keys become column labels and
# the rows get a default integer index starting at 0
bricks_df = pd.DataFrame(bricks)
bricks_df

Unnamed: 0,Country,Captial,Area,Population
0,Brazil,Brazilia,8.516,200.4
1,Russia,Moscow,17.1,143.5
2,India,New Delhi,3.286,1252.0
3,China,Beijing,9.597,1357.0
4,South Africa,Pretoria,1.221,52.98


In [None]:
# Replace the default integer row index with custom labels (mutates bricks_df in place).
# NOTE(review): 'BU' and 'SU' differ from the 'BR' / 'SA' codes used in bricks.csv —
# confirm whether these are typos.
bricks_df.index = ['BU', 'RU', 'IN', 'CH', 'SU']
bricks_df

Unnamed: 0,Country,Captial,Area,Population
BU,Brazil,Brazilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SU,South Africa,Pretoria,1.221,52.98


In [None]:
# Import the CSV file into a DataFrame.
# Without index_col, the file's first (unnamed) column is read as ordinary data
# and pandas adds its own integer index on top of it.
bricks_csv = pd.read_csv('./data/bricks.csv')
bricks_csv

Unnamed: 0.1,Unnamed: 0,country,capital,area,population
0,BR,Brazil,Brasilia,8.516,200.4
1,RU,Russia,Moscow,17.1,143.5
2,IN,India,New Delhi,3.286,1252.0
3,CH,China,Beijing,9.597,1357.0
4,SA,South Africa,Pretoria,1.221,52.98


In [None]:
# index_col=0 uses the file's first column as the row index (labels)
# instead of loading it as a data column
bricks_csv = pd.read_csv('./data/bricks.csv', index_col=0)
bricks_csv


Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


## Pandas (part2)

***Index and select data***
- square brackets
- advanced methods
    - loc
    - iloc

In [None]:
# Extract the 'country' column as a Series
country_ser =  bricks_csv['country']  # single square brackets return a Series
country_ser

BR          Brazil
RU          Russia
IN           India
CH           China
SA    South Africa
Name: country, dtype: object

In [None]:
# Check the type and number of dimensions of country_ser (a 1-dimensional pandas Series)
type(country_ser), country_ser.ndim

(pandas.core.series.Series, 1)

In [None]:
# Same selection, but with double square brackets (a list of column names),
# which returns a DataFrame instead of a Series
country_frame = bricks_csv[['country']]
country_frame

Unnamed: 0,country
BR,Brazil
RU,Russia
IN,India
CH,China
SA,South Africa


In [None]:
# View country_frame's type and number of dimensions.
# The original read `country.ndim`, but `country` is not defined by the preceding
# cells, so it fails with NameError on a fresh kernel. The frame created above is
# `country_frame`; a DataFrame reports ndim == 2.
type(country_frame), country_frame.ndim

(pandas.core.frame.DataFrame, 2)

In [None]:
# Select several columns at once by passing a list of column names (returns a DataFrame)
bricks_csv[['country', 'capital']]

Unnamed: 0,country,capital
BR,Brazil,Brasilia
RU,Russia,Moscow
IN,India,New Delhi
CH,China,Beijing
SA,South Africa,Pretoria


In [None]:
# Rows can be accessed with a positional slice inside square brackets
# (the stop position is excluded)
bricks_csv[0:2]

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5


In [None]:
# Rows at positions 1, 2 and 3
bricks_csv[1:4]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0


In [None]:
# Omitting the start: rows from the beginning up to (not including) position 4
bricks_csv[:4]

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0


In [None]:
# Omitting the stop: rows from position 2 through the end
bricks_csv[2:]

Unnamed: 0,country,capital,area,population
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


In [None]:
# Plain square brackets cannot take a NumPy-style [rows, columns] pair on a DataFrame.
# The failure is the point of this cell, so it is caught and printed rather than
# left uncaught: an uncaught exception would stop "Restart & Run All" here.
# Exception is caught broadly because the exact error class may differ between
# pandas versions — TODO confirm against the pinned version.
try:
    bricks_csv[2:, :]
except Exception as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: '(slice(2, None, None), slice(None, None, None))' is an invalid key

### Question: What if we want specific columns and rows?

#### Key points about [ ] square brackets

- square brackets have limited functionality
- ideally we would index like a 2D array
    - Remember the **2D NumPy array**
    - **my_arr[row, col]**
- pandas provides this through
    - loc (label-based)
    - iloc (integer-position-based)

In [None]:
# Show the full DataFrame again for reference before the loc examples
bricks_csv

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


In [None]:
# Access information using loc (label-based selection)
bricks_csv.loc['RU']  # a single label returns the row as a Series

country       Russia
capital       Moscow
area            17.1
population     143.5
Name: RU, dtype: object

In [None]:
bricks_csv.loc[['RU']]   # a list with one label returns the row as a DataFrame

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,17.1,143.5


In [None]:
# Extract specific rows by passing a list of index labels
bricks_csv.loc[['RU', 'CH']]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,17.1,143.5
CH,China,Beijing,9.597,1357.0


In [None]:
# Extract specific rows and columns: loc[row_labels, column_labels]
bricks_csv.loc[['RU', 'CH'], ['country', 'population']]

Unnamed: 0,country,population
RU,Russia,143.5
CH,China,1357.0


In [None]:
# Slice rows by label; unlike a positional slice, the end label ('CH') is included
bricks_csv.loc['RU':'CH']

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0


In [None]:
# A label slice of rows plus a single column label returns a Series
bricks_csv.loc['RU':'CH', 'capital']

RU       Moscow
IN    New Delhi
CH      Beijing
Name: capital, dtype: object

In [None]:
# Label slices on both axes; the end labels ('CH' and 'population') are included
bricks_csv.loc['RU':'CH', 'capital':'population']

Unnamed: 0,capital,area,population
RU,Moscow,17.1,143.5
IN,New Delhi,3.286,1252.0
CH,Beijing,9.597,1357.0


In [None]:
# All rows (:) with a list of selected column labels
bricks_csv.loc[:, ['capital','area']]

Unnamed: 0,capital,area
BR,Brasilia,8.516
RU,Moscow,17.1
IN,New Delhi,3.286
CH,Beijing,9.597
SA,Pretoria,1.221


### QUICK Recap

- Square brackets

    - column access --> **bricks_csv[['country', 'capital']]**
    - row access (only through positional slicing) --> **bricks_csv[1:4]**
    

    
- loc (label-based); remember arr[rows, columns]
    - row access --> **bricks_csv.loc[['BR', 'IN']]**
    - column access --> **bricks_csv.loc[:, ['country', 'capital']]**
    - both rows and columns --> **bricks_csv.loc[['BR', 'IN'], ['country', 'capital']]**

### Row access with iloc

In [None]:
# Show the full DataFrame again for reference before the iloc examples
bricks_csv

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


In [None]:
# iloc selects by integer position; a one-element list returns the row as a DataFrame
bricks_csv.iloc[[1]]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,17.1,143.5


In [None]:
# Fancy indexing: pass a list of integer positions to pick several rows
bricks_csv.iloc[[1, 3, 4]]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,17.1,143.5
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


In [None]:
# Show the full DataFrame once more to read off row and column positions
bricks_csv

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


In [None]:
# Select rows and columns by position: iloc[row_positions, column_positions]
bricks_csv.iloc[[1, 3, 4], [0, 3]]

Unnamed: 0,country,population
RU,Russia,143.5
CH,China,1357.0
SA,South Africa,52.98


In [None]:
# All rows (:) with the columns at positions 0 and 3
bricks_csv.iloc[:, [0,3]]

Unnamed: 0,country,population
BR,Brazil,200.4
RU,Russia,143.5
IN,India,1252.0
CH,China,1357.0
SA,South Africa,52.98


In [None]:
# Rows only: leaving out the column selector returns every column
bricks_csv.iloc[[1, 3, 4]]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,17.1,143.5
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


In [None]:
# Same result as the previous cell: an explicit ':' for columns also selects every column
bricks_csv.iloc[[1, 3, 4], :]

Unnamed: 0,country,capital,area,population
RU,Russia,Moscow,17.1,143.5
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98
