### Importing basic libraries

In [1]:
import pandas as pd

### Creating a pd.DataFrame from a dictionary

In [3]:
# Raw inputs: one list per future column, aligned by position
names = ["United States", "Australia", "Japan", "India", "Russia", "Morocco", "Egypt"]
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Map each column label to its list of values; key order becomes column order
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

# Every key turns into a column; rows receive the default 0..n-1 integer index
cars = pd.DataFrame(data=my_dict)

# Plain-text rendering of the resulting frame
print(cars)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45


In [4]:
# Short country codes to use as row labels, listed in the same order as the rows
row_labels = ["US", "AUS", "JPN", "IN", "RU", "MOR", "EG"]

# Swap the default integer index for the codes (assigned on the existing frame)
cars.index = pd.Index(row_labels)

# Show the frame with its new row labels
print(cars)

           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JPN          Japan         False           588
IN           India         False            18
RU          Russia          True           200
MOR        Morocco          True            70
EG           Egypt          True            45


### Creating a pd.DataFrame from a .csv

In [17]:
# Remote location of the libraries dataset (raw CSV served from GitHub)
url = 'https://raw.githubusercontent.com/GabrielReisR/R/master/estrutura%20de%20dados/dados/libraries.csv'

# Read with default settings apart from the encoding: pandas adds its own
# 0-based row index, and the file's first (header-less) column shows up as an
# extra "Unnamed: 0" data column in the result
libraries = pd.read_csv(
    url,
    encoding='latin_1',
)

libraries

Unnamed: 0.1,Unnamed: 0,Country,Region,Expenditures \r\r\n(US Dollars),Total Libraries,Total Librarians,Total Volumes,Total Users
0,1,Afghanistan,Asia,,150,,577300,48133.0
1,2,Albania,Europe,134036.0,2107,32.0,12398795,581298.0
2,3,Algeria,Africa,641176.0,760,56.0,4363718,1430.0
3,4,American Samoa,Oceania,,32,,170009,
4,5,Andorra,Europe,5777651.0,39,6.0,222869,16596.0
...,...,...,...,...,...,...,...,...
214,215,"Virgin Islands, British",Latin America,,7,,36000,8815.0
215,216,"Virgin Islands, U.S.",Latin America,,45,,495810,
216,217,Yemen,Middle East,,12,,360500,
217,218,Zambia,Africa,6050.0,247,,4226783,950.0


In [16]:
# Re-read the file, this time promoting its first column to the row index so
# it is no longer treated as a regular data column
libraries = pd.read_csv(
    url,
    index_col=0,
    encoding='latin_1',
)

libraries

Unnamed: 0,Country,Region,Expenditures \r\r\n(US Dollars),Total Libraries,Total Librarians,Total Volumes,Total Users
1,Afghanistan,Asia,,150,,577300,48133.0
2,Albania,Europe,134036.0,2107,32.0,12398795,581298.0
3,Algeria,Africa,641176.0,760,56.0,4363718,1430.0
4,American Samoa,Oceania,,32,,170009,
5,Andorra,Europe,5777651.0,39,6.0,222869,16596.0
...,...,...,...,...,...,...,...
215,"Virgin Islands, British",Latin America,,7,,36000,8815.0
216,"Virgin Islands, U.S.",Latin America,,45,,495810,
217,Yemen,Middle East,,12,,360500,
218,Zambia,Africa,6050.0,247,,4226783,950.0


In [24]:
# Short snake_case labels for the seven data columns, in file order
column_names = [
    'country',
    'region',
    'invests_us_dollars',
    'libraries_total',
    'librarians_total',
    'volumes_total',
    'users_total',
]

# header=0 marks the file's first line as a header row to be discarded and
# replaced by `names`; without it that line would be parsed as a data row
libraries = pd.read_csv(
    url,
    encoding='latin_1',
    index_col=0,
    header=0,
    names=column_names,
)

libraries

Unnamed: 0,country,region,invests_us_dollars,libraries_total,librarians_total,volumes_total,users_total
1,Afghanistan,Asia,,150,,577300,48133.0
2,Albania,Europe,134036.0,2107,32.0,12398795,581298.0
3,Algeria,Africa,641176.0,760,56.0,4363718,1430.0
4,American Samoa,Oceania,,32,,170009,
5,Andorra,Europe,5777651.0,39,6.0,222869,16596.0
...,...,...,...,...,...,...,...
215,"Virgin Islands, British",Latin America,,7,,36000,8815.0
216,"Virgin Islands, U.S.",Latin America,,45,,495810,
217,Yemen,Middle East,,12,,360500,
218,Zambia,Africa,6050.0,247,,4226783,950.0


### Accessing and subsetting rows and columns

In [27]:
# What goes inside [] decides the type of the result
column_selections = (
    'country',              # bare label    -> pandas Series
    ['country'],            # one-item list -> single-column DataFrame
    ['country', 'region'],  # two-item list -> two-column DataFrame
)

# Print each selection in turn
for selection in column_selections:
    print(libraries[selection])

1                  Afghanistan
2                      Albania
3                      Algeria
4               American Samoa
5                      Andorra
                ...           
215    Virgin Islands, British
216       Virgin Islands, U.S.
217                      Yemen
218                     Zambia
219                   Zimbabwe
Name: country, Length: 219, dtype: object
                     country
1                Afghanistan
2                    Albania
3                    Algeria
4             American Samoa
5                    Andorra
..                       ...
215  Virgin Islands, British
216     Virgin Islands, U.S.
217                    Yemen
218                   Zambia
219                 Zimbabwe

[219 rows x 1 columns]
                     country         region
1                Afghanistan           Asia
2                    Albania         Europe
3                    Algeria         Africa
4             American Samoa        Oceania
5                    Ando

In [29]:
# Plain [] slicing with integers is positional and stop-EXCLUSIVE, like a
# Python list: [:3] takes positions 0, 1 and 2, i.e. the first 3 observations
print(libraries[:3])

# Positions 3, 4 and 5 -> the fourth, fifth and sixth observations
libraries[3:6]

       country  region  invests_us_dollars  libraries_total  librarians_total  \
1  Afghanistan    Asia                 NaN              150               NaN   
2      Albania  Europe            134036.0             2107              32.0   
3      Algeria  Africa            641176.0              760              56.0   

   volumes_total  users_total  
1         577300      48133.0  
2       12398795     581298.0  
3        4363718       1430.0  


Unnamed: 0,country,region,invests_us_dollars,libraries_total,librarians_total,volumes_total,users_total
4,American Samoa,Oceania,,32,,170009,
5,Andorra,Europe,5777651.0,39,6.0,222869,16596.0
6,Angola,Africa,,62,,392870,


#### Using loc and iloc

In [37]:
# .loc selects by LABEL (index values / column names), while .iloc selects by
# integer POSITION (0-based), whatever the labels happen to be.
#
# Label slices are valid with .loc and, unlike ordinary Python slices, they
# include BOTH endpoints: .loc[1:3] returns the rows labelled 1, 2 and 3.
# Position slices with .iloc follow the usual Python rule and exclude the stop.
#
# Whether a selection comes back as a DataFrame, a Series or a single value
# depends on the selector: slices and lists keep the dimension, whereas a
# single label or position drops it.

# USING LOC: rows labelled 1 through 3 (a slice keeps the result a pd.DataFrame)
print(libraries.loc[1:3])

       country  region  invests_us_dollars  libraries_total  librarians_total  \
1  Afghanistan    Asia                 NaN              150               NaN   
2      Albania  Europe            134036.0             2107              32.0   
3      Algeria  Africa            641176.0              760              56.0   

   volumes_total  users_total  
1         577300      48133.0  
2       12398795     581298.0  
3        4363718       1430.0  


In [43]:
# USING LOC with an explicit list of labels instead of a slice; the result is
# a pd.DataFrame holding the rows labelled 1, 2 and 3
wanted_labels = [1, 2, 3]
print(libraries.loc[wanted_labels])

# IMPORTANT: .loc matches index LABELS, and this frame's labels start at 1,
# so the first row is .loc[1]. .iloc counts POSITIONS from 0, so that same
# row is .iloc[0].

       country  region  invests_us_dollars  libraries_total  librarians_total  \
1  Afghanistan    Asia                 NaN              150               NaN   
2      Albania  Europe            134036.0             2107              32.0   
3      Algeria  Africa            641176.0              760              56.0   

   volumes_total  users_total  
1         577300      48133.0  
2       12398795     581298.0  
3        4363718       1430.0  


In [39]:
# USING ILOC with a position slice: 0-based and stop-exclusive, so 1:3 picks
# positions 1 and 2 (the second and third rows) as a pd.DataFrame.
# The first three observations would be .iloc[0:3].
print(libraries.iloc[1:3, :])

   country  region  invests_us_dollars  libraries_total  librarians_total  \
2  Albania  Europe            134036.0             2107              32.0   
3  Algeria  Africa            641176.0              760              56.0   

   volumes_total  users_total  
2       12398795     581298.0  
3        4363718       1430.0  


In [49]:
# USING ILOC with a list of positions: 0, 1 and 2 are the first three rows,
# returned as a pd.DataFrame with every column kept
print(libraries.iloc[[0, 1, 2], :])

       country  region  invests_us_dollars  libraries_total  librarians_total  \
1  Afghanistan    Asia                 NaN              150               NaN   
2      Albania  Europe            134036.0             2107              32.0   
3      Algeria  Africa            641176.0              760              56.0   

   volumes_total  users_total  
1         577300      48133.0  
2       12398795     581298.0  
3        4363718       1430.0  


In [48]:
# One row label plus one column label pins down a single cell, so .loc
# returns the stored value itself (a plain string here) rather than a Series
row_label, column_label = 1, 'country'
libraries.loc[row_label, column_label]

'Afghanistan'

In [52]:
# One row position plus one column position also addresses a single cell:
# .iloc[0, 0] is the first row's first column and returns the scalar value
row_position, column_position = 0, 0
libraries.iloc[row_position, column_position]

'Afghanistan'

In [53]:
# Creating a Series: a label slice for the rows and a single column label.
# .loc slices include the end label, so rows 1 through 5 are all returned
first_label, last_label = 1, 5
libraries.loc[first_label:last_label, 'country']

1       Afghanistan
2           Albania
3           Algeria
4    American Samoa
5           Andorra
Name: country, dtype: object

In [54]:
# Creating a pd.DataFrame by position: slices on both axes keep two
# dimensions. Stops are exclusive -> row positions 0-3, column positions 0-1
libraries.iloc[:4, :2]

Unnamed: 0,country,region
1,Afghanistan,Asia
2,Albania,Europe
3,Algeria,Africa
4,American Samoa,Oceania


In [59]:
# Creating a pd.DataFrame with .loc: a list of row labels, plus ':' to keep
# every column (a list of column names could be used in its place)
wanted_labels = [1, 2, 3, 4, 5]
libraries.loc[wanted_labels, :]

Unnamed: 0,country,region,invests_us_dollars,libraries_total,librarians_total,volumes_total,users_total
1,Afghanistan,Asia,,150,,577300,48133.0
2,Albania,Europe,134036.0,2107,32.0,12398795,581298.0
3,Algeria,Africa,641176.0,760,56.0,4363718,1430.0
4,American Samoa,Oceania,,32,,170009,
5,Andorra,Europe,5777651.0,39,6.0,222869,16596.0


In [70]:
# Creating a pd.DataFrame with .iloc: lists of positions on both axes.
# Positions 0-9 are the first ten rows; they display as 1-10 because this
# frame's index labels start at 1
indexes = list(range(10))
libraries.iloc[indexes, [0, 1]]

Unnamed: 0,country,region
1,Afghanistan,Asia
2,Albania,Europe
3,Algeria,Africa
4,American Samoa,Oceania
5,Andorra,Europe
6,Angola,Africa
7,Anguilla,Latin America
8,Antigua and Barbuda,Latin America
9,Argentina,Latin America
10,Armenia,Asia
