In [1]:
# import packages
import pandas as pd
import numpy as np

In [2]:
# https://www.datacamp.com/community/tutorials/python-dictionaries
# To see why dictionaries are useful,
# have a look at the two lists: 
# countries contains the names of some European countries 
# capitals lists the corresponding names of their capital
countries = ['spain', 'france', 'germany', 'norway']
capitals = ['madrid', 'paris', 'berlin', 'oslo']

In [3]:
# to get the capital of germany:
# Get index of 'germany': ind_ger
ind_ger = countries.index('germany')
ind_ger

2

In [4]:
# Use ind_ger to get the capital of Germany
capitals[ind_ger]

'berlin'

In [5]:
# this solution is not convenient & not intuitive

In [6]:
# by using dictionaries: {key:value} pairs
# # Define a dictionary of europe
europe ={'spain':'madrid',
         'france':'paris',
         'germany':'berlin',
         'norway':'oslo'}
europe

{'spain': 'madrid', 'france': 'paris', 'germany': 'berlin', 'norway': 'oslo'}

In [7]:
# the capital of germany
# access the dictionary by passing the key of germany in a square brackets
europe['germany']

'berlin'

In [8]:
# Definition of dictionary
europe = {'spain':'madrid', 'france':'paris', 'germany':'bonn', 'norway':'oslo', 'australia':'vienna'}
europe

{'spain': 'madrid',
 'france': 'paris',
 'germany': 'bonn',
 'norway': 'oslo',
 'australia': 'vienna'}

In [9]:
# Add italy to europe
europe['italy'] = 'rome'
europe

{'spain': 'madrid',
 'france': 'paris',
 'germany': 'bonn',
 'norway': 'oslo',
 'australia': 'vienna',
 'italy': 'rome'}

In [10]:
# to assert that 'italy' is now a key in europe
'italy' in europe

True

In [11]:
# Add poland to europe
europe['poland'] = 'warsaw'
europe

{'spain': 'madrid',
 'france': 'paris',
 'germany': 'bonn',
 'norway': 'oslo',
 'australia': 'vienna',
 'italy': 'rome',
 'poland': 'warsaw'}

In [12]:
# The capital of Germany is not 'bonn'; it's 'berlin'
# # Update capital of germany
europe['germany'] = 'berlin'
europe

{'spain': 'madrid',
 'france': 'paris',
 'germany': 'berlin',
 'norway': 'oslo',
 'australia': 'vienna',
 'italy': 'rome',
 'poland': 'warsaw'}

In [13]:
# Australia is not in Europe, Austria is! Remove the key 'australia' from europe
# Remove australia
del(europe['australia'])
europe

{'spain': 'madrid',
 'france': 'paris',
 'germany': 'berlin',
 'norway': 'oslo',
 'italy': 'rome',
 'poland': 'warsaw'}

In [14]:
# Note: keys have to be "immutable" objects that can not be changed after they are created
# "immutable" objects like integers, floats, strings and booleans
# lists are not "immutable" objects. lists are mutable objects
# https://dev.to/nexttech/immutable-vs-mutable-data-types-in-python-543a
# https://medium.com/@meghamohan/mutable-and-immutable-side-of-python-c2145cf72747
# valid dictionaries
valid_dict ={0:"hello", True:"dear", "two":"world"}
valid_dict

{0: 'hello', True: 'dear', 'two': 'world'}

In [15]:
# not valid dictionary as the key is a list
# not_valid_dict ={["just", "to", "test"]:"value"}
# not_valid_dict

![image.png](attachment:image.png)

In [16]:
# Dictionary of dictionaries
# Nested dictionaries
europe = { 'spain': { 'capital':'madrid', 'population':46.77 },
           'france': { 'capital':'paris', 'population':66.03 },
           'germany': { 'capital':'berlin', 'population':80.62 },
           'norway': { 'capital':'oslo', 'population':5.084 } }
europe

{'spain': {'capital': 'madrid', 'population': 46.77},
 'france': {'capital': 'paris', 'population': 66.03},
 'germany': {'capital': 'berlin', 'population': 80.62},
 'norway': {'capital': 'oslo', 'population': 5.084}}

In [17]:
# the capital of France
# using chain square brackets
europe['france']['capital']

'paris'

In [18]:
# Add italy data to europe
europe['italy'] = {'capital':'rome', 'population':59.83}
europe

{'spain': {'capital': 'madrid', 'population': 46.77},
 'france': {'capital': 'paris', 'population': 66.03},
 'germany': {'capital': 'berlin', 'population': 80.62},
 'norway': {'capital': 'oslo', 'population': 5.084},
 'italy': {'capital': 'rome', 'population': 59.83}}

In [19]:
# https://www.datacamp.com/community/tutorials/pandas
# Pandas is an open source library
# High level data manipulation tool
# Built on NumPy
# Easy-to-use data structures and data analysis tools for Python
# DataFrame is one of Pandas' most important data structures
# It's basically a way to store tabular data where you can label the rows and the columns

In [20]:
# Dictionary to DataFrame
my_dict = {'country':['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt'], 
           'drives_right':[True, False, False, False, True, True, True], 
           'cars_per_cap':[809, 731, 588, 18, 200, 70, 45]}
my_dict

{'country': ['United States',
  'Australia',
  'Japan',
  'India',
  'Russia',
  'Morocco',
  'Egypt'],
 'drives_right': [True, False, False, False, True, True, True],
 'cars_per_cap': [809, 731, 588, 18, 200, 70, 45]}

In [21]:
# Build a DataFrame cars from my_dict: cars
cars_df = pd.DataFrame(my_dict)
cars_df

Unnamed: 0,country,drives_right,cars_per_cap
0,United States,True,809
1,Australia,False,731
2,Japan,False,588
3,India,False,18
4,Russia,True,200
5,Morocco,True,70
6,Egypt,True,45


In [22]:
# Specify row labels of cars
# by setting the index attribute of cars
cars_df.index = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']
cars_df

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JPN,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


In [23]:
# CSV to DataFrame
cars_df = pd.read_csv('cars.csv')
cars_df

Unnamed: 0.1,Unnamed: 0,cars_per_cap,country,drives_right
0,US,809,United States,True
1,AUS,731,Australia,False
2,JAP,588,Japan,False
3,IN,18,India,False
4,RU,200,Russia,True
5,MOR,70,Morocco,True
6,EG,45,Egypt,True


In [24]:
# Fix import by including index_col
cars_df = pd.read_csv('cars.csv', index_col = 0)
cars_df

Unnamed: 0,cars_per_cap,country,drives_right
US,809,United States,True
AUS,731,Australia,False
JAP,588,Japan,False
IN,18,India,False
RU,200,Russia,True
MOR,70,Morocco,True
EG,45,Egypt,True


In [25]:
# dataframe columns
cars_df.columns

Index(['cars_per_cap', 'country', 'drives_right'], dtype='object')

In [26]:
# single bracket version gives a Pandas Series
# the double bracket version gives a Pandas DataFrame

# country column as Pandas Series
cars_df['country']

US     United States
AUS        Australia
JAP            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: country, dtype: object

In [27]:
# country column as Pandas DataFrame
cars_df[['country']]

Unnamed: 0,country
US,United States
AUS,Australia
JAP,Japan
IN,India
RU,Russia
MOR,Morocco
EG,Egypt


In [28]:
# DataFrame with country and drives_right columns
cars_df[['country','drives_right']]

Unnamed: 0,country,drives_right
US,United States,True
AUS,Australia,False
JAP,Japan,False
IN,India,False
RU,Russia,True
MOR,Morocco,True
EG,Egypt,True


In [29]:
# by using loc and iloc you can do practically any data selection operation on DataFrames
# select observation for Japan as a Pandas series
cars_df.loc['JAP']

cars_per_cap      588
country         Japan
drives_right    False
Name: JAP, dtype: object

In [30]:
type(cars_df.loc['JAP'])

pandas.core.series.Series

In [31]:
# select observation for Japan as a Pandas DataFrame using .loc
cars_df.loc[['JAP']]

Unnamed: 0,cars_per_cap,country,drives_right
JAP,588,Japan,False


In [32]:
type(cars_df.loc[['JAP']])

pandas.core.frame.DataFrame

In [33]:
# select observation for Japan as a Pandas series using .iloc
cars_df.iloc[2]

cars_per_cap      588
country         Japan
drives_right    False
Name: JAP, dtype: object

In [34]:
# select observation for Japan as a Pandas DataFrame using .iloc
cars_df.iloc[[2]]

Unnamed: 0,cars_per_cap,country,drives_right
JAP,588,Japan,False


In [35]:
# select observations for Australia and Egypt as a Pandas DataFrame using .loc
cars_df.loc[['AUS','EG']]

Unnamed: 0,cars_per_cap,country,drives_right
AUS,731,Australia,False
EG,45,Egypt,True


In [36]:
type(cars_df.loc[['AUS','EG']])

pandas.core.frame.DataFrame

In [37]:
# select observations for Australia and Egypt as a Pandas DataFrame using .iloc
cars_df.iloc[[0,6]]

Unnamed: 0,cars_per_cap,country,drives_right
US,809,United States,True
EG,45,Egypt,True


In [38]:
cars_df

Unnamed: 0,cars_per_cap,country,drives_right
US,809,United States,True
AUS,731,Australia,False
JAP,588,Japan,False
IN,18,India,False
RU,200,Russia,True
MOR,70,Morocco,True
EG,45,Egypt,True


In [39]:
# select drives_right value of Morocco
cars_df.loc[['MOR'],['drives_right']]

Unnamed: 0,drives_right
MOR,True


In [40]:
cars_df

Unnamed: 0,cars_per_cap,country,drives_right
US,809,United States,True
AUS,731,Australia,False
JAP,588,Japan,False
IN,18,India,False
RU,200,Russia,True
MOR,70,Morocco,True
EG,45,Egypt,True


In [41]:
# print out sub-DataFrame, containing the observations for Russia and Morocco and the columns country and drives_right.
cars_df.loc[['RU','MOR'],['country','drives_right']]

Unnamed: 0,country,drives_right
RU,Russia,True
MOR,Morocco,True


In [42]:
cars_df

Unnamed: 0,cars_per_cap,country,drives_right
US,809,United States,True
AUS,731,Australia,False
JAP,588,Japan,False
IN,18,India,False
RU,200,Russia,True
MOR,70,Morocco,True
EG,45,Egypt,True


In [43]:
# Print out drives_right column as a Pandas Series using .loc
cars_df.loc[:,'drives_right']

US      True
AUS    False
JAP    False
IN     False
RU      True
MOR     True
EG      True
Name: drives_right, dtype: bool

In [44]:
# Print out drives_right column as a Pandas Series using .iloc
cars_df.iloc[:,2]

US      True
AUS    False
JAP    False
IN     False
RU      True
MOR     True
EG      True
Name: drives_right, dtype: bool

In [45]:
# Print out drives_right column as a Pandas DataFrame using .loc
cars_df.loc[:,['drives_right']]

Unnamed: 0,drives_right
US,True
AUS,False
JAP,False
IN,False
RU,True
MOR,True
EG,True


In [46]:
# Print out drives_right column as a Pandas DataFrame using .iloc
cars_df.iloc[:,[2]]

Unnamed: 0,drives_right
US,True
AUS,False
JAP,False
IN,False
RU,True
MOR,True
EG,True


In [47]:
# Print out cars_per_cap and drives_right as a Pandas DataFrame using .loc
cars_df.loc[:,['cars_per_cap','drives_right']]

Unnamed: 0,cars_per_cap,drives_right
US,809,True
AUS,731,False
JAP,588,False
IN,18,False
RU,200,True
MOR,70,True
EG,45,True


In [48]:
# Print out cars_per_cap and drives_right as a Pandas DataFrame using .iloc
cars_df.iloc[:,[0,2]]

Unnamed: 0,cars_per_cap,drives_right
US,809,True
AUS,731,False
JAP,588,False
IN,18,False
RU,200,True
MOR,70,True
EG,45,True
