In [None]:
# Import the numpy package under the name np
import numpy as np

# Import the pandas package under the name pd
import pandas as pd

# Print the installed pandas version
print(pd.__version__)

2.2.2


# Pandas Series
We'll start by analyzing "The Group of Seven" (G7), a political forum formed by Canada, France, Germany, Italy, Japan, the United Kingdom and the United States. We'll begin with population, and for that we'll use a **pandas Series object**.

In [None]:
# Population figures for the seven member countries.
# No index is supplied, so pandas assigns the default positions 0..6.
g7_pop = pd.Series(
    data=[
        35.467,
        63.951,
        80.940,
        60.665,
        127.061,
        64.511,
        318.523,
    ],
)
g7_pop

Unnamed: 0,0
0,35.467
1,63.951
2,80.94
3,60.665
4,127.061
5,64.511
6,318.523


Someone might not know we're representing population in millions of inhabitants. Series can have a name, to better document the purpose of the Series:

In [None]:
# Attach a descriptive name to the Series; it is shown together with the values when the Series is displayed
g7_pop.name = 'G7 population in millions'
g7_pop

Unnamed: 0,G7 population in millions
0,35.467
1,63.951
2,80.94
3,60.665
4,127.061
5,64.511
6,318.523


Series are pretty similar to numpy arrays:

In [None]:
g7_pop.dtype                            # Element type of the Series: dtype('float64')

dtype('float64')

In [None]:
# The underlying data, evaluated here but not displayed:
# array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])
g7_pop.values
# Only the last expression of a cell is echoed, so the output is the type of that array
type(g7_pop.values)

numpy.ndarray

In [None]:
g7_pop[0].item()                        # First element, i.e. 35.467
g7_pop[1].item()                        # Second element, i.e. 63.951
g7_pop.index                            # RangeIndex(start=0, stop=7, step=1)
g7_pop[1:-1]                            # Everything except the first and last element; only this last expression is displayed

Unnamed: 0,G7 population in millions
1,63.951
2,80.94
3,60.665
4,127.061
5,64.511


But, in contrast to lists, we can explicitly define the index:

In [None]:
# A plain Python list for comparison: its elements are addressed by position
l = list('abcd')
l[1:3]

['b', 'c']

In [None]:
# Replace the default 0..6 positions with country labels (one label per value, in order)
g7_pop.index = [
    'Canada', 'France', 'Germany', 'Italy',
    'Japan', 'United Kingdom', 'United States',
]


In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [None]:
# Series behave much like ordered dictionaries, and can be built directly from one:
# the keys become the index labels and the values become the data.
# Keys must not carry stray whitespace ('France ' is a different label from 'France'),
# otherwise label lookups such as s['France'] raise KeyError.
g7_from_dict = pd.Series(
    {
        'Canada': 35.467,
        'France': 63.951,
        'Germany': 80.940,
        'Italy': 60.665,
        'Japan': 127.061,
        'United Kingdom': 64.511,
        'United States': 318.523,
    },
    name='G7 population in millions',
)
g7_from_dict

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [None]:
# You can also create a Series from a list of values, passing the index labels explicitly:
pd.Series(
    [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
    index=['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States'],
    name='G7 Population in millions')

Unnamed: 0,G7 Population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [None]:
# A Series can also be built from another Series by specifying the index labels to select:
# labels present in the source keep their value ('France'); labels that are absent ('spain', 'america') become NaN
pd.Series(g7_pop, index = ['France','spain','america'])

Unnamed: 0,G7 population in millions
France,63.951
spain,
america,


---
# Indexing
Indexing works much like it does for lists and dictionaries — you use the index of the element you're looking for:

In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [None]:
g7_pop['Canada'].item()    # Look up a value by its label; .item() returns it as a plain Python float

35.467

In [None]:
g7_pop['Italy'].item()    # Same label-based lookup for another country

60.665

Numeric positions can also be used, with the iloc attribute:

In [None]:
g7_pop.iloc[0].item()                 # First element by position: 35.467
g7_pop.iloc[1].item()                 # Second element by position: 63.951
g7_pop.iloc[-1].item()                # Last element: 318.523 (only this last expression is displayed)

318.523

Selecting multiple elements at once:

In [None]:
g7_pop[['Italy','Japan']]    # A list of labels selects several elements and returns a Series

Unnamed: 0,G7 population in millions
Italy,60.665
Japan,127.061


In [None]:
g7_pop.iloc[[0,3]]    # A list of positions works the same way: here the first and fourth elements

Unnamed: 0,G7 population in millions
Canada,35.467
Italy,60.665


In [None]:
g7_pop['Canada':'Japan']    # Label slices include the end label: 'Japan' is part of the result

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061


---
# Conditional selection (boolean arrays)
The same boolean array techniques applied to numpy arrays can be used for Pandas Series:

In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [None]:
g7_pop > 70                               # Element-wise comparison: returns a boolean Series (True/False per country)

Unnamed: 0,G7 population in millions
Canada,False
France,False
Germany,True
Italy,False
Japan,True
United Kingdom,False
United States,True


In [None]:
g7_pop[g7_pop > 70]                                 # Using the boolean Series as a mask keeps only the entries where it is True

Unnamed: 0,G7 population in millions
Germany,80.94
Japan,127.061
United States,318.523


In [None]:
g7_pop.mean().item()    # Arithmetic mean of all values, as a plain Python float

107.30257142857144

In [None]:
g7_pop[g7_pop > g7_pop.mean()]    # Keep only the countries whose population is above the mean

Unnamed: 0,G7 population in millions
Japan,127.061
United States,318.523


In [None]:
# Standard deviation of the values; .item() returns a plain Python float,
# matching how the other scalar results in this notebook are displayed
g7_pop.std().item()

97.24996987121581

---
# Operations and methods
Series also support vectorized operations and aggregation functions, just like NumPy arrays:

In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [None]:
# Show the populations as absolute numbers of inhabitants instead of millions (vectorized multiplication)

g7_pop * 1_000_000


Unnamed: 0,G7 population in millions
Canada,35467000.0
France,63951000.0
Germany,80940000.0
Italy,60665000.0
Japan,127061000.0
United Kingdom,64511000.0
United States,318523000.0


In [None]:
g7_pop.mean()                        # Evaluates to 107.30257142857144 but is not displayed: only the last expression of a cell is echoed

np.log(g7_pop)                       # NumPy functions apply element-wise and return a Series with the same index

Unnamed: 0,G7 population in millions
Canada,3.568603
France,4.158117
Germany,4.393708
Italy,4.105367
Japan,4.844667
United Kingdom,4.166836
United States,5.763695


In [None]:
g7_pop['France' : 'Italy'].mean().item()    # Mean of France, Germany and Italy (the label slice includes 'Italy')

68.51866666666666

# Boolean arrays
(They work the same way as in NumPy.)

In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [None]:
g7_pop[g7_pop < 70]    # Keep only the countries with fewer than 70 million inhabitants

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Italy,60.665
United Kingdom,64.511


In [None]:
# | combines two masks element-wise (OR); each comparison needs its own parentheses
# because | binds more tightly than > and <
g7_pop[(g7_pop > 80) | (g7_pop < 40)]

Unnamed: 0,G7 population in millions
Canada,35.467
Germany,80.94
Japan,127.061
United States,318.523


In [None]:
# & combines two masks element-wise (AND): values strictly between 80 and 200
g7_pop[(g7_pop > 80) & (g7_pop < 200)]

Unnamed: 0,G7 population in millions
Germany,80.94
Japan,127.061


---
# Modifying series

In [None]:
g7_pop['Japan'] = 128.5    # Assigning by label updates that single element in place

In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,128.5
United Kingdom,64.511
United States,318.523


In [None]:
g7_pop.iloc[-1] = 319.75    # Assigning by position: updates the last element (United States)

In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,128.5
United Kingdom,64.511
United States,319.75


In [None]:
g7_pop[g7_pop < 70] = 90.99    # Boolean masks also work for assignment: every value below 70 is overwritten

In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,90.99
France,90.99
Germany,80.94
Italy,90.99
Japan,128.5
United Kingdom,90.99
United States,319.75


In [None]:
g7_pop['Germany':'Japan'] = 120.760    # Label slices are inclusive, so Germany, Italy and Japan are all overwritten

In [None]:
g7_pop

Unnamed: 0,G7 population in millions
Canada,90.99
France,90.99
Germany,120.76
Italy,120.76
Japan,120.76
United Kingdom,90.99
United States,319.75


---
