# Pandas - Series

## Hands on!

In [9]:
import pandas as pd
import numpy as np 

## Pandas Series

I'll start by analyzing __"The Group of Seven"__ (G7), a political organization formed by Canada, France, Germany, Italy, Japan, the UK and the US. I'll begin with the population of each country, and for that I'll use a __pandas.Series__ object.

In [10]:
# Population of each G7 member, expressed in millions.
# No index is supplied, so pandas labels the values 0..6.
g7_pop = pd.Series(
    data=[
        35.467,
        63.951,
        80.940,
        60.665,
        127.061,
        64.511,
        318.523,
    ]
)

In [11]:
# Display the Series: index labels on the left, values on the right, dtype at the bottom.
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [12]:
# Give the Series a descriptive name; it is printed next to the dtype when the Series is displayed.
g7_pop.name = "G7 Population in millions"

In [13]:
# Display the Series again to see the name in its footer.
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64

In [14]:
# Data type of the values held by the Series.
g7_pop.dtype

dtype('float64')

In [15]:
# The underlying values as an array, without the index labels.
g7_pop.values

array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])

In [16]:
# Check which container type backs the values (a NumPy ndarray).
type(g7_pop.values)

numpy.ndarray

In [17]:
# Show the Series once more as a reference for the element lookups that follow.
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64

### A Series is more like a dictionary than a list!

In [18]:
# Look up the element labelled 0. At this point the index is still the default 0..6 range,
# so the label happens to coincide with the position.
g7_pop[0]

35.467

In [19]:
# Look up the element labelled 1 (index is still the default integer range here).
g7_pop[1]

63.951

In [20]:
# Inspect the index object that supplies the label for each element.
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [21]:
# Swap the default integer labels for country names.
# Labels are matched to the existing values by position, so the order matters.
g7_pop.index = [
    'Canada', 'France', 'Germany', 'Italy',
    'Japan', 'United Kingdom', 'United States',
]

In [22]:
# Display the Series with its new country-name index.
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [27]:
# Build an equivalent Series in a single step from a dict:
# the keys become the index labels and the values become the data.
pd.Series(
    data={
        'Canada': 35.467,
        'France': 63.951,
        'Germany': 80.94,
        'Italy': 60.665,
        'Japan': 127.061,
        'United Kingdom': 64.511,
        'United States': 318.523,
    },
    name="G7 Population in millions",
)

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [43]:
# Same Series again, this time from a list of values plus an explicit list of labels.
pd.Series(
    data=[35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
    index=[
        'Canada',
        'France',
        'Germany',
        'Italy',
        'Japan',
        'United Kingdom',
        'United States',
    ],
    name="G7 Population in millions",
)

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [44]:
# Build a new Series out of an existing one, keeping only the requested labels.
# A label that does not exist in g7_pop ('Spain') gets NaN as its value.
pd.Series(
    g7_pop,
    index=['France', 'Germany', 'Italy', 'Spain'],
)

France     63.951
Germany    80.940
Italy      60.665
Spain         NaN
Name: G7 Population in millions, dtype: float64

In [45]:
# Label-based lookup, written like a dictionary key access.
g7_pop['Canada']

35.467

In [46]:
# Another label-based lookup.
g7_pop['France']

63.951

In [47]:
# Position-based lookup with .iloc: the first element, whatever its label is.
g7_pop.iloc[0]

35.467

In [48]:
# Negative positions count from the end: -1 is the last element.
g7_pop.iloc[-1]

318.523

Selecting multiple elements at once:

In [49]:
# Pass a list of labels to select several elements; the result follows the order of that list.
g7_pop[['Italy','France']]

Italy     60.665
France    63.951
Name: G7 Population in millions, dtype: float64

(The result is another Series.)

In [50]:
# The positional equivalent: a list of integer positions passed to .iloc.
g7_pop.iloc[[0,1]]

Canada    35.467
France    63.951
Name: G7 Population in millions, dtype: float64

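Slicing also works, but __with label-based slicing in pandas, the upper limit is also included__ (positional slicing with `.iloc` still excludes the upper limit, as with Python lists).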

In [51]:
# Label slice: both endpoints, 'Canada' and 'Italy', are part of the result.
g7_pop['Canada':'Italy']

Canada     35.467
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millions, dtype: float64

## Conditional selection (boolean arrays)

In [52]:
# Show the full Series as a reference for the filters that follow.
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [53]:
# Element-wise comparison: yields a boolean Series with the same index (True where population > 70 million).
g7_pop > 70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population in millions, dtype: bool

In [54]:
# Use the boolean Series as a mask: only the elements where it is True are kept.
g7_pop[g7_pop > 70]

Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64

In [55]:
# Average population across the seven countries (in millions).
g7_pop.mean()

107.30257142857144

In [56]:
# The threshold in a mask can itself be computed from the Series: keep above-average populations.
g7_pop[g7_pop > g7_pop.mean()]

Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64

In [57]:
# Standard deviation of the populations (in millions).
g7_pop.std()

97.24996987121581

In [58]:
# Keep the countries whose population is greater than (mean - std / 2).
#
# OR-ing this mask with `g7_pop > g7_pop.mean() + g7_pop.std() / 2` would be redundant:
# every value above the higher threshold is already above the lower one, so the extra
# clause can never add a row. A single comparison selects exactly the same countries.
#
# To select values far from the mean on either side instead, the mask would be
# `(g7_pop < g7_pop.mean() - g7_pop.std() / 2) | (g7_pop > g7_pop.mean() + g7_pop.std() / 2)`.
lower_bound = g7_pop.mean() - g7_pop.std() / 2
g7_pop[g7_pop > lower_bound]

France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64