In [1]:
import pandas as pd
import numpy as np

# Series

In [2]:
# Populations of the G7 ("The Group of Seven") countries, in millions
g7_pop = pd.Series(
    data=[
        35.467,
        63.951,
        80.940,
        60.665,
        127.061,
        64.511,
        318.523,
    ]
)
print(g7_pop)

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64


In [4]:
# Series can have a name; it is shown in the footer of the printed Series
g7_pop.name = 'G7 Population in millions'
print(g7_pop)

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64


In [6]:
# Show the element dtype, the raw values, and the type of the object
# that holds those values, one per line
print(
    g7_pop.dtype,
    g7_pop.values,
    type(g7_pop.values),
    sep='\n',
)

float64
[ 35.467  63.951  80.94   60.665 127.061  64.511 318.523]
<class 'numpy.ndarray'>


In [7]:
# Look up the element labelled 0 (the Series still has its default
# integer index at this point)
g7_pop[0]

35.467

In [8]:
# The index holds the labels attached to the values
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [9]:
# Replace the default integer labels with country names, one label per value
g7_pop.index = [
    'Canada', 'France', 'Germany', 'Italy',
    'Japan', 'United Kingdom', 'United States',
]
print(g7_pop)

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64


In [10]:
# Create Series out of dictionaries: the keys become the index labels
# and the values become the data
pd.Series({
    # NOTE(review): 35.457 differs from the 35.467 used for Canada in the
    # other cells of this notebook; this looks like a typo. Confirm, then
    # fix the value and re-run the cell.
    'Canada': 35.457,
    'France': 63.951,
    'Germany': 80.94,
    'Italy': 60.665,
    'Japan': 127.061,
    'United Kingdom': 64.511,
    'United States': 318.523
}, name='G7 Population in millions')

Canada             35.457
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [11]:
# Values, index labels and name can all be handed to the constructor at once
pd.Series(
    data=[35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523],
    index=[
        'Canada', 'France', 'Germany', 'Italy',
        'Japan', 'United Kingdom', 'United States',
    ],
    name='G7 Population in millions',
)

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [12]:
# Build a new Series from an existing one by listing the labels to keep;
# a label that the source does not contain ('Spain') comes back as NaN
pd.Series(
    data=g7_pop,
    index=['France', 'Germany', 'Italy', 'Spain'],
)

France     63.951
Germany    80.940
Italy      60.665
Spain         NaN
Name: G7 Population in millions, dtype: float64

# Indexing

In [13]:
# Label-based lookup: returns the value stored under the label 'Canada'
g7_pop['Canada']

35.467

In [14]:
# Numerical positions can be used with the iloc attribute
# (position 0 is the first element, whatever its label is)
g7_pop.iloc[0]

35.467

In [16]:
# A list of labels selects several elements at once; the result follows
# the order of the list, not the order of the original Series
g7_pop[['Italy', 'France']]

Italy     60.665
France    63.951
Name: G7 Population in millions, dtype: float64

In [17]:
# A list of positions selects several elements through iloc
g7_pop.iloc[[0, 1]]

Canada    35.467
France    63.951
Name: G7 Population in millions, dtype: float64

In [18]:
# Slicing by label: the upper limit ('Italy') is included in the result
g7_pop['Canada': 'Italy']

Canada     35.467
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millions, dtype: float64

# Conditional selection (boolean arrays)

In [19]:
# Comparing a Series with a scalar gives a boolean Series with the same
# index: True where the condition holds, False elsewhere
g7_pop > 70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population in millions, dtype: bool

In [20]:
# Using that boolean Series as a selector keeps only the entries marked True
g7_pop[g7_pop > 70]

Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64

In [21]:
# Arithmetic mean of all the values in the Series
g7_pop.mean()

107.30257142857144

In [22]:
# The threshold can be computed from the Series itself:
# keep only the entries that are above the mean
g7_pop[g7_pop > g7_pop.mean()]

Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64

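Boolean masks are combined with the following operators (wrap each comparison in parentheses, because these operators bind more tightly than comparisons):

~ not
| or
& and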

In [23]:
# Compare every value against two thresholds centred on the mean (half a
# standard deviation below it and half a standard deviation above it),
# then combine the two masks with | (or).
# NOTE(review): std() is never negative, so any value above the upper
# threshold is also above the lower one; the | therefore selects exactly
# what the first mask alone selects. A different comparison may have been
# intended. Confirm.
centre = g7_pop.mean()
half_spread = g7_pop.std() / 2
above_lower = g7_pop > centre - half_spread
above_upper = g7_pop > centre + half_spread
g7_pop[above_lower | above_upper]

France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

# Operations and methods

In [24]:
# Arithmetic with a scalar is applied to every element; the result keeps
# the same index and name (here: millions -> absolute numbers)
g7_pop * 1_000_000

Canada             35467000.0
France             63951000.0
Germany            80940000.0
Italy              60665000.0
Japan             127061000.0
United Kingdom     64511000.0
United States     318523000.0
Name: G7 Population in millions, dtype: float64

In [25]:
# Aggregation methods such as mean() reduce the Series to a single number
g7_pop.mean()

107.30257142857144

In [27]:
# NumPy functions can be applied to a Series directly; the result is again
# a Series carrying the same index and name
np.log(g7_pop)

Canada            3.568603
France            4.158117
Germany           4.393708
Italy             4.105367
Japan             4.844667
United Kingdom    4.166836
United States     5.763695
Name: G7 Population in millions, dtype: float64

In [26]:
# Aggregate over a label slice: mean of France, Germany and Italy
# (both ends of the slice are included)
g7_pop['France' : 'Italy'].mean()

68.51866666666666

# Boolean arrays

In [28]:
# Boolean masks behave like NumPy boolean arrays: build one with a
# comparison, select with it, and combine masks with | (or) and & (and)
over_80 = g7_pop > 80
print(over_80)
print(g7_pop[over_80])
print(g7_pop[over_80 | (g7_pop < 40)])
print(g7_pop[over_80 & (g7_pop < 200)])

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population in millions, dtype: bool
Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64
Canada            35.467
Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64
Germany     80.940
Japan      127.061
Name: G7 Population in millions, dtype: float64


# Modifying Series

In [30]:
# Assigning through a label overwrites that element of g7_pop in place
g7_pop['Canada'] = 40.5
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [32]:
# iloc accepts negative positions: -1 addresses the last element
g7_pop.iloc[-1] = 500
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     500.000
Name: G7 Population in millions, dtype: float64

In [34]:
# A boolean mask on the left-hand side assigns the value to every element
# for which the condition is True; the other elements are left untouched
g7_pop[g7_pop < 70] = 99.99
g7_pop

Canada             99.990
France             99.990
Germany            80.940
Italy              99.990
Japan             127.061
United Kingdom     99.990
United States     500.000
Name: G7 Population in millions, dtype: float64