In [1]:
import pandas as pd
import numpy as np

In [2]:
#Pandas is an important library that we use in our daily data analysis with Python.
#Pandas has 2 data structures that it uses: the 'Series' and the 'DataFrame' (which looks like an Excel table)

#'The Group of Seven' (G7) is a political forum formed by Canada, France, Germany, Italy, Japan, the United Kingdom and the United States

In [3]:
# G7 populations in millions, one entry per country (default 0..6 integer index)
g7_pop = pd.Series(
    [
        35.467,
        63.951,
        80.940,
        60.665,
        127.061,
        64.511,
        318.523,
    ]
)

In [4]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [5]:
#A Series can have a name to better document its purpose

In [6]:
# The name is shown in the Series repr and is carried over to Series derived from this one
g7_pop.name = 'G7 Population in millions'

In [7]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64

In [8]:
#Series are similar to numpy arrays:

In [9]:
g7_pop.dtype

dtype('float64')

In [10]:
# to_numpy() is the accessor pandas recommends over .values for getting a NumPy array
g7_pop.to_numpy()

array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])

In [11]:
#Getting the array that backs the Series, which is a NumPy array

In [12]:
# Confirm that the extracted data is a plain NumPy ndarray
type(g7_pop.to_numpy())

numpy.ndarray

In [13]:
#A Series has an index, similar to the automatic index assigned to Python lists

In [14]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64

In [15]:
# With the default RangeIndex, 0 is an index label that happens to coincide with the position
g7_pop[0]

35.467

In [16]:
g7_pop[1]

63.951

In [17]:
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [18]:
# Plain Python list, used further down to contrast list slicing with Series slicing
l = ['a', 'b', 'c']

In [19]:
#In contrast to lists, we can explicitly define the index in Pandas

In [20]:
# Replace the default integer index with country names (same order as the data)
g7_pop.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States',
]

In [21]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [22]:
#Series look like 'ordered dictionaries'. We can create a Series out of a dictionary

In [23]:
# Dict keys become the index labels, dict values become the data
pd.Series(
    {
        'Canada': 35.467,
        'France': 63.951,
        'Germany': 80.94,
        'Italy': 60.665,
        'Japan': 127.061,
        'United Kingdom': 64.511,
        'United States': 318.523,
    },
    name='G7 Population in millions',
)

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [24]:
#Creating Series out of other series, specifying indexes

In [25]:
# Only the requested labels are kept; a label that g7_pop lacks ('Spain') gets NaN
pd.Series(g7_pop, index = ['France', 'Germany', 'Italy', 'Spain'])

France     63.951
Germany    80.940
Italy      60.665
Spain         NaN
Name: G7 Population in millions, dtype: float64

### Indexing and Conditional Selections

In [26]:
g7_pop['Canada']

35.467

In [27]:
g7_pop['Japan']

127.061

In [28]:
#Numeric positions can also be used with the iloc attribute

In [29]:
g7_pop.iloc[0]

35.467

In [30]:
# Negative positions count from the end, as with Python lists: -1 is the last element
g7_pop.iloc[-1]

318.523

In [31]:
#Selecting Multiple elements at once

In [32]:
# The result follows the order of the requested labels, not their order in g7_pop
g7_pop[['Italy', 'France']]

Italy     60.665
France    63.951
Name: G7 Population in millions, dtype: float64

In [33]:
#Selecting multiple elements results in another Series

In [34]:
g7_pop.iloc[[0,1]]

Canada    35.467
France    63.951
Name: G7 Population in millions, dtype: float64

In [35]:
#Slicing also works, but with label-based slices in Pandas the upper limit is also included

In [36]:
# Label-based slice: both end labels ('Canada' and 'Italy') are part of the result
g7_pop.loc['Canada':'Italy']

Canada     35.467
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in millions, dtype: float64

In [37]:
l

['a', 'b', 'c']

In [38]:
#Python list slices exclude the upper bound: l[:2] returns the elements at positions 0 and 1.
#Label-based slices on a Series (e.g. g7_pop['Canada': 'Italy']) include the end label.

l[:2]

['a', 'b']

### Conditional Selection (Boolean Arrays)

In [39]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [40]:
# Element-wise comparison: yields a boolean Series with the same index as g7_pop
g7_pop > 70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population in millions, dtype: bool

In [41]:
# Use the boolean Series as a mask: only entries where it is True are kept
g7_pop[g7_pop > 70]

Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64

In [43]:
g7_pop.mean()

107.30257142857144

In [44]:
# The threshold can itself be computed from the Series: keep values above the mean
g7_pop[g7_pop > g7_pop.mean()]

Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64

In [45]:
# Sample standard deviation (pandas normalizes by N-1 by default, i.e. ddof=1)
g7_pop.std()

97.24996987121581

In [48]:
#~ not
#| or
#& and

In [49]:
g7_pop [(g7_pop > g7_pop.mean() - g7_pop.std() / 2) | (g7_pop > g7_pop.mean() + g7_pop.std() / 2)] 

France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

### Operations and Methods

In [50]:
#Series also support vectorized operations and aggregation functions, like NumPy arrays

In [51]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [52]:
# Scalar multiplication is applied to every element (millions -> absolute head counts)
g7_pop * 1_000_000

Canada             35467000.0
France             63951000.0
Germany            80940000.0
Italy              60665000.0
Japan             127061000.0
United Kingdom     64511000.0
United States     318523000.0
Name: G7 Population in millions, dtype: float64

In [53]:
g7_pop.mean()

107.30257142857144

In [54]:
# NumPy functions accept a Series directly; the result is a Series that keeps the index and name
np.log(g7_pop)

Canada            3.568603
France            4.158117
Germany           4.393708
Italy             4.105367
Japan             4.844667
United Kingdom    4.166836
United States     5.763695
Name: G7 Population in millions, dtype: float64

In [55]:
# Mean over France, Germany and Italy (the label slice includes the end label 'Italy')
g7_pop['France': 'Italy'].mean()

68.51866666666666

### Boolean Arrays

In [56]:
#Works in the same way as NumPy

In [57]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [58]:
g7_pop > 80

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population in millions, dtype: bool

In [59]:
g7_pop[g7_pop > 80]

Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64

In [60]:
# Each comparison needs its own parentheses because | binds tighter than > and <
g7_pop[(g7_pop > 80) | (g7_pop < 40)]

Canada            35.467
Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in millions, dtype: float64

In [62]:
g7_pop[(g7_pop > 80) & (g7_pop < 200)]

Germany     80.940
Japan      127.061
Name: G7 Population in millions, dtype: float64

### Modifying Series

In [63]:
# Assigning through a label overwrites that entry in place (g7_pop itself is modified)
g7_pop["Canada"] = 40.5

In [64]:
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [65]:
# Positional assignment: overwrites the last entry (United States); the int 500 is stored
# as 500.0 because the Series stays float64
g7_pop.iloc[-1] = 500

In [66]:
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     500.000
Name: G7 Population in millions, dtype: float64

In [68]:
# Replace every value below 70 in one go by assigning through a boolean mask
g7_pop.loc[g7_pop < 70] = 99.99

In [69]:
g7_pop

Canada             99.990
France             99.990
Germany            80.940
Italy              99.990
Japan             127.061
United Kingdom     99.990
United States     500.000
Name: G7 Population in millions, dtype: float64