<img src="https://pandas.pydata.org/_static/pandas_logo.png" width="400" align="left"/>

In [110]:
from IPython.display import IFrame

In [111]:
# Embed the YouTube video in the notebook output.
IFrame(
    src='https://www.youtube.com/embed/26ZioEwRw00',
    width=640,
    height=400,
)

In [112]:
import pandas as pd

# Python Dictionary

In [113]:
# Greenhouse gas emissions for 2016 in tonnes per capita, keyed by country.
emissions2016_dict = {
    'Estonia': 15.0,
    'Luxembourg': 19.8,
    'Netherlands': 12.2,
    'Sweden': 5.6,
}

Data source: [Eurostat – Climate change database](https://ec.europa.eu/eurostat/web/climate-change/data/database)

In [114]:
# View of the dictionary's keys (here, the country names).
emissions2016_dict.keys()

dict_keys(['Estonia', 'Luxembourg', 'Netherlands', 'Sweden'])

In [115]:
# View of the dictionary's values (here, the per-country emission figures).
emissions2016_dict.values()

dict_values([15.0, 19.8, 12.2, 5.6])

# Pandas Series

In [116]:
# The same figures as a labelled Series: the countries form the index and the
# Series is named after the year it describes.
emissions2016 = pd.Series(
    [15.0, 19.8, 12.2, 5.6],
    index=['Estonia', 'Luxembourg', 'Netherlands', 'Sweden'],
    name=2016,
)

In [117]:
emissions2016

Estonia        15.0
Luxembourg     19.8
Netherlands    12.2
Sweden          5.6
Name: 2016, dtype: float64

In [118]:
# A dict can be passed directly: its keys become the index labels.
emissions2016 = pd.Series(emissions2016_dict, name=2016)

In [119]:
emissions2016

Estonia        15.0
Luxembourg     19.8
Netherlands    12.2
Sweden          5.6
Name: 2016, dtype: float64

In [120]:
# Extract the data as a NumPy array. .to_numpy() is the accessor the pandas
# documentation recommends over the older .values attribute.
emissions2016.to_numpy()

array([15. , 19.8, 12.2,  5.6])

In [121]:
# The data behind a Series is a NumPy array. .to_numpy() is the accessor the
# pandas documentation recommends over the older .values attribute.
type(emissions2016.to_numpy())

numpy.ndarray

In [122]:
# Arithmetic is applied element-wise; the index labels and the name are kept.
emissions2016 * 1000

Estonia        15000.0
Luxembourg     19800.0
Netherlands    12200.0
Sweden          5600.0
Name: 2016, dtype: float64

In [123]:
# Without an explicit index, the Series gets a default integer index (0, 1, 2, ...).
pd.Series(data=[15.0,19.8,12.2,5.6],name=2016)

0    15.0
1    19.8
2    12.2
3     5.6
Name: 2016, dtype: float64

# Querying a Series

In [124]:
# .loc looks a value up by its index label.
emissions2016.loc['Sweden']

5.6

In [125]:
# .iloc looks a value up by integer position; 0 is the first entry.
emissions2016.iloc[0]

15.0

# DataFrame

In [126]:
# 2014 figures keyed by country; includes Italy, which the 2016 data lacks.
emissions2014_dict = {
    'Estonia': 16.1,
    'Luxembourg': 21.5,
    'Netherlands': 11.7,
    'Sweden': 5.8,
    'Italy': 7.1,
}

In [127]:
# Wrap the 2014 dict in a Series named after its year.
emissions2014 = pd.Series(emissions2014_dict, name=2014)

In [128]:
# Each Series becomes one row, labelled by the Series name; a country missing
# from one of the Series shows up as NaN in that row.
df = pd.DataFrame([emissions2014, emissions2016])
df

Unnamed: 0,Estonia,Luxembourg,Netherlands,Sweden,Italy
2014,16.1,21.5,11.7,5.8,7.1
2016,15.0,19.8,12.2,5.6,


In [129]:
# Plain dicts carry no name, so the row labels are supplied through `index`.
pd.DataFrame(
    [emissions2014_dict, emissions2016_dict],
    index=[2014, 2016],
)

Unnamed: 0,Estonia,Luxembourg,Netherlands,Sweden,Italy
2014,16.1,21.5,11.7,5.8,7.1
2016,15.0,19.8,12.2,5.6,


In [130]:
# .T is the transpose: the years become columns and the countries become rows.
df.T

Unnamed: 0,2014,2016
Estonia,16.1,15.0
Luxembourg,21.5,19.8
Netherlands,11.7,12.2
Sweden,5.8,5.6
Italy,7.1,


In [131]:
# A dict of dicts is read column-wise: the outer keys become column labels and
# the inner keys become row labels.
df = pd.DataFrame({
    2014: emissions2014_dict,
    2016: emissions2016_dict,
})
df

Unnamed: 0,2014,2016
Estonia,16.1,15.0
Luxembourg,21.5,19.8
Netherlands,11.7,12.2
Sweden,5.8,5.6
Italy,7.1,


# Querying a DataFrame

In [132]:
# Select a row by label; the result is a Series indexed by the column labels.
df.loc['Italy']

2014    7.1
2016    NaN
Name: Italy, dtype: float64

In [133]:
# Select a row by integer position; position 1 is the second row.
df.iloc[1]

2014    21.5
2016    19.8
Name: Luxembourg, dtype: float64

In [134]:
# Plain [] indexing on a DataFrame selects a column by its label.
df[2016]

Estonia        15.0
Luxembourg     19.8
Netherlands    12.2
Sweden          5.6
Italy           NaN
Name: 2016, dtype: float64

In [137]:
# The same column through .loc: ':' keeps every row, 2016 is the column label.
df.loc[:,2016]

Estonia        15.0
Luxembourg     19.8
Netherlands    12.2
Sweden          5.6
Italy           NaN
Name: 2016, dtype: float64

In [140]:
# Label slices in .loc include both endpoints, so Luxembourg is part of the result.
df.loc['Estonia':'Luxembourg',2016]

Estonia       15.0
Luxembourg    19.8
Name: 2016, dtype: float64

In [141]:
# .iloc works on positions: every row of the first column.
df.iloc[:,0]

Estonia        16.1
Luxembourg     21.5
Netherlands    11.7
Sweden          5.8
Italy           7.1
Name: 2014, dtype: float64

In [142]:
# Position slices follow Python rules: 0:3 yields rows 0-2 (the stop is excluded)
# of the column at position 1.
df.iloc[0:3,1]

Estonia        15.0
Luxembourg     19.8
Netherlands    12.2
Name: 2016, dtype: float64

# Adding Rows and Columns

In [143]:
# Assigning through .loc[row label, column label] sets a single cell; this
# fills in Italy's 2016 value.
df.loc['Italy',2016] = 7.2

In [144]:
df

Unnamed: 0,2014,2016
Estonia,16.1,15.0
Luxembourg,21.5,19.8
Netherlands,11.7,12.2
Sweden,5.8,5.6
Italy,7.1,7.2


In [145]:
# Assigning to a row label that does not exist yet appends a new row; the list
# supplies one value per column.
df.loc['Germany',:] = [11.4, 11.4]

In [146]:
df

Unnamed: 0,2014,2016
Estonia,16.1,15.0
Luxembourg,21.5,19.8
Netherlands,11.7,12.2
Sweden,5.8,5.6
Italy,7.1,7.2
Germany,11.4,11.4


In [147]:
# Add a 2006 column. The values are given as a Series keyed by country so that
# pandas aligns them on the row labels instead of relying on a bare list
# happening to be in the same order as df's rows.
# NOTE(review): the country/value pairing reproduces the original positional
# order, but Luxembourg (10.0) and the Netherlands (29.7) look out of line with
# their 2014/2016 figures -- confirm the pairing against the source data.
df.loc[:, 2006] = pd.Series({
    'Estonia': 13.8,
    'Luxembourg': 10.0,
    'Netherlands': 29.7,
    'Sweden': 13.5,
    'Italy': 7.6,
    'Germany': 12.4,
})
df

Unnamed: 0,2014,2016,2006
Estonia,16.1,15.0,13.8
Luxembourg,21.5,19.8,10.0
Netherlands,11.7,12.2,29.7
Sweden,5.8,5.6,13.5
Italy,7.1,7.2,7.6
Germany,11.4,11.4,12.4


In [148]:
# Add an empty placeholder column for 2020. NaN marks the values as missing
# while keeping the column numeric (float64) like the other year columns;
# assigning None would create an object-dtype column instead.
df.loc[:, 2020] = float('nan')
df

Unnamed: 0,2014,2016,2006,2020
Estonia,16.1,15.0,13.8,
Luxembourg,21.5,19.8,10.0,
Netherlands,11.7,12.2,29.7,
Sweden,5.8,5.6,13.5,
Italy,7.1,7.2,7.6,
Germany,11.4,11.4,12.4,


# Deleting Rows and Columns

In [149]:
# drop() returns a new DataFrame without the given row; df itself is not modified.
df.drop(index='Germany')

Unnamed: 0,2014,2016,2006,2020
Estonia,16.1,15.0,13.8,
Luxembourg,21.5,19.8,10.0,
Netherlands,11.7,12.2,29.7,
Sweden,5.8,5.6,13.5,
Italy,7.1,7.2,7.6,


In [150]:
# Several columns can be dropped at once; again the result is a new DataFrame
# and df itself is not modified.
df.drop(columns=[2014,2016])

Unnamed: 0,2006,2020
Estonia,13.8,
Luxembourg,10.0,
Netherlands,29.7,
Sweden,13.5,
Italy,7.6,
Germany,12.4,


In [151]:
df

Unnamed: 0,2014,2016,2006,2020
Estonia,16.1,15.0,13.8,
Luxembourg,21.5,19.8,10.0,
Netherlands,11.7,12.2,29.7,
Sweden,5.8,5.6,13.5,
Italy,7.1,7.2,7.6,
Germany,11.4,11.4,12.4,


In [152]:
# Drop rows and columns in a single call. To keep the result, rebind the name
# to the returned DataFrame; this is preferred over inplace=True, which returns
# None and therefore cannot be chained with further method calls.
df = df.drop(columns=[2014, 2016], index='Germany')
df

Unnamed: 0,2006,2020
Estonia,13.8,
Luxembourg,10.0,
Netherlands,29.7,
Sweden,13.5,
Italy,7.6,
