# Pandas DataFrame

Reference: [pandas.DataFrame API documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html)

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Column-oriented input: each key is a column name, each list holds that column's values.
data = {
    'country': ['Italy', 'Netherlands', 'France', 'Germany'],
    'short code': ['I', 'NL', 'F', 'G'],
    'population': [59320118, 17084459, 65233271, 82521653],
}

# 'capital' is requested as a column but has no entry in `data`,
# so it is created with missing values only.
frame = pd.DataFrame(
    data=data,
    columns=['country', 'short code', 'population', 'capital'],
)
frame

Unnamed: 0,country,short code,population,capital
0,Italy,I,59320118,
1,Netherlands,NL,17084459,
2,France,F,65233271,
3,Germany,G,82521653,


In [82]:
# Show the frame with 'short code' used as the row index; the result is only
# displayed, it is not assigned back to `frame`.
frame.set_index(keys='short code')

Unnamed: 0_level_0,country,population,capital
short code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
I,Italy,59320118,
NL,Netherlands,17084459,
F,France,65233271,
G,Germany,82521653,


In [5]:
# Column labels as a plain Python list.
frame.columns.tolist()

['country', 'short code', 'population', 'capital']

### Column as a Series

In [6]:
# Selecting a single column with brackets returns it as a Series that is
# named after the column and keeps the frame's row index.
s = frame['short code']
s

0     I
1    NL
2     F
3     G
Name: short code, dtype: object

In [7]:
# Check the type of the object returned by the single-column selection.
type(s)

pandas.core.series.Series

In [8]:
# Attribute-style access to a column. This spelling is not possible for
# 'short code', whose name contains a space; use frame['short code'] there.
frame.country

0          Italy
1    Netherlands
2         France
3        Germany
Name: country, dtype: object

In [11]:
# Assigning a scalar to a column fills every row with that value.
frame['capital'] = '-'
frame

Unnamed: 0,country,short code,population,capital
0,Italy,I,59320118,-
1,Netherlands,NL,17084459,-
2,France,F,65233271,-
3,Germany,G,82521653,-


In [13]:
# Assigning a Series aligns on index labels, not on position: the values are
# matched to rows 1, 0 and 3, and row 2 (no matching label) ends up missing.
frame['capital'] = pd.Series(['Amsterdam', 'Rome', 'Berlin'], index=[1, 0, 3])
frame

Unnamed: 0,country,short code,population,capital
0,Italy,I,59320118,Rome
1,Netherlands,NL,17084459,Amsterdam
2,France,F,65233271,
3,Germany,G,82521653,Berlin


In [75]:
# Add a throwaway column, then remove it again.
frame['xxx'] = '???'
# Alternative that deletes the column in place: del frame['xxx']
# Rebind the result of drop() instead of using inplace=True: the cell then reads
# as "new frame from old frame" and avoids the discouraged in-place mutation.
frame = frame.drop('xxx', axis=1)
frame

Unnamed: 0,country,population,capital
F,France,65233271,
G,Germany,82521653,


In [84]:
# Nested dicts: outer keys become columns, inner keys become row labels.
# Row labels absent from an inner dict ('F' and 'G' under 'capital') are left missing.
d = {
    'country': dict(I='Italy', NL='Netherlands', G='Germany', F='France'),
    'population': dict(I=59320118, NL=17084459, F=65233271, G=82521653),
    'capital': dict(I='Rome', NL='Amsterdam'),
}
frame = pd.DataFrame(data=d)
frame

Unnamed: 0,country,population,capital
F,France,65233271,
G,Germany,82521653,
I,Italy,59320118,Rome
NL,Netherlands,17084459,Amsterdam


In [83]:
# Two equivalent ways to select a subset of columns. A cell only displays its
# last expression, so the `.loc` form is checked explicitly here instead of
# being evaluated and silently discarded.
assert frame.loc[:, ['country', 'population']].equals(frame[['country', 'population']])
frame[['country', 'population']]

Unnamed: 0,country,population
0,Italy,59320118
1,Netherlands,17084459
2,France,65233271
3,Germany,82521653


In [74]:
# Other lookups to try on this frame (left disabled):
# frame.loc['NL']
# frame.iloc[2:4][['country','population']].describe().loc[['mean','std']]
# frame[frame.index=='NL']
# Convert the population column to a plain NumPy array.
np.array(frame['population'])

array([65233271, 82521653, 59320118, 17084459])

In [29]:
# Print the column labels and the row labels, one per line.
print(frame.columns, frame.index, sep='\n')

Index(['country', 'population', 'capital'], dtype='object')
Index(['F', 'G', 'I', 'NL'], dtype='object')


In [85]:
# Build a new frame with the given row labels and columns.
# 'ES' is not a row of `frame`, so its row is filled with missing values.
f = frame.reindex(
    index=['F', 'G', 'I', 'NL', 'ES'],
    columns=['country', 'capital'],
)
f

Unnamed: 0,country,capital
F,France,
G,Germany,
I,Italy,Rome
NL,Netherlands,Amsterdam
ES,,


In [33]:
# Show `f` without the 'ES' row; the result is only displayed, `f` is not reassigned.
f.drop('ES', axis='index')

Unnamed: 0,country,capital
F,France,
G,Germany,
I,Italy,Rome
NL,Netherlands,Amsterdam


In [89]:
# Show `f` without the 'capital' column.
f.drop('capital', axis=1)
# or f.drop('capital', axis='columns')

Unnamed: 0,country
F,France
G,Germany
I,Italy
NL,Netherlands
ES,


In [20]:
# All rows of the 'capital' column, returned as a Series.
f.loc[:, 'capital']

F           NaN
G           NaN
I          Rome
NL    Amsterdam
ES          NaN
Name: capital, dtype: object

In [21]:
# First two rows, selected by position.
f.iloc[:2]

Unnamed: 0,country,capital
F,France,
G,Germany,


In [23]:
# Boolean mask: keep only the rows whose 'capital' value is not missing.
f[f['capital'].notnull()]

Unnamed: 0,country,capital
I,Italy,Rome
NL,Netherlands,Amsterdam


In [40]:
# Display the whole frame again.
frame

Unnamed: 0,capital,country,population
F,,France,65233271
G,,Germany,82521653
I,Rome,Italy,59320118
NL,Amsterdam,Netherlands,17084459


In [45]:
# Label-based row lookup: every column of row 'I', returned as a Series.
frame.loc['I', :]

capital           Rome
country          Italy
population    59320118
Name: I, dtype: object

In [49]:
# Row label first, then a list of column labels: returns just those fields
# of row 'I' as a Series.
frame.loc['I',['capital','country']]

capital     Rome
country    Italy
Name: I, dtype: object