# Pandas Documentation on DataFrame

In this notebook, you will work through the Pandas documentation on DataFrames.

## Imports

In [2]:
import numpy as np
import pandas as pd

## DataFrame

In this notebook, you are going to learn how to use `pandas.DataFrame` by typing the code from the Pandas documentation into this notebook.

* Go to the Pandas [DataFrame Documentation](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe).
* Type all of the code from that section of the documentation into this notebook and get it working.
* **To learn this API well, you must type the code rather than copying and pasting it**.
* Create a new cell in this section for each `In[]` prompt in the documentation.
* Ignore the cells in the **Grading** section below.
* No Markdown comments are needed.
* Skip the following sub-sections:
  - From structured or record array
  - Alternate Constructors
  - Assigning New Columns in Method Chains
  - Console display
  - DataFrame column attribute access and IPython completion

In [3]:
# Two float Series with overlapping labels: 'one' is indexed a-c, 'two' a-d.
d = {
    'one': pd.Series([1., 2., 3.], index=list('abc')),
    'two': pd.Series([1., 2., 3., 4.], index=list('abcd')),
}

In [4]:
# Build a frame from the dict of Series: one column per key, rows aligned on
# the union of the Series labels (a-d); 'one' has no value for label 'd'.
df = pd.DataFrame(d)

In [5]:
# Display the frame built from the dict of Series.
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [6]:
# index= selects the rows by label and fixes their order (d, b, a).
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4
b,2.0,2
a,1.0,1


In [9]:
# columns= keeps 'two' and adds 'three'; 'three' is not a key of d, so that
# column is entirely missing values.
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

Unnamed: 0,two,three
d,4,
b,2,
a,1,


In [10]:
# Row labels of df.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [11]:
# Column labels of df.
df.columns

Index(['one', 'two'], dtype='object')

In [12]:
# Column name -> equal-length list of floats (rebinds the earlier `d`).
d = dict(
    one=[1., 2., 3., 4.],
    two=[4., 3., 2., 1.],
)

In [13]:
# From a dict of lists, with no index given the rows are numbered 0-3.
pd.DataFrame(d)

Unnamed: 0,one,two
0,1,4
1,2,3
2,3,2
3,4,1


In [14]:
# Same data, with explicit row labels a-d instead of the default 0-3.
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1,4
b,2,3
c,3,2
d,4,1


In [15]:
# One dict per row; only the second record carries the extra key 'c'.
data2 = [dict(a=1, b=2), dict(a=5, b=10, c=20)]

In [16]:
# List of dicts -> one row per dict; a key absent from a record ('c' in row 0)
# becomes a missing value.
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [18]:
# Same records, with the rows labelled 'first' and 'second'.
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [19]:
# columns= restricts the result to 'a' and 'b'; key 'c' is dropped.
pd.DataFrame(data2, columns=['a', 'b'])

Unnamed: 0,a,b
0,1,2
1,5,10


In [20]:
# Dict of dicts keyed by tuples: the outer tuple keys become a two-level
# column index and the inner tuple keys a two-level row index. Row/column
# pairs that appear in no inner dict are filled with missing values.
pd.DataFrame({
        ('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
        ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
        ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
        ('b', 'a'): {('A', 'D'): 9, ('A', 'B'): 10}
    })

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,a,b,c,a
A,B,4.0,1.0,5.0,10.0
A,C,3.0,2.0,6.0,
A,D,,,,9.0


In [21]:
# Select column 'one'; the result is a Series named 'one'.
df['one']

a     1
b     2
c     3
d   NaN
Name: one, dtype: float64

In [22]:
# Add column 'three' as the element-wise product of 'one' and 'two'.
df['three'] = df['one'] * df['two']

In [23]:
# Add boolean column 'flag': True where 'one' is greater than 2.
df['flag'] = df['one'] > 2

In [24]:
# Display df with the derived 'three' and 'flag' columns.
df

Unnamed: 0,one,two,three,flag
a,1.0,1,1.0,False
b,2.0,2,4.0,False
c,3.0,3,9.0,True
d,,4,,False


In [25]:
# Remove column 'two' from df in place.
del df['two']

In [26]:
# Remove column 'three' from df and keep the removed column in `three`.
three = df.pop('three')

In [27]:
# Display df after 'two' and 'three' have been removed.
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [28]:
# Assigning a scalar fills every row of the new column 'foo' with 'bar'.
df['foo'] = 'bar'

In [29]:
# Display df with the constant 'foo' column.
df

Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar


In [30]:
# Assign only the first two values of 'one' (taken by position); the shorter
# Series is aligned on df's index, so the remaining rows are left missing.
df['one_trunc'] = df['one'].iloc[:2]

In [31]:
# Display df with the partially filled 'one_trunc' column.
df

Unnamed: 0,one,flag,foo,one_trunc
a,1.0,False,bar,1.0
b,2.0,False,bar,2.0
c,3.0,True,bar,
d,,False,bar,


In [32]:
# Insert a copy of column 'one' under the name 'bar' at column position 1
# (directly after 'one') instead of appending it at the end.
df.insert(loc=1, column='bar', value=df['one'])

In [33]:
# Display df with 'bar' inserted as the second column.
df

Unnamed: 0,one,bar,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,
d,,,False,bar,


In [34]:
# Select the row labelled 'b'; the result is a Series indexed by column name.
df.loc['b']

one              2
bar              2
flag         False
foo            bar
one_trunc        2
Name: b, dtype: object

In [35]:
# Select the third row by integer position (label 'c' here).
df.iloc[2]

one             3
bar             3
flag         True
foo           bar
one_trunc     NaN
Name: c, dtype: object

In [37]:
# Seed NumPy's global generator so that this frame -- and every later
# np.random.randn call in the notebook -- comes out the same after
# Restart & Run All. Outputs saved before the seed was added show
# different numbers and are refreshed by re-running the notebook.
np.random.seed(0)

# 10 rows x 4 columns of standard-normal draws, columns labelled A-D.
# Note: this rebinds `df`, replacing the small example frame used above.
df = pd.DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])

In [38]:
# A smaller random frame: 7 rows and only columns A-C.
df2 = pd.DataFrame(
    np.random.randn(7, 3),
    columns=list('ABC'),
)

In [39]:
# Addition aligns on both row and column labels; labels present in only one
# frame (column D, rows 7-9) produce missing values in the result.
df + df2

Unnamed: 0,A,B,C,D
0,1.108077,2.138368,-1.674614,
1,-0.102637,0.941747,-0.681519,
2,0.195906,-0.328015,-2.852444,
3,0.770947,0.879437,-0.945949,
4,0.950728,0.60016,2.081761,
5,0.115459,-0.371794,2.143689,
6,-0.330863,2.667168,0.914195,
7,,,,
8,,,,
9,,,,


In [40]:
# Subtract the first row from every row: the Series is matched against the
# column labels and applied row by row, so row 0 becomes all zeros.
df - df.iloc[0]

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,0.0
1,-1.799203,-0.455972,-0.263857,-2.036326
2,-0.325766,-0.797322,0.289465,-0.343086
3,-0.700945,0.080053,-0.672129,-1.024293
4,0.242108,0.27541,1.913078,-0.627751
5,-0.935248,-0.07379,1.766016,-1.018129
6,-1.257363,0.577301,1.24375,-0.577255
7,0.42254,0.577562,0.875662,-0.085801
8,-1.010463,-2.889907,0.720437,0.032172
9,-1.5416,0.247994,0.952687,-2.536932


In [41]:
# Eight consecutive daily timestamps beginning 1 January 2000.
index = pd.date_range(start='1/1/2000', periods=8)

In [42]:
# 8 x 3 frame of standard-normal draws: one row per timestamp in `index`,
# columns A-C. This rebinds `df` again.
df = pd.DataFrame(
    np.random.randn(8, 3),
    index=index,
    columns=list('ABC'),
)

In [43]:
# Display the date-indexed frame.
df

Unnamed: 0,A,B,C
2000-01-01,0.749004,1.537169,2.185882
2000-01-02,-0.369547,-1.167983,-1.552432
2000-01-03,-0.711558,0.076194,-0.037168
2000-01-04,-0.352357,-0.02055,1.795811
2000-01-05,0.481433,0.33475,0.115762
2000-01-06,-0.987867,0.51568,0.037385
2000-01-07,0.745187,-0.593085,-0.284997
2000-01-08,-0.415705,0.758606,-1.430111


In [44]:
# A single column selected from a DataFrame is a pandas Series.
type(df['A'])

pandas.core.series.Series

In [46]:
# Subtract column 'A' from every column, matching on the row index (axis=0);
# column A of the result is therefore all zeros.
df.sub(df['A'], axis=0)

Unnamed: 0,A,B,C
2000-01-01,0,0.788165,1.436878
2000-01-02,0,-0.798436,-1.182884
2000-01-03,0,0.787752,0.674391
2000-01-04,0,0.331807,2.148168
2000-01-05,0,-0.146683,-0.365672
2000-01-06,0,1.503547,1.025252
2000-01-07,0,-1.338272,-1.030184
2000-01-08,0,1.174311,-1.014405


In [47]:
# Arithmetic with scalars is applied element-wise.
df * 5 + 2

Unnamed: 0,A,B,C
2000-01-01,5.745021,9.685845,12.929409
2000-01-02,0.152263,-3.839915,-5.762158
2000-01-03,-1.557791,2.380969,1.814162
2000-01-04,0.238215,1.897251,10.979055
2000-01-05,4.407167,3.673751,2.578809
2000-01-06,-2.939336,4.578398,2.186924
2000-01-07,5.725937,-0.965423,0.575016
2000-01-08,-0.078526,5.793028,-5.150554


In [48]:
# Element-wise reciprocal; the scalar may also be the left operand.
1 / df

Unnamed: 0,A,B,C
2000-01-01,1.335106,0.650547,0.457481
2000-01-02,-2.706012,-0.856177,-0.644151
2000-01-03,-1.405366,13.124425,-26.905199
2000-01-04,-2.838031,-48.662368,0.556851
2000-01-05,2.077131,2.987302,8.638431
2000-01-06,-1.012282,1.939189,26.748872
2000-01-07,1.341944,-1.6861,-3.508812
2000-01-08,-2.405551,1.318208,-0.699247


In [49]:
# Element-wise fourth power.
df ** 4

Unnamed: 0,A,B,C
2000-01-01,0.314729,5.583243,22.830044
2000-01-02,0.01865,1.860999,5.80831
2000-01-03,0.256355,3.37038e-05,2e-06
2000-01-04,0.015415,1.783311e-07,10.400221
2000-01-05,0.053721,0.01255693,0.00018
2000-01-06,0.952345,0.07071634,2e-06
2000-01-07,0.308363,0.1237276,0.006597
2000-01-08,0.029864,0.3311801,4.182911


In [50]:
# Boolean frame for the logical-operator examples, written with explicit
# True/False values (same contents as casting 1/0 with dtype=bool).
df1 = pd.DataFrame({
    'a': [True, False, True],
    'b': [False, True, True],
})

In [51]:
# Second boolean frame, same shape and labels as df1 (rebinds `df2`).
df2 = pd.DataFrame({
    'a': [False, True, True],
    'b': [True, True, False],
})

In [52]:
# Element-wise logical AND of the two boolean frames.
df1 & df2

Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


In [53]:
# Element-wise logical OR of the two boolean frames.
df1 | df2

Unnamed: 0,a,b
0,True,True
1,True,True
2,True,True


In [54]:
# Element-wise exclusive OR of the two boolean frames.
df1 ^ df2

Unnamed: 0,a,b
0,True,True
1,True,False
2,False,True


In [55]:
# Element-wise logical NOT, spelled with the inversion operator; on a
# boolean frame this gives the same result as unary minus.
~df1

Unnamed: 0,a,b
0,False,True
1,True,False
2,False,False


In [56]:
# Transpose the first five rows (taken by position) so that the dates
# become the column labels and A-C the row labels.
df.iloc[:5].T

Unnamed: 0,2000-01-01 00:00:00,2000-01-02 00:00:00,2000-01-03 00:00:00,2000-01-04 00:00:00,2000-01-05 00:00:00
A,0.749004,-0.369547,-0.711558,-0.352357,0.481433
B,1.537169,-1.167983,0.076194,-0.02055,0.33475
C,2.185882,-1.552432,-0.037168,1.795811,0.115762


In [58]:
# NumPy element-wise functions can be applied directly to the frame; the
# result keeps the same row and column labels.
np.exp(df)

Unnamed: 0,A,B,C
2000-01-01,2.114893,4.651404,8.898492
2000-01-02,0.691047,0.310994,0.211733
2000-01-03,0.490879,1.079172,0.963515
2000-01-04,0.703029,0.97966,6.024359
2000-01-05,1.618392,1.397591,1.122728
2000-01-06,0.37237,1.674776,1.038092
2000-01-07,2.106836,0.55262,0.752017
2000-01-08,0.659875,2.135297,0.239282


In [59]:
# Convert the frame to a plain NumPy array; the row and column labels are
# not part of the result.
np.asarray(df)

array([[ 0.74900419,  1.53716905,  2.18588187],
       [-0.36954745, -1.16798302, -1.55243151],
       [-0.71155822,  0.07619381, -0.03716754],
       [-0.35235702, -0.02054976,  1.79581104],
       [ 0.48143332,  0.33475024,  0.11576177],
       [-0.98786721,  0.51567959,  0.03738475],
       [ 0.74518746, -0.59308455, -0.28499672],
       [-0.41570525,  0.75860563, -1.43011071]])

In [60]:
# Matrix product of the transposed frame with the frame itself: a 3 x 3
# result labelled A-C on both axes.
df.T.dot(df)

Unnamed: 0,A,B,C
A,3.263818,0.430417,2.005541
B,0.430417,5.038516,4.275715
C,2.005541,4.275715,12.55568


In [61]:
# Integer Series holding 5, 6, 7, 8, 9 (the stop value 10 is excluded).
s1 = pd.Series(np.arange(5, 10))

In [62]:
# Dot product of s1 with itself: 25 + 36 + 49 + 64 + 81 = 255.
s1.dot(s1)

255

## Grading

YOUR ANSWER HERE