Source: the official "10 Minutes to pandas" tutorial — http://pandas.pydata.org/pandas-docs/stable/10min.html

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# One-dimensional labelled array: np.nan marks the missing entry, and pandas
# supplies the default 0..5 integer index.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Six consecutive calendar days starting 2013-01-01 (date_range defaults to
# daily frequency); used below as the row index of df.
data = pd.date_range(start='20130101', periods=6)

In [5]:
data

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# 6x4 frame of standard-normal draws, indexed by the dates in `data` with
# columns A-D.  The draws come from a fixed-seed RandomState so that a fresh
# "Restart & Run All" reproduces the same numbers every time; any outputs
# saved before the seed was introduced will change on the next run.
df = pd.DataFrame(np.random.RandomState(0).randn(6, 4),
                  index=data, columns=list('ABCD'))

In [7]:
df

Unnamed: 0,A,B,C,D
2013-01-01,1.344851,1.055483,-0.823238,-1.379411
2013-01-02,-0.084883,0.169683,-0.283265,-0.992474
2013-01-03,0.055404,-2.857177,0.258784,-0.292726
2013-01-04,-1.258711,-1.36346,-0.658652,-1.229647
2013-01-05,1.073347,-0.076793,0.800809,-0.484912
2013-01-06,-0.491312,1.536822,-0.351503,-0.566802


In [8]:
# Build a frame from a dict of column specs.  The scalar entries (A, B, F)
# are repeated on every row, while C, D and E each supply four values and
# keep their own dtypes (float32, int32, category).
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

In [9]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [10]:
# Each column of df2 keeps its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# View

###### See the top & bottom rows of the frame

In [13]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,1.344851,1.055483,-0.823238,-1.379411
2013-01-02,-0.084883,0.169683,-0.283265,-0.992474
2013-01-03,0.055404,-2.857177,0.258784,-0.292726
2013-01-04,-1.258711,-1.36346,-0.658652,-1.229647
2013-01-05,1.073347,-0.076793,0.800809,-0.484912


In [14]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-1.258711,-1.36346,-0.658652,-1.229647
2013-01-05,1.073347,-0.076793,0.800809,-0.484912
2013-01-06,-0.491312,1.536822,-0.351503,-0.566802


###### Display the index, columns, and the underlying numpy data

In [15]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [16]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [17]:
# The frame's data as a plain NumPy array (no index or column labels).
# NOTE(review): newer pandas documentation recommends DataFrame.to_numpy()
# over .values -- consider switching once the minimum pandas version allows it.
df.values

array([[ 1.34485082,  1.05548333, -0.82323825, -1.37941145],
       [-0.08488264,  0.16968282, -0.28326546, -0.99247362],
       [ 0.05540423, -2.8571773 ,  0.25878417, -0.29272571],
       [-1.25871109, -1.36346004, -0.65865246, -1.22964663],
       [ 1.07334666, -0.07679323,  0.800809  , -0.48491185],
       [-0.49131247,  1.5368215 , -0.35150349, -0.56680246]])

##### `describe()` shows a quick statistical summary of your data

In [18]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.106449,-0.255907,-0.176178,-0.824329
std,0.972494,1.621117,0.606197,0.439272
min,-1.258711,-2.857177,-0.823238,-1.379411
25%,-0.389705,-1.041793,-0.581865,-1.170353
50%,-0.014739,0.046445,-0.317384,-0.779638
75%,0.818861,0.834033,0.123272,-0.505385
max,1.344851,1.536822,0.800809,-0.292726


In [19]:
# Transpose: the date index becomes the columns and A-D become the rows.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,1.344851,-0.084883,0.055404,-1.258711,1.073347,-0.491312
B,1.055483,0.169683,-2.857177,-1.36346,-0.076793,1.536822
C,-0.823238,-0.283265,0.258784,-0.658652,0.800809,-0.351503
D,-1.379411,-0.992474,-0.292726,-1.229647,-0.484912,-0.566802


In [20]:
# Sort along axis 1 (the column labels) in descending order: D, C, B, A.
df.sort_index(axis=1,ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.379411,-0.823238,1.055483,1.344851
2013-01-02,-0.992474,-0.283265,0.169683,-0.084883
2013-01-03,-0.292726,0.258784,-2.857177,0.055404
2013-01-04,-1.229647,-0.658652,-1.36346,-1.258711
2013-01-05,-0.484912,0.800809,-0.076793,1.073347
2013-01-06,-0.566802,-0.351503,1.536822,-0.491312


In [21]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-03,0.055404,-2.857177,0.258784,-0.292726
2013-01-04,-1.258711,-1.36346,-0.658652,-1.229647
2013-01-05,1.073347,-0.076793,0.800809,-0.484912
2013-01-02,-0.084883,0.169683,-0.283265,-0.992474
2013-01-01,1.344851,1.055483,-0.823238,-1.379411
2013-01-06,-0.491312,1.536822,-0.351503,-0.566802


# Selection

###### Selecting a single column, which yields a Series, equivalent to df.A

In [22]:
df['A']

2013-01-01    1.344851
2013-01-02   -0.084883
2013-01-03    0.055404
2013-01-04   -1.258711
2013-01-05    1.073347
2013-01-06   -0.491312
Freq: D, Name: A, dtype: float64

###### Selecting via [], which slices the rows.

In [24]:
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,1.344851,1.055483,-0.823238,-1.379411
2013-01-02,-0.084883,0.169683,-0.283265,-0.992474
2013-01-03,0.055404,-2.857177,0.258784,-0.292726


In [25]:
# Slicing with date strings selects rows by label; unlike the positional
# slice above, both endpoints are included.
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-0.084883,0.169683,-0.283265,-0.992474
2013-01-03,0.055404,-2.857177,0.258784,-0.292726
2013-01-04,-1.258711,-1.36346,-0.658652,-1.229647


In [26]:
# Selection by label: the row for the first date in `data`, returned as a
# Series indexed by the column names.
df.loc[data[0]]

A    1.344851
B    1.055483
C   -0.823238
D   -1.379411
Name: 2013-01-01 00:00:00, dtype: float64

In [27]:
# All rows, restricted to columns A and B by label.
df.loc[:, ['A','B']]

Unnamed: 0,A,B
2013-01-01,1.344851,1.055483
2013-01-02,-0.084883,0.169683
2013-01-03,0.055404,-2.857177
2013-01-04,-1.258711,-1.36346
2013-01-05,1.073347,-0.076793
2013-01-06,-0.491312,1.536822


In [28]:
# A single row label combined with a column list reduces the result to a Series.
df.loc['20130102',['A','B']]

A   -0.084883
B    0.169683
Name: 2013-01-02 00:00:00, dtype: float64

In [29]:
# Label slice on the rows (both endpoints included) plus a column list.
df.loc['20130102':'20130104',['A','B']]

Unnamed: 0,A,B
2013-01-02,-0.084883,0.169683
2013-01-03,0.055404,-2.857177
2013-01-04,-1.258711,-1.36346


In [30]:
# Selection by integer position: position 3 is the fourth row (2013-01-04).
df.iloc[3]

A   -1.258711
B   -1.363460
C   -0.658652
D   -1.229647
Name: 2013-01-04 00:00:00, dtype: float64

In [45]:
# '20130101' is a row label of df's DatetimeIndex, not a column label, so
# asking for it along the column axis raises KeyError.  Catch and print the
# error so the cell demonstrates the failure without aborting "Run All".
# To select that date as a one-row frame, slice the rows instead:
#     df.loc['20130101':'20130101', :]
try:
    df.loc[:, ['20130101']]
except KeyError as err:
    # str() of a KeyError is the repr of its message, so the quotes are kept.
    print("KeyError:", err)

KeyError: "None of [['20130101']] are in the [columns]"