In [32]:
import pandas as pd

In [33]:
import numpy as np

In [34]:
import matplotlib.pyplot as plt

Creating a Series by passing a list of values, letting pandas create a default integer index:

In [35]:
# No index is given, so pandas assigns the default integer index 0..5.
# The NaN entry makes the resulting dtype float64.
s = pd.Series(data=[1, 3, 4, np.nan, 6, 8])

In [36]:
# Echo the Series: six values on the default 0..5 index, one of them NaN.
s

0    1.0
1    3.0
2    4.0
3    NaN
4    6.0
5    8.0
dtype: float64

Creating a DataFrame by passing a numpy array, with a datetime index and labeled columns:

In [37]:
# Six consecutive calendar days starting 2012-01-01; used below as a row index.
dates = pd.date_range(start='20120101', periods=6, freq='D')

In [38]:
# Show the generated DatetimeIndex (daily frequency, six entries).
dates

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06'],
              dtype='datetime64[ns]', freq='D')

In [39]:
# 6x4 frame of standard-normal samples: rows labeled by `dates`, columns A-D.
# The samples come from a fixed-seed generator so that Restart & Run All
# reproduces the same frame, and therefore the same outputs in every later
# cell. A private RandomState is used so NumPy's global RNG state is untouched.
df = pd.DataFrame(
    np.random.RandomState(0).randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)

In [40]:
# Display the whole frame; at 6 rows by 4 columns it is small enough to show in full.
df

Unnamed: 0,A,B,C,D
2012-01-01,-0.002534,-1.286109,0.873722,0.686788
2012-01-02,-0.620083,-0.735683,0.375891,0.849857
2012-01-03,-0.513469,-0.248377,0.765546,-0.474191
2012-01-04,0.648233,1.247924,0.592879,1.126082
2012-01-05,-0.079265,1.381555,-1.318551,0.920177
2012-01-06,1.467949,-1.117533,0.140306,-0.771816


Creating a DataFrame by passing a dict of objects that can be converted to a series-like structure:

In [41]:
# Mixed-type frame built column by column from a dict.
# 'A', 'B' and 'F' are scalars and get broadcast to the length of the
# array-like columns 'C', 'D' and 'E' (four rows each).
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

In [42]:
# Display the frame; the scalar columns A, B and F repeat on every row.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


The columns of the resulting DataFrame have different dtypes:

In [43]:
# One dtype per column; the Series that reports them is itself of dtype object.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

If you’re using IPython, tab completion for column names (as well as public attributes) is automatically enabled. Here’s a subset of the attributes that will be completed:

df2.<TAB>

View the top and bottom rows of the frame:

In [20]:
# First five rows (five is head()'s default, spelled out here).
df.head(n=5)

Unnamed: 0,A,B,C,D
2012-01-01,-0.434059,-0.000569,-1.810357,0.565488
2012-01-02,-0.684676,-1.720374,0.622417,0.361492
2012-01-03,0.051762,-1.012609,-0.207803,1.249396
2012-01-04,-1.58515,-0.059833,0.645171,0.519209
2012-01-05,-0.553733,0.486481,-1.184094,-0.944812


In [21]:
# Last three rows.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2012-01-04,-1.58515,-0.059833,0.645171,0.519209
2012-01-05,-0.553733,0.486481,-1.184094,-0.944812
2012-01-06,0.398856,-0.464529,0.263256,1.8028


Display the index, columns, and the underlying numpy data

In [22]:
# Row labels: the DatetimeIndex the frame was constructed with.
df.index

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06'],
              dtype='datetime64[ns]', freq='D')

In [23]:
# Column labels as an Index of strings.
df.columns

Index([u'A', u'B', u'C', u'D'], dtype='object')

In [24]:
# The frame's data as a plain 2-D NumPy array; row and column labels are not part of it.
df.values

array([[ -4.34059309e-01,  -5.69307223e-04,  -1.81035686e+00,
          5.65487893e-01],
       [ -6.84675647e-01,  -1.72037417e+00,   6.22416593e-01,
          3.61491516e-01],
       [  5.17618919e-02,  -1.01260921e+00,  -2.07803200e-01,
          1.24939554e+00],
       [ -1.58514993e+00,  -5.98333291e-02,   6.45170986e-01,
          5.19208848e-01],
       [ -5.53732936e-01,   4.86480975e-01,  -1.18409405e+00,
         -9.44812015e-01],
       [  3.98855611e-01,  -4.64528862e-01,   2.63255658e-01,
          1.80279992e+00]])

`describe()` shows a quick statistical summary of your data:

In [26]:
# Per-column count, mean, standard deviation, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.467833,-0.461906,-0.278568,0.592262
std,0.681919,0.795104,1.012917,0.929423
min,-1.58515,-1.720374,-1.810357,-0.944812
25%,-0.65194,-0.875589,-0.940021,0.400921
50%,-0.493896,-0.262181,0.027726,0.542348
75%,-0.069693,-0.015385,0.532626,1.078419
max,0.398856,0.486481,0.645171,1.8028


Transposing your data

In [27]:
# Swap rows and columns: the dates become column labels and A-D become the index.
df.transpose()

Unnamed: 0,2012-01-01 00:00:00,2012-01-02 00:00:00,2012-01-03 00:00:00,2012-01-04 00:00:00,2012-01-05 00:00:00,2012-01-06 00:00:00
A,-0.434059,-0.684676,0.051762,-1.58515,-0.553733,0.398856
B,-0.000569,-1.720374,-1.012609,-0.059833,0.486481,-0.464529
C,-1.810357,0.622417,-0.207803,0.645171,-1.184094,0.263256
D,0.565488,0.361492,1.249396,0.519209,-0.944812,1.8028


Sorting by an axis

In [30]:
# Reorder the columns by label, descending (D, C, B, A); the row order is unchanged.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2012-01-01,0.565488,-1.810357,-0.000569,-0.434059
2012-01-02,0.361492,0.622417,-1.720374,-0.684676
2012-01-03,1.249396,-0.207803,-1.012609,0.051762
2012-01-04,0.519209,0.645171,-0.059833,-1.58515
2012-01-05,-0.944812,-1.184094,0.486481,-0.553733
2012-01-06,1.8028,0.263256,-0.464529,0.398856


Sorting by values:

In [31]:
# Order the rows by the values in column B, smallest first.
df.sort_values('B', ascending=True)

Unnamed: 0,A,B,C,D
2012-01-02,-0.684676,-1.720374,0.622417,0.361492
2012-01-03,0.051762,-1.012609,-0.207803,1.249396
2012-01-06,0.398856,-0.464529,0.263256,1.8028
2012-01-04,-1.58515,-0.059833,0.645171,0.519209
2012-01-01,-0.434059,-0.000569,-1.810357,0.565488
2012-01-05,-0.553733,0.486481,-1.184094,-0.944812
