Source: [10 Minutes to pandas](http://pandas.pydata.org/pandas-docs/stable/10min.html#min)

This is a short introduction to pandas, geared mainly toward new users. You can see more complex recipes in the [Cookbook](http://pandas.pydata.org/pandas-docs/stable/cookbook.html#cookbook).

Customarily, we import as follows:

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Object Creation


In [4]:
# Create a Series from a list of values (one of them missing);
# pandas supplies a default integer index.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0     1
1     3
2     5
3   NaN
4     6
5     8
dtype: float64

In [6]:
# Creating a DataFrame by passing a NumPy array, with a datetime index
# and labeled columns. First build the index: six consecutive periods
# starting on 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# Build a 6x4 frame of random normal draws, indexed by `dates` and with
# columns labeled A-D. The generator is not seeded here, so the values
# differ on every run.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.455228,0.248434,1.268511,-1.822317
2013-01-02,0.561572,0.299449,-0.262305,0.908257
2013-01-03,0.011252,-0.275731,-1.239358,0.557978
2013-01-04,1.417153,0.025428,-0.518902,1.059422
2013-01-05,-1.1922,0.191904,0.864021,0.503188
2013-01-06,-0.483929,-0.420921,1.075531,-1.593207


In [13]:
# Create a DataFrame from a dict of objects that can each be converted to
# something Series-like. Scalars (A, B, F) are repeated for every row;
# the length-4 entries (C, D, E) determine the number of rows.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1,2013-01-02,1,3,test,foo
1,1,2013-01-02,1,3,train,foo
2,1,2013-01-02,1,3,test,foo
3,1,2013-01-02,1,3,train,foo


In [14]:
# Show the dtype of each column of df2 (one entry per column).
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# Viewing Data

In [15]:
# Display the top rows of the frame (five rows when no count is given).
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.455228,0.248434,1.268511,-1.822317
2013-01-02,0.561572,0.299449,-0.262305,0.908257
2013-01-03,0.011252,-0.275731,-1.239358,0.557978
2013-01-04,1.417153,0.025428,-0.518902,1.059422
2013-01-05,-1.1922,0.191904,0.864021,0.503188


In [16]:
# Display the last three rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,1.417153,0.025428,-0.518902,1.059422
2013-01-05,-1.1922,0.191904,0.864021,0.503188
2013-01-06,-0.483929,-0.420921,1.075531,-1.593207


In [17]:
# The row labels of the frame (here the DatetimeIndex built from `dates`).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [18]:
# The column labels of the frame.
df.columns

Index([u'A', u'B', u'C', u'D'], dtype='object')

In [19]:
# The underlying data as a NumPy array, without the index or column labels.
df.values

array([[-0.45522803,  0.24843447,  1.26851128, -1.82231745],
       [ 0.56157196,  0.29944854, -0.2623048 ,  0.90825653],
       [ 0.01125158, -0.27573137, -1.23935799,  0.55797756],
       [ 1.41715306,  0.0254285 , -0.51890151,  1.05942152],
       [-1.19220008,  0.19190438,  0.8640208 ,  0.50318758],
       [-0.48392914, -0.42092135,  1.0755312 , -1.59320744]])

In [20]:
# Quick statistical summary of each column: count, mean, std, min,
# quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.023563,0.011427,0.197916,-0.064447
std,0.914685,0.29708,1.015034,1.291981
min,-1.1922,-0.420921,-1.239358,-1.822317
25%,-0.476754,-0.200441,-0.454752,-1.069109
50%,-0.221988,0.108666,0.300858,0.530583
75%,0.423992,0.234302,1.022654,0.820687
max,1.417153,0.299449,1.268511,1.059422


In [23]:
df.T  # Transpose: the dates become columns and A-D become the row labels.

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.455228,0.561572,0.011252,1.417153,-1.1922,-0.483929
B,0.248434,0.299449,-0.275731,0.025428,0.191904,-0.420921
C,1.268511,-0.262305,-1.239358,-0.518902,0.864021,1.075531
D,-1.822317,0.908257,0.557978,1.059422,0.503188,-1.593207


In [24]:
# Sort by axis labels: axis=1 orders the columns by name, and
# ascending=False reverses that order (D, C, B, A).
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.822317,1.268511,0.248434,-0.455228
2013-01-02,0.908257,-0.262305,0.299449,0.561572
2013-01-03,0.557978,-1.239358,-0.275731,0.011252
2013-01-04,1.059422,-0.518902,0.025428,1.417153
2013-01-05,0.503188,0.864021,0.191904,-1.1922
2013-01-06,-1.593207,1.075531,-0.420921,-0.483929


In [25]:
# Sort the rows by the values in column 'B' (smallest first).
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-06,-0.483929,-0.420921,1.075531,-1.593207
2013-01-03,0.011252,-0.275731,-1.239358,0.557978
2013-01-04,1.417153,0.025428,-0.518902,1.059422
2013-01-05,-1.1922,0.191904,0.864021,0.503188
2013-01-01,-0.455228,0.248434,1.268511,-1.822317
2013-01-02,0.561572,0.299449,-0.262305,0.908257


# Selection

In [26]:
# Select a single column by label; the result is a Series named 'A'.
df['A']

2013-01-01   -0.455228
2013-01-02    0.561572
2013-01-03    0.011252
2013-01-04    1.417153
2013-01-05   -1.192200
2013-01-06   -0.483929
Freq: D, Name: A, dtype: float64

In [27]:
# Slicing with [] selects rows: here the first three rows by position.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.455228,0.248434,1.268511,-1.822317
2013-01-02,0.561572,0.299449,-0.262305,0.908257
2013-01-03,0.011252,-0.275731,-1.239358,0.557978


In [28]:
 # Slicing with date strings selects rows by label; both endpoints are
 # included in the result.
 # NOTE(review): this cell's code starts with a stray leading space. The
 # notebook ran it, but the same line in a plain .py script would raise
 # an IndentationError - consider removing the indent.
 df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,0.561572,0.299449,-0.262305,0.908257
2013-01-03,0.011252,-0.275731,-1.239358,0.557978
2013-01-04,1.417153,0.025428,-0.518902,1.059422


In [29]:
# Select one row by its label; the row comes back as a Series indexed by
# column name.
df.loc[dates[0]]

A   -0.455228
B    0.248434
C    1.268511
D   -1.822317
Name: 2013-01-01 00:00:00, dtype: float64

In [30]:
# Label-based selection on both axes: a range of dates (endpoints
# included) for the rows, and columns A and B.
df.loc['20130102':'20130104',['A','B']]

Unnamed: 0,A,B
2013-01-02,0.561572,0.299449
2013-01-03,0.011252,-0.275731
2013-01-04,1.417153,0.025428


In [31]:
# A single row label combined with a list of columns reduces the result
# to a Series.
df.loc['20130102',['A','B']]

A    0.561572
B    0.299449
Name: 2013-01-02 00:00:00, dtype: float64

In [32]:
# A single row label and a single column label yield a scalar value.
df.loc[dates[0],'A']

-0.45522803371969817

In [33]:
# Same scalar lookup through .at, pandas' accessor for a single value
# addressed by row and column label.
df.at[dates[0],'A']

-0.45522803371969817

In [34]:
# Select by integer position: position 3 is the fourth row (2013-01-04),
# returned as a Series.
df.iloc[3]


A    1.417153
B    0.025428
C   -0.518902
D    1.059422
Name: 2013-01-04 00:00:00, dtype: float64

In [35]:
 # Boolean indexing: keep only the rows where column A is positive.
 # NOTE(review): this cell's code starts with a stray leading space. The
 # notebook ran it, but the same line in a plain .py script would raise
 # an IndentationError - consider removing the indent.
 df[df.A > 0]

Unnamed: 0,A,B,C,D
2013-01-02,0.561572,0.299449,-0.262305,0.908257
2013-01-03,0.011252,-0.275731,-1.239358,0.557978
2013-01-04,1.417153,0.025428,-0.518902,1.059422
