In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Creating a Series by passing a list of values, letting pandas create a default integer index

In [7]:
# Build a Series straight from a Python list; with no index supplied, pandas
# labels the entries 0..5. np.nan stands in for a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

#### Creating a DataFrame by passing a numpy array, with a datetime index and labelled columns

In [5]:
# Six consecutive calendar days beginning 2013-01-01 (daily frequency).
dates = pd.date_range(start='20130101', periods=6, freq='D')

In [6]:
# Inspect the index just built: a DatetimeIndex of six daily timestamps.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [8]:
# Draw the 6x4 block of standard-normal samples from a dedicated, seeded
# generator so that Restart & Run All rebuilds the same frame every time
# (the seed value 0 is arbitrary). Outputs saved from an earlier unseeded run
# will differ until the notebook is re-executed.
df = pd.DataFrame(
    np.random.RandomState(0).randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)

In [9]:
# Render the whole frame; at 6 rows x 4 columns it is small enough to show in full.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.346949,-2.280955,1.790562,-0.332762
2013-01-02,1.124129,-1.375961,0.386705,-0.88021
2013-01-03,-1.066036,-1.086047,-0.485264,-0.792797
2013-01-04,1.460987,1.868682,0.06371,0.787726
2013-01-05,-0.299283,-0.541487,1.573057,0.312755
2013-01-06,-0.824289,-0.254704,-1.103082,0.60336


#### Creating a DataFrame by passing a dict of objects that can be converted to a series-like structure

In [10]:
# Build a frame from a dict mapping column label -> column content.
# Scalar entries ('A', 'B', 'F') are repeated on every row; the array-like
# entries ('C', 'D', 'E') each hold four elements and set the row count.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [11]:
# Show the assembled frame; the scalar inputs appear repeated on each of the four rows.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [12]:
# List the dtype of every column; each column keeps its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

#### Tab completion: type `df2.` and press TAB to list the frame's attributes and methods

#### See the top and bottom rows of the frame

In [13]:
# Preview the first five rows (five is also head()'s default).
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,-0.346949,-2.280955,1.790562,-0.332762
2013-01-02,1.124129,-1.375961,0.386705,-0.88021
2013-01-03,-1.066036,-1.086047,-0.485264,-0.792797
2013-01-04,1.460987,1.868682,0.06371,0.787726
2013-01-05,-0.299283,-0.541487,1.573057,0.312755


In [15]:
# Preview only the last two rows.
df.tail(n=2)

Unnamed: 0,A,B,C,D
2013-01-05,-0.299283,-0.541487,1.573057,0.312755
2013-01-06,-0.824289,-0.254704,-1.103082,0.60336


#### Display the index, columns, and underlying numpy data

In [16]:
# Row labels of the frame: the DatetimeIndex supplied as `index=` when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [17]:
# Column labels of the frame: 'A' through 'D'.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [18]:
# The frame's data as a plain 2-D NumPy array, with index and column labels dropped.
# NOTE(review): recent pandas documentation recommends df.to_numpy() over
# .values — confirm the minimum pandas version before switching.
df.values

array([[-0.3469492 , -2.28095496,  1.79056226, -0.3327625 ],
       [ 1.12412947, -1.375961  ,  0.38670512, -0.88020962],
       [-1.06603632, -1.08604712, -0.48526419, -0.7927971 ],
       [ 1.46098659,  1.86868232,  0.06370975,  0.78772625],
       [-0.29928306, -0.54148689,  1.57305662,  0.31275489],
       [-0.82428889, -0.25470367, -1.10308243,  0.60335953]])

#### `describe()` shows a quick statistical summary of your data

In [20]:
# Per-column summary: count, mean, std, min, the 25%/50%/75% quantiles, and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.008093,-0.611745,0.370948,-0.050321
std,1.041498,1.405716,1.136739,0.718499
min,-1.066036,-2.280955,-1.103082,-0.88021
25%,-0.704954,-1.303483,-0.348021,-0.677788
50%,-0.323116,-0.813767,0.225207,-0.010004
75%,0.768276,-0.326399,1.276469,0.530708
max,1.460987,1.868682,1.790562,0.787726


#### Transposing your data

In [21]:
# Swap rows and columns: the column labels become the index and the dates become the columns.
df.transpose()

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.346949,1.124129,-1.066036,1.460987,-0.299283,-0.824289
B,-2.280955,-1.375961,-1.086047,1.868682,-0.541487,-0.254704
C,1.790562,0.386705,-0.485264,0.06371,1.573057,-1.103082
D,-0.332762,-0.88021,-0.792797,0.787726,0.312755,0.60336
