In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Create a Series from a list of values; pandas supplies a default integer index.
# The np.nan entry is a missing value, and the resulting Series has dtype float64.
s = pd.Series([1, 3, 5, np.nan, 6, 8])

In [3]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Build a daily DatetimeIndex of six consecutive days starting 2013-01-01.
# It is used as the row index of the DataFrame created from a NumPy array.
dates = pd.date_range(start='2013-01-01', periods=6, freq='D')

In [5]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Check the type of the object returned by pd.date_range: it is a DatetimeIndex.
type(dates)

pandas.core.indexes.datetimes.DatetimeIndex

In [7]:
# Build a 6x4 DataFrame of standard-normal samples, indexed by `dates`, with
# columns labeled A-D. A locally seeded generator is used so that
# Restart & Run All reproduces the same frame every time.
# NOTE(review): outputs saved in this notebook came from an unseeded run and
# will change once on the next re-execution.
rng = np.random.RandomState(42)
df = pd.DataFrame(rng.randn(6, 4), index=dates, columns=list('ABCD'))

In [8]:
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.033023,-1.505219,1.187196,-1.895326
2013-01-02,1.501467,-0.77658,0.458607,1.041455
2013-01-03,0.80918,-1.969029,-0.224759,-0.496739
2013-01-04,-1.059223,1.30561,0.106716,-0.199083
2013-01-05,-0.739974,0.127757,-0.401292,0.21977
2013-01-06,0.16194,1.357966,0.468767,-0.586061


In [9]:
# Build a DataFrame from a dict whose values mix scalars and array-likes.
# Each entry becomes one column; the scalar entries ('A', 'B', 'F') are
# repeated for all four rows defined by the array-like entries.
df2 = pd.DataFrame({
    'A': 1.0,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})

In [10]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [11]:
# Each column keeps its own dtype, so a single DataFrame can hold several dtypes at once.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### Here is how to view the top and bottom rows of the frame.

In [16]:
# head(2) shows only the first two rows.
df.head(2)

Unnamed: 0,A,B,C,D
2013-01-01,-0.033023,-1.505219,1.187196,-1.895326
2013-01-02,1.501467,-0.77658,0.458607,1.041455


In [17]:
# tail(3) shows only the last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-1.059223,1.30561,0.106716,-0.199083
2013-01-05,-0.739974,0.127757,-0.401292,0.21977
2013-01-06,0.16194,1.357966,0.468767,-0.586061


### Display the index, columns, and the underlying NumPy data.

In [18]:
# Row labels: the DatetimeIndex that was passed as `index` when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [19]:
# Column labels, held in a pandas Index.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [20]:
# The underlying data as a 2-D NumPy array; the index and column labels are not included.
# NOTE(review): newer pandas releases recommend df.to_numpy() over .values —
# confirm the installed pandas version before switching.
df.values

array([[-0.03302315, -1.50521903,  1.18719554, -1.89532586],
       [ 1.50146733, -0.77658038,  0.45860735,  1.04145526],
       [ 0.80918011, -1.96902936, -0.22475853, -0.49673935],
       [-1.05922313,  1.30561026,  0.10671602, -0.19908282],
       [-0.7399739 ,  0.12775661, -0.40129176,  0.21977041],
       [ 0.1619402 ,  1.35796587,  0.46876722, -0.58606088]])

In [21]:
# Show a quick statistical summary of each column: count, mean, std, min, quartiles, and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.106728,-0.243249,0.265873,-0.319331
std,0.952645,1.411087,0.572417,0.973982
min,-1.059223,-1.969029,-0.401292,-1.895326
25%,-0.563236,-1.323059,-0.14189,-0.56373
50%,0.064459,-0.324412,0.282662,-0.347911
75%,0.64737,1.011147,0.466227,0.115057
max,1.501467,1.357966,1.187196,1.041455


In [22]:
# Transpose the frame: the columns A-D become rows and the dates become column labels.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.033023,1.501467,0.80918,-1.059223,-0.739974,0.16194
B,-1.505219,-0.77658,-1.969029,1.30561,0.127757,1.357966
C,1.187196,0.458607,-0.224759,0.106716,-0.401292,0.468767
D,-1.895326,1.041455,-0.496739,-0.199083,0.21977,-0.586061


In [23]:
# Sort by the labels along axis 1 (the column names) in descending order: D, C, B, A.
# The row order is left unchanged.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.895326,1.187196,-1.505219,-0.033023
2013-01-02,1.041455,0.458607,-0.77658,1.501467
2013-01-03,-0.496739,-0.224759,-1.969029,0.80918
2013-01-04,-0.199083,0.106716,1.30561,-1.059223
2013-01-05,0.21977,-0.401292,0.127757,-0.739974
2013-01-06,-0.586061,0.468767,1.357966,0.16194


In [24]:
# Sort the rows by the values in column 'B', smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-03,0.80918,-1.969029,-0.224759,-0.496739
2013-01-01,-0.033023,-1.505219,1.187196,-1.895326
2013-01-02,1.501467,-0.77658,0.458607,1.041455
2013-01-05,-0.739974,0.127757,-0.401292,0.21977
2013-01-04,-1.059223,1.30561,0.106716,-0.199083
2013-01-06,0.16194,1.357966,0.468767,-0.586061


In [25]:
# Select a single column, which yields a Series; equivalent to df.A.
df['A']

2013-01-01   -0.033023
2013-01-02    1.501467
2013-01-03    0.809180
2013-01-04   -1.059223
2013-01-05   -0.739974
2013-01-06    0.161940
Freq: D, Name: A, dtype: float64

In [26]:
# Slicing with [] selects rows. An integer slice is positional and excludes
# the stop position, so 0:3 returns the first three rows.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.033023,-1.505219,1.187196,-1.895326
2013-01-02,1.501467,-0.77658,0.458607,1.041455
2013-01-03,0.80918,-1.969029,-0.224759,-0.496739


In [27]:
# Slicing with date strings selects rows by index label; both endpoints
# (2013-01-02 and 2013-01-04) are included in the result.
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,1.501467,-0.77658,0.458607,1.041455
2013-01-03,0.80918,-1.969029,-0.224759,-0.496739
2013-01-04,-1.059223,1.30561,0.106716,-0.199083


### Selection by label.

In [28]:
# Indexing the DatetimeIndex by position returns a single Timestamp.
dates[0]

Timestamp('2013-01-01 00:00:00', freq='D')

In [29]:
# Get a cross-section of one row by its label; the result is a Series
# indexed by the column names and named after the row label.
df.loc[dates[0]]

A   -0.033023
B   -1.505219
C    1.187196
D   -1.895326
Name: 2013-01-01 00:00:00, dtype: float64

In [37]:
# Select along both axes by label: all rows (:) and only columns 'A' and 'B'.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2013-01-01,-0.033023,-1.505219
2013-01-02,1.501467,-0.77658
2013-01-03,0.80918,-1.969029
2013-01-04,-1.059223,1.30561
2013-01-05,-0.739974,0.127757
2013-01-06,0.16194,1.357966
