In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# One missing entry (np.nan) among the integers forces the whole Series to float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
# Display the Series: the missing entry is rendered as NaN and the dtype is float64.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Six consecutive calendar days starting on 2018-05-17 (daily frequency).
dates = pd.date_range(start='20180517', periods=6, freq='D')
dates

DatetimeIndex(['2018-05-17', '2018-05-18', '2018-05-19', '2018-05-20',
               '2018-05-21', '2018-05-22'],
              dtype='datetime64[ns]', freq='D')

In [8]:
# Build a 6x3 frame of random normal draws, indexed by `dates`, with columns A-C.
# No random seed is set, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 3),
    index=dates,
    columns=['A', 'B', 'C'],
)
df

Unnamed: 0,A,B,C
2018-05-17,1.40245,0.503151,-0.105546
2018-05-18,1.593871,0.414311,-0.068277
2018-05-19,-2.098209,0.450371,0.716683
2018-05-20,0.9406,0.006399,2.167862
2018-05-21,-0.818359,2.109311,0.480636
2018-05-22,0.223634,-0.606856,-1.31144


In [15]:
# Build a frame from a dict: every key becomes a column, each column keeps
# its own dtype, and scalar entries are repeated for all four rows.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20180517'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3, 3, 3, 3], dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2018-05-17,1.0,3,test,foo
1,1.0,2018-05-17,1.0,3,train,foo
2,1.0,2018-05-17,1.0,3,test,foo
3,1.0,2018-05-17,1.0,3,train,foo


In [16]:
# Inspect the per-column dtypes: each column of df2 has a different one.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [19]:
# View the top rows of the frame (five rows, the default).
df.head(n=5)

Unnamed: 0,A,B,C
2018-05-17,1.40245,0.503151,-0.105546
2018-05-18,1.593871,0.414311,-0.068277
2018-05-19,-2.098209,0.450371,0.716683
2018-05-20,0.9406,0.006399,2.167862
2018-05-21,-0.818359,2.109311,0.480636


In [20]:
# View the bottom three rows of the frame.
df.tail(n=3)

Unnamed: 0,A,B,C
2018-05-20,0.9406,0.006399,2.167862
2018-05-21,-0.818359,2.109311,0.480636
2018-05-22,0.223634,-0.606856,-1.31144


In [21]:
# Display the index (row labels) of the frame; the columns and the
# underlying NumPy data are shown in the cells that follow.
df.index

DatetimeIndex(['2018-05-17', '2018-05-18', '2018-05-19', '2018-05-20',
               '2018-05-21', '2018-05-22'],
              dtype='datetime64[ns]', freq='D')

In [22]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C'], dtype='object')

In [23]:
# Underlying data as a 2-D NumPy array, without the index or column labels.
df.values

array([[ 1.4024498 ,  0.50315131, -0.10554628],
       [ 1.59387134,  0.41431052, -0.06827716],
       [-2.09820886,  0.45037075,  0.71668345],
       [ 0.94060008,  0.00639856,  2.16786217],
       [-0.8183588 ,  2.10931056,  0.48063617],
       [ 0.2236336 , -0.6068563 , -1.31144024]])

In [25]:
# describe() shows a quick statistical summary of each column:
# count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C
count,6.0,6.0,6.0
mean,0.207331,0.479448,0.31332
std,1.432222,0.901769,1.148123
min,-2.098209,-0.606856,-1.31144
25%,-0.557861,0.108377,-0.096229
50%,0.582117,0.432341,0.20618
75%,1.286987,0.489956,0.657672
max,1.593871,2.109311,2.167862


In [27]:
# Transpose the data: columns A-C become rows and the dates become columns.
df.T 

Unnamed: 0,2018-05-17 00:00:00,2018-05-18 00:00:00,2018-05-19 00:00:00,2018-05-20 00:00:00,2018-05-21 00:00:00,2018-05-22 00:00:00
A,1.40245,1.593871,-2.098209,0.9406,-0.818359,0.223634
B,0.503151,0.414311,0.450371,0.006399,2.109311,-0.606856
C,-0.105546,-0.068277,0.716683,2.167862,0.480636,-1.31144


In [29]:
# Sort the rows by their index labels in descending order (latest date first).
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C
2018-05-22,0.223634,-0.606856,-1.31144
2018-05-21,-0.818359,2.109311,0.480636
2018-05-20,0.9406,0.006399,2.167862
2018-05-19,-2.098209,0.450371,0.716683
2018-05-18,1.593871,0.414311,-0.068277
2018-05-17,1.40245,0.503151,-0.105546


In [32]:
# Sort along the column axis in descending label order (C, B, A).
# `axis` selects what gets sorted: 0 / 'index' sorts the rows by their index
# labels, 1 / 'columns' sorts the columns by their names.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,C,B,A
2018-05-17,-0.105546,0.503151,1.40245
2018-05-18,-0.068277,0.414311,1.593871
2018-05-19,0.716683,0.450371,-2.098209
2018-05-20,2.167862,0.006399,0.9406
2018-05-21,0.480636,2.109311,-0.818359
2018-05-22,-1.31144,-0.606856,0.223634


In [33]:
# Sort the rows by the values in column B, smallest first.
df.sort_values(by='B', ascending=True)

Unnamed: 0,A,B,C
2018-05-22,0.223634,-0.606856,-1.31144
2018-05-20,0.9406,0.006399,2.167862
2018-05-18,1.593871,0.414311,-0.068277
2018-05-19,-2.098209,0.450371,0.716683
2018-05-17,1.40245,0.503151,-0.105546
2018-05-21,-0.818359,2.109311,0.480636


In [34]:
# Selecting a single column by name yields a Series named after that column.
df['A']

2018-05-17    1.402450
2018-05-18    1.593871
2018-05-19   -2.098209
2018-05-20    0.940600
2018-05-21   -0.818359
2018-05-22    0.223634
Freq: D, Name: A, dtype: float64

In [42]:
# Slicing with [] selects rows by position: here the first three rows.
df[:3]

Unnamed: 0,A,B,C
2018-05-17,1.40245,0.503151,-0.105546
2018-05-18,1.593871,0.414311,-0.068277
2018-05-19,-2.098209,0.450371,0.716683


In [43]:
# Column C as a plain NumPy array.
df['C'].values

array([-0.10554628, -0.06827716,  0.71668345,  2.16786217,  0.48063617,
       -1.31144024])

In [45]:
# Slice rows by date strings; both endpoints are included in the result.
df['20180517':'20180519']

Unnamed: 0,A,B,C
2018-05-17,1.40245,0.503151,-0.105546
2018-05-18,1.593871,0.414311,-0.068277
2018-05-19,-2.098209,0.450371,0.716683


In [47]:
# Get a cross section (one row, returned as a Series) using its index label.
df.loc[dates[0]]

A    1.402450
B    0.503151
C   -0.105546
Name: 2018-05-17 00:00:00, dtype: float64

In [48]:
# The same row can be selected with a date string instead of a Timestamp.
df.loc['20180517']

A    1.402450
B    0.503151
C   -0.105546
Name: 2018-05-17 00:00:00, dtype: float64

In [49]:
# Get a scalar value: row label first, column label second.
df.loc['20180517', 'A']

1.4024497972298926

In [51]:
# Counter-example: .loc takes the row label first and the column label second.
# With the arguments swapped, 'A' is looked up among the row labels and the
# lookup raises KeyError (e.g. "the label [A] is not in the [index]").
# The error is caught and printed so the demonstration stays visible without
# aborting a "Restart & Run All".
try:
    df.loc['A', '20180517']
except KeyError as err:
    print('KeyError:', err)

In [52]:
# Show the full frame again for reference.
df

Unnamed: 0,A,B,C
2018-05-17,1.40245,0.503151,-0.105546
2018-05-18,1.593871,0.414311,-0.068277
2018-05-19,-2.098209,0.450371,0.716683
2018-05-20,0.9406,0.006399,2.167862
2018-05-21,-0.818359,2.109311,0.480636
2018-05-22,0.223634,-0.606856,-1.31144


In [56]:
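# df.loc[0:3, 2:4]  # Incorrect usage: .loc selects by label, so integer positions
#                   # do not work on this date-indexed frame; use .iloc for positions.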

In [57]:
# Select by integer position: the first three rows and column positions 2-3.
# The frame has only three columns, so the slice is cut off at the last one
# and just column C is returned.
df.iloc[:3, 2:4]

Unnamed: 0,C
2018-05-17,-0.105546
2018-05-18,-0.068277
2018-05-19,0.716683
