# Pandas

## DataFrame

### DataFrame 结构

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Simulated daily price changes for 10 stocks over 5 days, drawn from a
# standard normal distribution (mean 0, standard deviation 1).
# NOTE: the generator is not seeded, so the values differ on every run.
stock_change = np.random.normal(loc=0, scale=1, size=(10, 5))

In [5]:
# Display the raw 10x5 NumPy array before it is wrapped in a DataFrame
stock_change

array([[-0.46182992,  0.12019474, -1.15437614, -0.09053703,  0.17455913],
       [-0.20905998,  0.68518892,  1.1026634 ,  2.52573148, -1.73700414],
       [-2.30617559,  0.08013553,  0.28369232, -0.46207883,  0.71115346],
       [ 1.19196403,  0.24622009,  0.95572014,  0.1843441 ,  0.34857969],
       [-0.80852024,  0.2570339 , -0.22534932,  2.34169   , -0.94620692],
       [ 1.55849342,  1.57916403,  0.05742434, -1.14350354, -1.42569821],
       [ 1.04755591,  0.0580752 ,  0.14500976,  0.8815285 , -1.32614855],
       [-1.99741675,  0.58771186, -0.77941358, -0.620167  , -0.22863551],
       [ 0.53292115,  0.07138332,  0.15406709,  0.90393226, -1.79411439],
       [-0.43231582,  0.90681722,  0.07309412, -0.19639308,  0.45440386]])

In [7]:
# Wrap the raw array in a pandas DataFrame. No labels are supplied, so the
# rows and columns are simply numbered 0, 1, 2, ...
pd.DataFrame(data=stock_change)

Unnamed: 0,0,1,2,3,4
0,-0.46183,0.120195,-1.154376,-0.090537,0.174559
1,-0.20906,0.685189,1.102663,2.525731,-1.737004
2,-2.306176,0.080136,0.283692,-0.462079,0.711153
3,1.191964,0.24622,0.95572,0.184344,0.34858
4,-0.80852,0.257034,-0.225349,2.34169,-0.946207
5,1.558493,1.579164,0.057424,-1.143504,-1.425698
6,1.047556,0.058075,0.14501,0.881528,-1.326149
7,-1.997417,0.587712,-0.779414,-0.620167,-0.228636
8,0.532921,0.071383,0.154067,0.903932,-1.794114
9,-0.432316,0.906817,0.073094,-0.196393,0.454404


In [16]:
# Row (index) labels: one name per stock, '股票0' through '股票9'
# ('股票' means "stock").
label_index = list(map('股票{}'.format, range(10)))
label_index

['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7', '股票8', '股票9']

In [17]:
# Same data as before, now with the stock names as row labels
pd.DataFrame(data=stock_change, index=label_index)

Unnamed: 0,0,1,2,3,4
股票0,-0.46183,0.120195,-1.154376,-0.090537,0.174559
股票1,-0.20906,0.685189,1.102663,2.525731,-1.737004
股票2,-2.306176,0.080136,0.283692,-0.462079,0.711153
股票3,1.191964,0.24622,0.95572,0.184344,0.34858
股票4,-0.80852,0.257034,-0.225349,2.34169,-0.946207
股票5,1.558493,1.579164,0.057424,-1.143504,-1.425698
股票6,1.047556,0.058075,0.14501,0.881528,-1.326149
股票7,-1.997417,0.587712,-0.779414,-0.620167,-0.228636
股票8,0.532921,0.071383,0.154067,0.903932,-1.794114
股票9,-0.432316,0.906817,0.073094,-0.196393,0.454404


In [21]:
# Column labels: five consecutive business days.
# The start date itself is not a business day, so the range begins on
# the first business day after it (2022-01-03).
date = pd.date_range(
    start='20220101',
    periods=5,   # one label per column of the data
    freq='B',    # business-day frequency (weekends are skipped)
)

In [22]:
# Inspect the generated column labels (a DatetimeIndex of business days)
date

DatetimeIndex(['2022-01-03', '2022-01-04', '2022-01-05', '2022-01-06',
               '2022-01-07'],
              dtype='datetime64[ns]', freq='B')

In [25]:
# Attach both the row labels (stock names) and the column labels (dates).
# NOTE: this rebinds `stock_change` from the raw ndarray to the labelled
# DataFrame; the cells that follow operate on the DataFrame.
stock_change = pd.DataFrame(
    data=stock_change,
    index=label_index,
    columns=date,
)
stock_change

Unnamed: 0,2022-01-03,2022-01-04,2022-01-05,2022-01-06,2022-01-07
股票0,-0.46183,0.120195,-1.154376,-0.090537,0.174559
股票1,-0.20906,0.685189,1.102663,2.525731,-1.737004
股票2,-2.306176,0.080136,0.283692,-0.462079,0.711153
股票3,1.191964,0.24622,0.95572,0.184344,0.34858
股票4,-0.80852,0.257034,-0.225349,2.34169,-0.946207
股票5,1.558493,1.579164,0.057424,-1.143504,-1.425698
股票6,1.047556,0.058075,0.14501,0.881528,-1.326149
股票7,-1.997417,0.587712,-0.779414,-0.620167,-0.228636
股票8,0.532921,0.071383,0.154067,0.903932,-1.794114
股票9,-0.432316,0.906817,0.073094,-0.196393,0.454404


### DataFrame 属性

In [27]:
# Shape of the frame as (number of rows, number of columns)
stock_change.shape  # (10, 5)

(10, 5)

In [29]:
# Row index (the row labels)
stock_change.index  # Index(['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7', '股票8', '股票9'], dtype='object')

Index(['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7', '股票8', '股票9'], dtype='object')

In [30]:
# Column index (the column labels)
stock_change.columns

DatetimeIndex(['2022-01-03', '2022-01-04', '2022-01-05', '2022-01-06',
               '2022-01-07'],
              dtype='datetime64[ns]', freq='B')

In [31]:
# Underlying data with the row and column labels stripped off (a NumPy ndarray)
stock_change.values

array([[-0.46182992,  0.12019474, -1.15437614, -0.09053703,  0.17455913],
       [-0.20905998,  0.68518892,  1.1026634 ,  2.52573148, -1.73700414],
       [-2.30617559,  0.08013553,  0.28369232, -0.46207883,  0.71115346],
       [ 1.19196403,  0.24622009,  0.95572014,  0.1843441 ,  0.34857969],
       [-0.80852024,  0.2570339 , -0.22534932,  2.34169   , -0.94620692],
       [ 1.55849342,  1.57916403,  0.05742434, -1.14350354, -1.42569821],
       [ 1.04755591,  0.0580752 ,  0.14500976,  0.8815285 , -1.32614855],
       [-1.99741675,  0.58771186, -0.77941358, -0.620167  , -0.22863551],
       [ 0.53292115,  0.07138332,  0.15406709,  0.90393226, -1.79411439],
       [-0.43231582,  0.90681722,  0.07309412, -0.19639308,  0.45440386]])

In [37]:
# Transpose: rows and columns are swapped, so the dates become the row labels
# and the stock names become the column labels
stock_change.T

Unnamed: 0,股票0,股票1,股票2,股票3,股票4,股票5,股票6,股票7,股票8,股票9
2022-01-03,-0.46183,-0.20906,-2.306176,1.191964,-0.80852,1.558493,1.047556,-1.997417,0.532921,-0.432316
2022-01-04,0.120195,0.685189,0.080136,0.24622,0.257034,1.579164,0.058075,0.587712,0.071383,0.906817
2022-01-05,-1.154376,1.102663,0.283692,0.95572,-0.225349,0.057424,0.14501,-0.779414,0.154067,0.073094
2022-01-06,-0.090537,2.525731,-0.462079,0.184344,2.34169,-1.143504,0.881528,-0.620167,0.903932,-0.196393
2022-01-07,0.174559,-1.737004,0.711153,0.34858,-0.946207,-1.425698,-1.326149,-0.228636,-1.794114,0.454404


### DataFrame 的方法

In [34]:
# Return the first n rows (here n=3); when n is omitted, the default is 5 rows
stock_change.head(3)

Unnamed: 0,2022-01-03,2022-01-04,2022-01-05,2022-01-06,2022-01-07
股票0,-0.46183,0.120195,-1.154376,-0.090537,0.174559
股票1,-0.20906,0.685189,1.102663,2.525731,-1.737004
股票2,-2.306176,0.080136,0.283692,-0.462079,0.711153


In [36]:
# Return the last n rows (here n=2); when n is omitted, the default is 5 rows
stock_change.tail(2)

Unnamed: 0,2022-01-03,2022-01-04,2022-01-05,2022-01-06,2022-01-07
股票8,0.532921,0.071383,0.154067,0.903932,-1.794114
股票9,-0.432316,0.906817,0.073094,-0.196393,0.454404
