# Pandas介绍

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Simulated daily price changes: 8 stocks (rows) x 10 trading days (columns),
# drawn from a standard normal distribution.
# A seeded generator makes the numbers identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
stock_change = rng.standard_normal((8, 10))

# With no labels supplied, pandas assigns default integer row and column labels.
data = pd.DataFrame(stock_change)

data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.178817,0.95398,0.160059,-0.655809,0.019708,-1.779309,0.587014,-0.183757,0.244104,0.689152
1,1.106843,0.312691,0.598712,-0.03661,1.488815,-0.939376,0.805096,0.160212,-0.567394,-0.580094
2,0.354455,-0.770311,-0.456714,-1.295187,1.837053,1.218789,0.21532,-1.713057,1.108645,-0.38019
3,-2.072576,0.624806,-1.592329,-1.857971,-0.830081,0.257665,-0.0864,0.28365,0.442984,1.353202
4,0.802811,-1.488624,0.85108,-1.601568,2.329946,-0.863219,2.071075,0.152203,-0.008415,-0.089813
5,-1.050388,0.728117,1.535047,1.052218,-0.692382,-0.442707,0.798056,0.334425,-0.297269,1.998243
6,0.002456,1.159814,1.298081,0.281751,0.254067,0.769014,0.850946,0.26915,1.850569,-0.249231
7,0.433466,-0.362083,-0.501669,1.402216,-0.381445,-1.303242,0.048012,-1.706043,-1.165417,-1.045564


In [3]:
# Derive the label counts from the array itself so the labels always match its shape.
n_stocks, n_days = stock_change.shape

# Row labels: one name per stock ('股票0', '股票1', ...).
stock_name_list = ['股票{}'.format(x) for x in range(n_stocks)]
# Column labels: consecutive business days (freq='B' skips weekends) starting 2023-08-01.
stock_date_list = pd.date_range('20230801', periods=n_days, freq='B')

# Rebuild the frame with meaningful row and column labels.
data = pd.DataFrame(stock_change, index=stock_name_list, columns=stock_date_list)
data

Unnamed: 0,2023-08-01,2023-08-02,2023-08-03,2023-08-04,2023-08-07,2023-08-08,2023-08-09,2023-08-10,2023-08-11,2023-08-14
股票0,0.178817,0.95398,0.160059,-0.655809,0.019708,-1.779309,0.587014,-0.183757,0.244104,0.689152
股票1,1.106843,0.312691,0.598712,-0.03661,1.488815,-0.939376,0.805096,0.160212,-0.567394,-0.580094
股票2,0.354455,-0.770311,-0.456714,-1.295187,1.837053,1.218789,0.21532,-1.713057,1.108645,-0.38019
股票3,-2.072576,0.624806,-1.592329,-1.857971,-0.830081,0.257665,-0.0864,0.28365,0.442984,1.353202
股票4,0.802811,-1.488624,0.85108,-1.601568,2.329946,-0.863219,2.071075,0.152203,-0.008415,-0.089813
股票5,-1.050388,0.728117,1.535047,1.052218,-0.692382,-0.442707,0.798056,0.334425,-0.297269,1.998243
股票6,0.002456,1.159814,1.298081,0.281751,0.254067,0.769014,0.850946,0.26915,1.850569,-0.249231
股票7,0.433466,-0.362083,-0.501669,1.402216,-0.381445,-1.303242,0.048012,-1.706043,-1.165417,-1.045564


## DataFrame的常用属性和方法

In [4]:
# Raw values of the frame as a 2-D NumPy array; the row and column labels are not part of it.
data.values

array([[ 0.17881696,  0.95398041,  0.16005895, -0.65580857,  0.01970752,
        -1.77930854,  0.58701428, -0.18375722,  0.24410406,  0.68915207],
       [ 1.10684276,  0.31269148,  0.59871227, -0.03660989,  1.48881453,
        -0.9393764 ,  0.80509558,  0.16021169, -0.56739404, -0.580094  ],
       [ 0.35445488, -0.77031149, -0.45671413, -1.29518734,  1.83705322,
         1.2187885 ,  0.21531972, -1.71305687,  1.10864472, -0.38019034],
       [-2.07257608,  0.62480589, -1.5923293 , -1.85797051, -0.83008131,
         0.25766507, -0.08640013,  0.28364953,  0.44298411,  1.35320224],
       [ 0.80281096, -1.48862358,  0.85107999, -1.6015682 ,  2.32994604,
        -0.8632186 ,  2.07107539,  0.15220274, -0.0084148 , -0.08981267],
       [-1.05038817,  0.72811681,  1.53504715,  1.05221779, -0.69238164,
        -0.44270679,  0.79805622,  0.3344249 , -0.2972689 ,  1.99824282],
       [ 0.00245626,  1.15981381,  1.29808121,  0.28175103,  0.25406671,
         0.76901421,  0.85094581,  0.26914981

In [5]:
# Row labels (the index) of the frame.
data.index

Index(['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7'], dtype='object')

In [6]:
# Column labels; here they form a DatetimeIndex because the columns were built with pd.date_range.
data.columns

DatetimeIndex(['2023-08-01', '2023-08-02', '2023-08-03', '2023-08-04',
               '2023-08-07', '2023-08-08', '2023-08-09', '2023-08-10',
               '2023-08-11', '2023-08-14'],
              dtype='datetime64[ns]', freq='B')

In [7]:
# Dimensions of the frame as a (rows, columns) tuple.
data.shape

(8, 10)

In [8]:
# Transpose: rows become columns and columns become rows.
data.T

Unnamed: 0,股票0,股票1,股票2,股票3,股票4,股票5,股票6,股票7
2023-08-01,0.178817,1.106843,0.354455,-2.072576,0.802811,-1.050388,0.002456,0.433466
2023-08-02,0.95398,0.312691,-0.770311,0.624806,-1.488624,0.728117,1.159814,-0.362083
2023-08-03,0.160059,0.598712,-0.456714,-1.592329,0.85108,1.535047,1.298081,-0.501669
2023-08-04,-0.655809,-0.03661,-1.295187,-1.857971,-1.601568,1.052218,0.281751,1.402216
2023-08-07,0.019708,1.488815,1.837053,-0.830081,2.329946,-0.692382,0.254067,-0.381445
2023-08-08,-1.779309,-0.939376,1.218789,0.257665,-0.863219,-0.442707,0.769014,-1.303242
2023-08-09,0.587014,0.805096,0.21532,-0.0864,2.071075,0.798056,0.850946,0.048012
2023-08-10,-0.183757,0.160212,-1.713057,0.28365,0.152203,0.334425,0.26915,-1.706043
2023-08-11,0.244104,-0.567394,1.108645,0.442984,-0.008415,-0.297269,1.850569,-1.165417
2023-08-14,0.689152,-0.580094,-0.38019,1.353202,-0.089813,1.998243,-0.249231,-1.045564


In [9]:
# Show the last rows of the frame; tail() returns 5 rows when called without an argument.
data.tail()

Unnamed: 0,2023-08-01,2023-08-02,2023-08-03,2023-08-04,2023-08-07,2023-08-08,2023-08-09,2023-08-10,2023-08-11,2023-08-14
股票3,-2.072576,0.624806,-1.592329,-1.857971,-0.830081,0.257665,-0.0864,0.28365,0.442984,1.353202
股票4,0.802811,-1.488624,0.85108,-1.601568,2.329946,-0.863219,2.071075,0.152203,-0.008415,-0.089813
股票5,-1.050388,0.728117,1.535047,1.052218,-0.692382,-0.442707,0.798056,0.334425,-0.297269,1.998243
股票6,0.002456,1.159814,1.298081,0.281751,0.254067,0.769014,0.850946,0.26915,1.850569,-0.249231
股票7,0.433466,-0.362083,-0.501669,1.402216,-0.381445,-1.303242,0.048012,-1.706043,-1.165417,-1.045564


## 索引的设置

In [10]:
# Replace the whole index at once; the new list must have one label per row.
new_index = ['股票_{}'.format(x) for x in range(len(data))]
data.index = new_index

# reset_index returns a NEW frame (here with the default 0..n-1 index, the old
# labels discarded because drop=True); it does not modify `data`, so keep the
# result under its own name instead of throwing it away.
data_reset = data.reset_index(drop=True)

data

Unnamed: 0,2023-08-01,2023-08-02,2023-08-03,2023-08-04,2023-08-07,2023-08-08,2023-08-09,2023-08-10,2023-08-11,2023-08-14
股票_0,0.178817,0.95398,0.160059,-0.655809,0.019708,-1.779309,0.587014,-0.183757,0.244104,0.689152
股票_1,1.106843,0.312691,0.598712,-0.03661,1.488815,-0.939376,0.805096,0.160212,-0.567394,-0.580094
股票_2,0.354455,-0.770311,-0.456714,-1.295187,1.837053,1.218789,0.21532,-1.713057,1.108645,-0.38019
股票_3,-2.072576,0.624806,-1.592329,-1.857971,-0.830081,0.257665,-0.0864,0.28365,0.442984,1.353202
股票_4,0.802811,-1.488624,0.85108,-1.601568,2.329946,-0.863219,2.071075,0.152203,-0.008415,-0.089813
股票_5,-1.050388,0.728117,1.535047,1.052218,-0.692382,-0.442707,0.798056,0.334425,-0.297269,1.998243
股票_6,0.002456,1.159814,1.298081,0.281751,0.254067,0.769014,0.850946,0.26915,1.850569,-0.249231
股票_7,0.433466,-0.362083,-0.501669,1.402216,-0.381445,-1.303242,0.048012,-1.706043,-1.165417,-1.045564


In [11]:
# Small sales table used to demonstrate promoting columns to the index.
# Each tuple is one row: (month, year, sale).
sales_rows = [
    (1, 2019, 55),
    (4, 2020, 40),
    (7, 2021, 84),
    (10, 2022, 21),
]
df = pd.DataFrame(sales_rows, columns=['month', 'year', 'sale'])

df

Unnamed: 0,month,year,sale
0,1,2019,55
1,4,2020,40
2,7,2021,84
3,10,2022,21


In [12]:
# Use 'month' as the index; drop=False also keeps it as an ordinary column.
# The result is only displayed here, not assigned back to df.
df.set_index('month', drop=False)

Unnamed: 0_level_0,month,year,sale
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,2019,55
4,4,2020,40
7,7,2021,84
10,10,2022,21


In [14]:
# Promote several columns at once: passing a list of column names builds a
# MultiIndex whose levels are, in order, 'month' and 'year'.
index_columns = ['month', 'year']
res = df.set_index(index_columns)

res

Unnamed: 0_level_0,Unnamed: 1_level_0,sale
month,year,Unnamed: 2_level_1
1,2019,55
4,2020,40
7,2021,84
10,2022,21


In [18]:
# Inspect the index of res: a MultiIndex with levels named 'month' and 'year'.
res.index

MultiIndex([( 1, 2019),
            ( 4, 2020),
            ( 7, 2021),
            (10, 2022)],
           names=['month', 'year'])