## 109 与numpy记录数组转换

### from_records：将结构化数组或记录数组（record array）转换成数据框
参数：`data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None`
### to_records：将数据框转换成 numpy 记录数组
参数：`index=True, column_dtypes=None, index_dtypes=None`

In [1]:
import numpy  as np
import pandas as pd
from faker import Faker

# Build the 10-row demo frame used by every example below:
# columns end up ordered as dept, dept 1, name, 销售额, 时间.
fake = Faker('zh_CN')
df = pd.DataFrame({'name': [fake.name() for _ in range(10)]})

# Draw a 1-D vector (one value per row) and store it as float64 from the
# start, so the NaN written below fits without an implicit int -> float
# upcast on assignment (deprecated in recent pandas releases).
df['销售额'] = np.random.randint(100, 100000, size=10).astype('float64')
df['时间'] = pd.date_range(start='2020-01-01', periods=10, freq='D')
df.insert(0, 'dept', ['部门1'] * 5 + ['部门2'] * 5)
df.insert(1, 'dept 1', ['小部门1'] * 2 + ['小部门2'] * 3 + ['小部门3'] * 1 + ['小部门4'] * 4)

# Blank out one sales figure to show how missing values are carried through.
# Addressed by label rather than by the positional column index 3.
df.loc[1, '销售额'] = np.nan

### 演示数据

In [2]:
df  # display the demo DataFrame built in the setup cell

Unnamed: 0,dept,dept 1,name,销售额,时间
0,部门1,小部门1,魏凤英,37678.0,2020-01-01
1,部门1,小部门1,戴坤,,2020-01-02
2,部门1,小部门2,纪明,26939.0,2020-01-03
3,部门1,小部门2,曹凤兰,96852.0,2020-01-04
4,部门1,小部门2,林桂荣,27738.0,2020-01-05
5,部门2,小部门3,赵春梅,34332.0,2020-01-06
6,部门2,小部门4,胡鹏,67693.0,2020-01-07
7,部门2,小部门4,傅强,20224.0,2020-01-08
8,部门2,小部门4,韩丹丹,91480.0,2020-01-09
9,部门2,小部门4,孙春梅,33409.0,2020-01-10


In [3]:
df.to_records()  # default index=True: the index is emitted as the first field, named 'index'

rec.array([(0, '部门1', '小部门1', '魏凤英', 37678., '2020-01-01T00:00:00.000000000'),
           (1, '部门1', '小部门1', '戴坤',    nan, '2020-01-02T00:00:00.000000000'),
           (2, '部门1', '小部门2', '纪明', 26939., '2020-01-03T00:00:00.000000000'),
           (3, '部门1', '小部门2', '曹凤兰', 96852., '2020-01-04T00:00:00.000000000'),
           (4, '部门1', '小部门2', '林桂荣', 27738., '2020-01-05T00:00:00.000000000'),
           (5, '部门2', '小部门3', '赵春梅', 34332., '2020-01-06T00:00:00.000000000'),
           (6, '部门2', '小部门4', '胡鹏', 67693., '2020-01-07T00:00:00.000000000'),
           (7, '部门2', '小部门4', '傅强', 20224., '2020-01-08T00:00:00.000000000'),
           (8, '部门2', '小部门4', '韩丹丹', 91480., '2020-01-09T00:00:00.000000000'),
           (9, '部门2', '小部门4', '孙春梅', 33409., '2020-01-10T00:00:00.000000000')],
          dtype=[('index', '<i8'), ('dept', 'O'), ('dept 1', 'O'), ('name', 'O'), ('销售额', '<f8'), ('时间', '<M8[ns]')])

In [4]:
df.to_records(index=False) # index controls whether the index is included as a field; False leaves it out

rec.array([('部门1', '小部门1', '魏凤英', 37678., '2020-01-01T00:00:00.000000000'),
           ('部门1', '小部门1', '戴坤',    nan, '2020-01-02T00:00:00.000000000'),
           ('部门1', '小部门2', '纪明', 26939., '2020-01-03T00:00:00.000000000'),
           ('部门1', '小部门2', '曹凤兰', 96852., '2020-01-04T00:00:00.000000000'),
           ('部门1', '小部门2', '林桂荣', 27738., '2020-01-05T00:00:00.000000000'),
           ('部门2', '小部门3', '赵春梅', 34332., '2020-01-06T00:00:00.000000000'),
           ('部门2', '小部门4', '胡鹏', 67693., '2020-01-07T00:00:00.000000000'),
           ('部门2', '小部门4', '傅强', 20224., '2020-01-08T00:00:00.000000000'),
           ('部门2', '小部门4', '韩丹丹', 91480., '2020-01-09T00:00:00.000000000'),
           ('部门2', '小部门4', '孙春梅', 33409., '2020-01-10T00:00:00.000000000')],
          dtype=[('dept', 'O'), ('dept 1', 'O'), ('name', 'O'), ('销售额', '<f8'), ('时间', '<M8[ns]')])

In [5]:
df.to_records(index=False,column_dtypes='O') # column_dtypes as a single dtype: applied to every column (in the recorded output the datetime column shows up as integer epoch nanoseconds)

rec.array([('部门1', '小部门1', '魏凤英', 37678.0, 1577836800000000000),
           ('部门1', '小部门1', '戴坤', nan, 1577923200000000000),
           ('部门1', '小部门2', '纪明', 26939.0, 1578009600000000000),
           ('部门1', '小部门2', '曹凤兰', 96852.0, 1578096000000000000),
           ('部门1', '小部门2', '林桂荣', 27738.0, 1578182400000000000),
           ('部门2', '小部门3', '赵春梅', 34332.0, 1578268800000000000),
           ('部门2', '小部门4', '胡鹏', 67693.0, 1578355200000000000),
           ('部门2', '小部门4', '傅强', 20224.0, 1578441600000000000),
           ('部门2', '小部门4', '韩丹丹', 91480.0, 1578528000000000000),
           ('部门2', '小部门4', '孙春梅', 33409.0, 1578614400000000000)],
          dtype=[('dept', 'O'), ('dept 1', 'O'), ('name', 'O'), ('销售额', 'O'), ('时间', 'O')])

In [6]:
df.to_records(index=False,column_dtypes={'时间':'O'}) # column_dtypes as a dict: only the listed columns are cast, the others keep their dtype

rec.array([('部门1', '小部门1', '魏凤英', 37678., 1577836800000000000),
           ('部门1', '小部门1', '戴坤',    nan, 1577923200000000000),
           ('部门1', '小部门2', '纪明', 26939., 1578009600000000000),
           ('部门1', '小部门2', '曹凤兰', 96852., 1578096000000000000),
           ('部门1', '小部门2', '林桂荣', 27738., 1578182400000000000),
           ('部门2', '小部门3', '赵春梅', 34332., 1578268800000000000),
           ('部门2', '小部门4', '胡鹏', 67693., 1578355200000000000),
           ('部门2', '小部门4', '傅强', 20224., 1578441600000000000),
           ('部门2', '小部门4', '韩丹丹', 91480., 1578528000000000000),
           ('部门2', '小部门4', '孙春梅', 33409., 1578614400000000000)],
          dtype=[('dept', 'O'), ('dept 1', 'O'), ('name', 'O'), ('销售额', '<f8'), ('时间', 'O')])

In [7]:
data=df.to_records(index=False) # keep the index-free record array as input for the from_records examples

In [9]:
data  # display the record array produced by df.to_records(index=False)

rec.array([('部门1', '小部门1', '魏凤英', 37678., '2020-01-01T00:00:00.000000000'),
           ('部门1', '小部门1', '戴坤',    nan, '2020-01-02T00:00:00.000000000'),
           ('部门1', '小部门2', '纪明', 26939., '2020-01-03T00:00:00.000000000'),
           ('部门1', '小部门2', '曹凤兰', 96852., '2020-01-04T00:00:00.000000000'),
           ('部门1', '小部门2', '林桂荣', 27738., '2020-01-05T00:00:00.000000000'),
           ('部门2', '小部门3', '赵春梅', 34332., '2020-01-06T00:00:00.000000000'),
           ('部门2', '小部门4', '胡鹏', 67693., '2020-01-07T00:00:00.000000000'),
           ('部门2', '小部门4', '傅强', 20224., '2020-01-08T00:00:00.000000000'),
           ('部门2', '小部门4', '韩丹丹', 91480., '2020-01-09T00:00:00.000000000'),
           ('部门2', '小部门4', '孙春梅', 33409., '2020-01-10T00:00:00.000000000')],
          dtype=[('dept', 'O'), ('dept 1', 'O'), ('name', 'O'), ('销售额', '<f8'), ('时间', '<M8[ns]')])

In [8]:
# from_records is an alternate constructor (a classmethod): call it on the
# class, because the contents of an existing frame play no part in the result.
pd.DataFrame.from_records(data)  # numpy record array -> DataFrame

Unnamed: 0,dept,dept 1,name,销售额,时间
0,部门1,小部门1,魏凤英,37678.0,2020-01-01
1,部门1,小部门1,戴坤,,2020-01-02
2,部门1,小部门2,纪明,26939.0,2020-01-03
3,部门1,小部门2,曹凤兰,96852.0,2020-01-04
4,部门1,小部门2,林桂荣,27738.0,2020-01-05
5,部门2,小部门3,赵春梅,34332.0,2020-01-06
6,部门2,小部门4,胡鹏,67693.0,2020-01-07
7,部门2,小部门4,傅强,20224.0,2020-01-08
8,部门2,小部门4,韩丹丹,91480.0,2020-01-09
9,部门2,小部门4,孙春梅,33409.0,2020-01-10


In [10]:
# index: name of the record field to use as the row index.
# Called on the class because from_records is an alternate constructor.
pd.DataFrame.from_records(data, index='dept')

Unnamed: 0_level_0,dept 1,name,销售额,时间
dept,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
部门1,小部门1,魏凤英,37678.0,2020-01-01
部门1,小部门1,戴坤,,2020-01-02
部门1,小部门2,纪明,26939.0,2020-01-03
部门1,小部门2,曹凤兰,96852.0,2020-01-04
部门1,小部门2,林桂荣,27738.0,2020-01-05
部门2,小部门3,赵春梅,34332.0,2020-01-06
部门2,小部门4,胡鹏,67693.0,2020-01-07
部门2,小部门4,傅强,20224.0,2020-01-08
部门2,小部门4,韩丹丹,91480.0,2020-01-09
部门2,小部门4,孙春梅,33409.0,2020-01-10


In [12]:
# index as a list of field names: builds a two-level row index.
# Called on the class because from_records is an alternate constructor.
pd.DataFrame.from_records(data, index=['dept', 'dept 1'])

Unnamed: 0_level_0,Unnamed: 1_level_0,name,销售额,时间
dept,dept 1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
部门1,小部门1,魏凤英,37678.0,2020-01-01
部门1,小部门1,戴坤,,2020-01-02
部门1,小部门2,纪明,26939.0,2020-01-03
部门1,小部门2,曹凤兰,96852.0,2020-01-04
部门1,小部门2,林桂荣,27738.0,2020-01-05
部门2,小部门3,赵春梅,34332.0,2020-01-06
部门2,小部门4,胡鹏,67693.0,2020-01-07
部门2,小部门4,傅强,20224.0,2020-01-08
部门2,小部门4,韩丹丹,91480.0,2020-01-09
部门2,小部门4,孙春梅,33409.0,2020-01-10


In [13]:
# exclude: record fields to leave out of the resulting frame.
# Called on the class because from_records is an alternate constructor.
pd.DataFrame.from_records(data, index=['dept', 'dept 1'], exclude=['name'])

Unnamed: 0_level_0,Unnamed: 1_level_0,销售额,时间
dept,dept 1,Unnamed: 2_level_1,Unnamed: 3_level_1
部门1,小部门1,37678.0,2020-01-01
部门1,小部门1,,2020-01-02
部门1,小部门2,26939.0,2020-01-03
部门1,小部门2,96852.0,2020-01-04
部门1,小部门2,27738.0,2020-01-05
部门2,小部门3,34332.0,2020-01-06
部门2,小部门4,67693.0,2020-01-07
部门2,小部门4,20224.0,2020-01-08
部门2,小部门4,91480.0,2020-01-09
部门2,小部门4,33409.0,2020-01-10


In [15]:
# coerce_float: when True, try to convert non-string, non-numeric objects
# (e.g. decimal.Decimal) to float. False is the default, so this call shows
# the same result as the exclude example.
# Called on the class because from_records is an alternate constructor.
pd.DataFrame.from_records(data, index=['dept', 'dept 1'], exclude=['name'], coerce_float=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,销售额,时间
dept,dept 1,Unnamed: 2_level_1,Unnamed: 3_level_1
部门1,小部门1,37678.0,2020-01-01
部门1,小部门1,,2020-01-02
部门1,小部门2,26939.0,2020-01-03
部门1,小部门2,96852.0,2020-01-04
部门1,小部门2,27738.0,2020-01-05
部门2,小部门3,34332.0,2020-01-06
部门2,小部门4,67693.0,2020-01-07
部门2,小部门4,20224.0,2020-01-08
部门2,小部门4,91480.0,2020-01-09
部门2,小部门4,33409.0,2020-01-10


In [16]:
# NOTE: this call passes neither nrows nor columns, so it repeats the
# coerce_float example unchanged. For reference:
#   nrows   - number of rows to read when data is an iterator
#   columns - column names to use when data carries no field names
# Called on the class because from_records is an alternate constructor.
pd.DataFrame.from_records(data, index=['dept', 'dept 1'], exclude=['name'], coerce_float=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,销售额,时间
dept,dept 1,Unnamed: 2_level_1,Unnamed: 3_level_1
部门1,小部门1,37678.0,2020-01-01
部门1,小部门1,,2020-01-02
部门1,小部门2,26939.0,2020-01-03
部门1,小部门2,96852.0,2020-01-04
部门1,小部门2,27738.0,2020-01-05
部门2,小部门3,34332.0,2020-01-06
部门2,小部门4,67693.0,2020-01-07
部门2,小部门4,20224.0,2020-01-08
部门2,小部门4,91480.0,2020-01-09
部门2,小部门4,33409.0,2020-01-10
