In [1]:
from io import StringIO

import pandas as pd

# Display the installed pandas version.
pd.__version__

'1.3.0'

# 2 pandas 1.3重要更新一览

## 2.1 新增read_xml()与to_xml()读写XML

In [2]:
# Sample XML document with three <row> records; the "circle" row has no
# <sides> element.
xml = """<?xml version='1.0' encoding='utf-8'?>
<data>
 <row>
    <shape>square</shape>
    <degrees>360</degrees>
    <sides>4.0</sides>
 </row>
 <row>
    <shape>circle</shape>
    <degrees>360</degrees>
 </row>
 <row>
    <shape>triangle</shape>
    <degrees>180</degrees>
    <sides>3.0</sides>
 </row>
</data>"""

# Parse the document into a DataFrame. The text is wrapped in StringIO because
# passing literal XML to read_xml is deprecated in newer pandas releases; a
# file-like object is accepted by every release that has read_xml.
pd.read_xml(StringIO(xml))

Unnamed: 0,shape,degrees,sides
0,square,360,4.0
1,circle,360,
2,triangle,180,3.0


In [3]:
# Round trip: parse the XML, rename one column, and serialise back to XML.
# The source text is wrapped in StringIO because passing literal XML to
# read_xml is deprecated in newer pandas releases.
print(
    pd
    .read_xml(StringIO(xml))
    .rename(columns={
        'shape': 'new-shape'
    })
    .to_xml()
)

<?xml version='1.0' encoding='utf-8'?>
<data>
  <row>
    <index>0</index>
    <new-shape>square</new-shape>
    <degrees>360</degrees>
    <sides>4.0</sides>
  </row>
  <row>
    <index>1</index>
    <new-shape>circle</new-shape>
    <degrees>360</degrees>
    <sides/>
  </row>
  <row>
    <index>2</index>
    <new-shape>triangle</new-shape>
    <degrees>180</degrees>
    <sides>3.0</sides>
  </row>
</data>


## 2.2 Styler可使用原生css语法

In [4]:
# Style the parsed table with a CSS rule given as a plain CSS string in
# 'props'. The XML text is wrapped in StringIO because passing literal XML to
# read_xml is deprecated in newer pandas releases.
(
    pd
    .read_xml(StringIO(xml))
    .style
    .set_table_styles(
        [
            # Custom CSS applied to each table row's :hover pseudo-class
            {'selector': 'tr:hover',
             'props': 'background-color: #4fc3f7; color: red;'}
        ]
    )
)

Unnamed: 0,shape,degrees,sides
0,square,360,4.0
1,circle,360,
2,triangle,180,3.0


## 2.3 center参数在时间日期index的数据框rolling操作中可用

In [5]:
# Single-column frame indexed by five consecutive days starting 2020-01-01.
daily_index = pd.date_range("2020", periods=5, freq="1D")
df = pd.DataFrame({"A": [0, 1, 2, 3, 4]}, index=daily_index)
df

Unnamed: 0,A
2020-01-01,0
2020-01-02,1
2020-01-03,2
2020-01-04,3
2020-01-05,4


In [6]:
# Trailing window: each row sums the values of the 3 calendar days ending at
# that row's timestamp.
df.rolling(window="3D").sum()

Unnamed: 0,A
2020-01-01,0.0
2020-01-02,1.0
2020-01-03,3.0
2020-01-04,6.0
2020-01-05,9.0


In [7]:
# Same 3-day window, but centred on each row's timestamp instead of trailing it.
df.rolling(window="3D", center=True).sum()

Unnamed: 0,A
2020-01-01,1.0
2020-01-02,3.0
2020-01-03,6.0
2020-01-04,9.0
2020-01-05,7.0


## 2.4 sample()随机抽样新增ignore_index参数

In [8]:
# Two identical columns holding 0..9, so sampled rows are easy to recognise.
df = pd.DataFrame({column: range(10) for column in ('v1', 'v2')})

# Draw 5 rows; the original index labels are kept. No random_state is passed,
# so the selected rows differ from run to run.
df.sample(n=5)

Unnamed: 0,v1,v2
3,3,3
0,0,0
7,7,7
9,9,9
4,4,4


In [9]:
# Draw 5 rows again, this time relabelling the result 0..4 instead of keeping
# the original index labels.
df.sample(n=5, ignore_index=True)

Unnamed: 0,v1,v2
0,3,3
1,6,6
2,2,2
3,7,7
4,5,5


## 2.5 explode()新增多列操作支持

In [10]:
# Columns A and C each hold one list and one scalar; B is a scalar that is
# broadcast to every row.
list_like_a = [[0, 1, 2], 'foo']
list_like_c = [['a', 'b', 'c'], None]
df = pd.DataFrame({'A': list_like_a, 'B': 1, 'C': list_like_c})
df

Unnamed: 0,A,B,C
0,"[0, 1, 2]",1,"[a, b, c]"
1,foo,1,


In [11]:
# Explode A and C together: the lists in row 0 are expanded element by element
# in parallel, while the scalar row is kept as a single row.
df.explode(['A', 'C'])

Unnamed: 0,A,B,C
0,0,1,a
0,1,1,b
0,2,1,c
1,foo,1,


## 2.6 append模式下写出多工作表excel文件的新策略

In [12]:
import numpy as np

# Create demo.xlsx with two sheets of random numbers. The context manager
# saves and closes the workbook on exit, even if a write fails; it replaces
# the explicit writer.save() call, which newer pandas releases no longer have.
with pd.ExcelWriter('demo.xlsx') as writer:
    pd.DataFrame(np.random.rand(100, 10)).to_excel(writer, sheet_name='工作表1')
    pd.DataFrame(np.random.rand(100, 10)).to_excel(writer, sheet_name='工作表2')

In [13]:
# Writing to an already existing sheet name in append mode: with the default
# if_sheet_exists setting this raises a ValueError, which is caught and shown.
# The writer is opened in a `with` block so the file handle is always released
# (previously the writer was never closed). Append mode is only supported by
# openpyxl, so the engine is named explicitly rather than relying on the
# environment's default writer.
try:
    with pd.ExcelWriter('demo.xlsx', mode='a', engine='openpyxl') as writer:
        pd.DataFrame(np.random.rand(100, 10)).to_excel(writer, sheet_name='工作表1')
except ValueError as e:
    print('默认错误：', e)

默认错误： Sheet '工作表1' already exists and if_sheet_exists is set to 'error'.


In [14]:
# With if_sheet_exists='new', writing to an existing sheet name in append mode
# keeps the old sheet and stores the data under an automatically renamed sheet.
# The context manager saves and closes the workbook; it replaces the explicit
# writer.save() call, which newer pandas releases no longer have. Append mode
# is only supported by openpyxl, so the engine is named explicitly.
with pd.ExcelWriter(
    'demo.xlsx', mode='a', engine='openpyxl', if_sheet_exists='new'
) as writer:
    pd.DataFrame(np.random.rand(100, 10)).to_excel(writer, sheet_name='工作表1')

# Read the sheet names back, closing the file again before displaying them.
with pd.ExcelFile('demo.xlsx') as workbook:
    sheet_names = workbook.sheet_names
sheet_names

['工作表1', '工作表2', '工作表11']

In [15]:
# With if_sheet_exists='replace', writing to an existing sheet name in append
# mode overwrites that sheet, so the list of sheet names stays the same.
# The context manager saves and closes the workbook; it replaces the explicit
# writer.save() call, which newer pandas releases no longer have. Append mode
# is only supported by openpyxl, so the engine is named explicitly.
with pd.ExcelWriter(
    'demo.xlsx', mode='a', engine='openpyxl', if_sheet_exists='replace'
) as writer:
    pd.DataFrame(np.random.rand(100, 10)).to_excel(writer, sheet_name='工作表1')

# Read the sheet names back, closing the file again before displaying them.
with pd.ExcelFile('demo.xlsx') as workbook:
    sheet_names = workbook.sheet_names
sheet_names

['工作表1', '工作表2', '工作表11']

## 2.7 结合SQL读取数据库表时可直接设置类型转换

In [16]:
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine

# The database password is read from the PGPASSWORD environment variable so
# that it is never stored in the notebook; a KeyError here means the variable
# is not set. quote_plus escapes characters (such as '@' or '/') that would
# otherwise break the connection URL.
pg_password = quote_plus(os.environ['PGPASSWORD'])
engine = create_engine(f'postgresql://postgres:{pg_password}@localhost/day13')

In [17]:
# Baseline: column dtypes as pandas infers them for one author's rows.
author_query = '''SELECT * FROM bilibili WHERE author = '极速拍档' '''
pd.read_sql_query(author_query, con=engine).dtypes

type        object
author      object
title       object
coins        int64
danmu        int64
favorite     int64
likes        int64
replay       int64
share        int64
view         int64
dtype: object

In [18]:
# Same query, but the 'view' column is converted to str while reading via the
# dtype argument.
author_query = '''SELECT * FROM bilibili WHERE author = '极速拍档' '''
view_as_text = {'view': 'str'}
pd.read_sql_query(author_query, con=engine, dtype=view_as_text).dtypes

type        object
author      object
title       object
coins        int64
danmu        int64
favorite     int64
likes        int64
replay       int64
share        int64
view        object
dtype: object