In [None]:
# Notebook metadata banner, echoed verbatim when this cell runs.
file_banner = """
@File         : exercise_9_tax_planning.ipynb
@Author(s)    : Stephen CUI
@LastEditor(s): Stephen CUI
@CreatedTime  : 2024-10-03 15:31:05
@Email        : cuixuanstephen@gmail.com
@Description  : 税务规划
"""
print(file_banner)

要添加新列，只需把值赋给数据框中以该列名为键的位置（例如 `df["current_net"] = ...`）。通常赋的是一个 Series，但也可以是 NumPy 数组或列表，只要其长度与现有各列相同即可。列名是唯一的——因此就像字典一样，给已存在的列赋值会用新值替换该列。

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Sample product catalogue, written column by column (one list per column,
# one position per product).
df = pd.DataFrame(
    {
        "product_id": [23, 96, 97, 15, 87],
        "name": [
            "computer",
            "Python Workout",
            "Pandas Workout",
            "banana",
            "sandwich",
        ],
        "wholesale_price": [500, 35, 35, 0.5, 3],
        "retail_price": [1000, 75, 75, 1, 5],
        "sales": [100, 1000, 500, 200, 300],
    }
)

# Net income per product: unit margin (retail - wholesale) times units sold.
df["current_net"] = df["retail_price"].sub(df["wholesale_price"]).mul(df["sales"])

In [4]:
# Same calculation through assign(), which returns a new frame with the
# column set instead of modifying df.
df.assign(
    current_net=lambda frame: (frame['retail_price'] - frame['wholesale_price']) * frame['sales']
)

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net
0,23,computer,500.0,1000,100,50000.0
1,96,Python Workout,35.0,75,1000,40000.0
2,97,Pandas Workout,35.0,75,500,20000.0
3,15,banana,0.5,1,200,100.0
4,87,sandwich,3.0,5,300,600.0


In [5]:
# Net income left under tax rates of 15 %, 20 % and 25 %, i.e. keeping
# 85 %, 80 % and 75 % of current_net respectively.
for column, kept_share in (('after_15', 0.85), ('after_20', 0.80), ('after_25', 0.75)):
    df[column] = df['current_net'] * kept_share
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net,after_15,after_20,after_25
0,23,computer,500.0,1000,100,50000.0,42500.0,40000.0,37500.0
1,96,Python Workout,35.0,75,1000,40000.0,34000.0,32000.0,30000.0
2,97,Pandas Workout,35.0,75,500,20000.0,17000.0,16000.0,15000.0
3,15,banana,0.5,1,200,100.0,85.0,80.0,75.0
4,87,sandwich,3.0,5,300,600.0,510.0,480.0,450.0


In [7]:
# Column totals: net income before tax and under each of the three rates.
df.loc[:, ['current_net', 'after_15', 'after_20', 'after_25']].sum()

current_net    110700.0
after_15        94095.0
after_20        88560.0
after_25        83025.0
dtype: float64

Beyond

In [10]:
# Rebuild the product table from scratch: one tuple per product, with the
# column names supplied separately.
df = pd.DataFrame(
    [
        (23, "computer", 500, 1000, 100),
        (96, "Python Workout", 35, 75, 1000),
        (97, "Pandas Workout", 35, 75, 500),
        (15, "banana", 0.5, 1, 200),
        (87, "sandwich", 3, 5, 300),
    ],
    columns=["product_id", "name", "wholesale_price", "retail_price", "sales"],
)

# Net income per product: unit margin (retail - wholesale) times units sold.
df["current_net"] = (df["retail_price"] - df["wholesale_price"]).mul(df["sales"])
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net
0,23,computer,500.0,1000,100,50000.0
1,96,Python Workout,35.0,75,1000,40000.0
2,97,Pandas Workout,35.0,75,500,20000.0
3,15,banana,0.5,1,200,100.0
4,87,sandwich,3.0,5,300,600.0


In [11]:
# Total net income when only products with a net above 20,000 are taxed
# (those keep 75 %); everything else is left untouched.
df['current_net'].apply(lambda net: net if net <= 20_000 else net * 0.75).sum()

88200.0

In [12]:
def calculate_tax(x):
    """Return the amount that remains of ``x`` after tax.

    Despite the name, the result is the after-tax amount, not the tax itself:
    values strictly greater than 20,000 are multiplied by 0.75, and values of
    20,000 or less are returned unchanged.
    """
    return x * 0.75 if x > 20_000 else x
    
# Run calculate_tax over every product's net income and total the results.
df.loc[:, 'current_net'].apply(calculate_tax).sum()

88200.0

In [13]:
# Map each product's retail price to the share of net income kept after tax:
#   (0, 30]   -> 1.0  (untaxed)
#   (30, 80]  -> 0.9
#   above 80  -> 0.75
# The last edge is open-ended (np.inf) instead of the current maximum price.
# Using the maximum makes the edges stop being strictly increasing whenever
# no product costs more than 80, and pd.cut then raises ValueError.
df["after_tax"] = pd.cut(
    df["retail_price"],
    bins=[0, 30, 80, np.inf],
    labels=[1, 0.9, 0.75],
).astype(np.float64)  # labels come back as a categorical; convert to plain floats
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net,after_tax
0,23,computer,500.0,1000,100,50000.0,0.75
1,96,Python Workout,35.0,75,1000,40000.0,0.9
2,97,Pandas Workout,35.0,75,500,20000.0,0.9
3,15,banana,0.5,1,200,100.0,1.0
4,87,sandwich,3.0,5,300,600.0,1.0


In [14]:
# Final net income: pre-tax net scaled by the retained share in after_tax.
df['final_net'] = df['current_net'].mul(df['after_tax'])
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net,after_tax,final_net
0,23,computer,500.0,1000,100,50000.0,0.75,37500.0
1,96,Python Workout,35.0,75,1000,40000.0,0.9,36000.0
2,97,Pandas Workout,35.0,75,500,20000.0,0.9,18000.0
3,15,banana,0.5,1,200,100.0,1.0,100.0
4,87,sandwich,3.0,5,300,600.0,1.0,600.0


In [15]:
# Render floats with a thousands separator and two decimal places.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [16]:
# Show the frame again now that the display float format has been set.
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net,after_tax,final_net
0,23,computer,500.0,1000,100,50000.0,0.75,37500.0
1,96,Python Workout,35.0,75,1000,40000.0,0.9,36000.0
2,97,Pandas Workout,35.0,75,500,20000.0,0.9,18000.0
3,15,banana,0.5,1,200,100.0,1.0,100.0
4,87,sandwich,3.0,5,300,600.0,1.0,600.0
