## 加载数据

In [20]:
# 解决echarts在notebook的依赖
from pyecharts.globals import CurrentConfig, NotebookType
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_NOTEBOOK

In [1]:
import pandas as pd
import datetime
from pyecharts.charts import *
from pyecharts import options as opts
df1 = pd.read_excel('data/业务数据.xls') 
#要使用原始数据构建新指标，所以保留原始数据，copy新的数据，在新的数据中创建新指标
df2 = df1.copy()
df2.head()

Unnamed: 0,销售,账单状态,账单周期,账单金额,开票金额,实收金额,未收金额,预计付款日,应付日期,商务催收日期,账期,实际到账日,开票日期,客服
0,s101,未确认,2019-05,29805.0,,,,2019-07-31,2019-07-31,2019-08-15,60,,,a201
1,s102,未确认,2019-05,1572.6,,,,2019-07-31,2019-07-31,2019-08-15,60,,,a202
2,s103,已确认,2019-04,487551.2,487551.2,,487551.2,2019-06-30,2019-06-30,2019-07-15,60,,05-16,a203
3,s104,已确认,2019-04,378835.0,378835.0,,378835.0,2019-07-31,2019-07-31,2019-08-15,90,,05-08,a204
4,s105,已确认,2019-04,326866.0,326866.0,,326866.0,2019-07-31,2019-07-31,2019-08-15,90,,05-10,a205


In [2]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5257 entries, 0 to 5256
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   销售      5257 non-null   object 
 1   账单状态    5257 non-null   object 
 2   账单周期    5257 non-null   object 
 3   账单金额    5257 non-null   float64
 4   开票金额    5010 non-null   float64
 5   实收金额    4470 non-null   float64
 6   未收金额    5010 non-null   float64
 7   预计付款日   5256 non-null   object 
 8   应付日期    5257 non-null   object 
 9   商务催收日期  5257 non-null   object 
 10  账期      5257 non-null   int64  
 11  实际到账日   4387 non-null   object 
 12  开票日期    4996 non-null   object 
 13  客服      5257 non-null   object 
dtypes: float64(4), int64(1), object(9)
memory usage: 575.1+ KB


In [3]:
df2.describe()

Unnamed: 0,账单金额,开票金额,实收金额,未收金额,账期
count,5257.0,5010.0,4470.0,5010.0,5257.0
mean,40732.41,40968.96,40824.19,4684.636,64.539661
std,81761.72,80072.45,79706.28,28884.64,15.622765
min,0.0,25.0,0.0,0.0,0.0
25%,5103.0,5300.0,5112.25,0.0,60.0
50%,14365.0,14865.6,14340.0,0.0,60.0
75%,41780.0,42202.5,41707.5,0.0,75.0
max,1508796.0,1356215.0,1301665.0,1277098.0,90.0


- 数据处理，填充缺失值,将日期时间类型转换成datetime类型

In [4]:
# 获取最大的日期，作为当前时间
today_time = pd.to_datetime(df2.实际到账日.fillna('0').max())

df2['实收金额'] = df2.实收金额.fillna(0)
df2['开票金额'] = df2.开票金额.fillna(0)
df2['未收金额'] = df2.未收金额.fillna(0)

df2['账单周期'] = pd.to_datetime(df2.账单周期)
df2['应付日期'] = pd.to_datetime(df2.应付日期)

df2['实际到账日'] = pd.to_datetime(df2.实际到账日).fillna(today_time)

- 为了后续计算，在原始数据基础上构造新的字段：是否逾期，是否逾期90天

In [5]:
df2['是否到期'] = df2.apply(lambda x : 0 if x.应付日期 > today_time else 1,axis=1)

df2['是否到期90天'] =  ( today_time - df2.应付日期 ).map(lambda x : 1 if x.days >= 90 else 0)

df2['未收金额2'] =  (df2.账单金额 - df2.实收金额)

df2['历史逾期天数'] = df2.apply(lambda x : (x.实际到账日 -  x.应付日期).days if x.未收金额2 == 0  else  (today_time - x.应付日期).days,axis=1)

df2['当前逾期天数'] = df2.apply(lambda x : (x.历史逾期天数) if x.未收金额2 > 0  else 0 ,axis = 1) 

- 查询实际到账日期字段得知当前最近的到账日为2019年5月17日

In [6]:
df3 =df2.copy()
#创建’账单季度‘字段，将日期转换成季度
df3['账单季度'] = df3['账单周期'].map(lambda x : x.to_period('Q'))
#提取2017年3季度到2018年4季度数据
df3 = df3[(df3['账单季度']<='2018Q4') & (df3['账单季度']>='2017Q3')]
df3.shape

(3856, 20)

- 按照季度统计账单金额，到期金额，和逾期金额

In [7]:
#账单金额
fn1 = df3.groupby('账单季度')[['账单金额']].sum()
fn1.columns = ['账单金额']
fn1

Unnamed: 0_level_0,账单金额
账单季度,Unnamed: 1_level_1
2017Q3,8247952.62
2017Q4,11643604.99
2018Q1,17149674.79
2018Q2,31097661.29
2018Q3,38292071.12
2018Q4,51963089.64


In [8]:
#90天到期金额
df4 = df3[(df3.是否到期90天 == 1)]
fn2 = df4.groupby('账单季度')[['账单金额']].sum()
fn2.columns = ['到期金额']
fn2

Unnamed: 0_level_0,到期金额
账单季度,Unnamed: 1_level_1
2017Q3,8247952.62
2017Q4,11643604.99
2018Q1,17149674.79
2018Q2,31097661.29
2018Q3,38292071.12
2018Q4,28265677.59


In [9]:
df4 = df3[(df3.是否到期90天 == 1)]
fn3 = df4.groupby('账单季度')[['未收金额2']].sum()
fn3.columns = ['当前逾期90+金额']
fn3

Unnamed: 0_level_0,当前逾期90+金额
账单季度,Unnamed: 1_level_1
2017Q3,63883.0
2017Q4,57380.0
2018Q1,64283.0
2018Q2,106930.0
2018Q3,412920.1
2018Q4,304183.0


In [10]:
dfs = [fn1,fn2,fn3]
final1 = pd.concat(dfs,axis=1)
final1

Unnamed: 0_level_0,账单金额,到期金额,当前逾期90+金额
账单季度,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017Q3,8247952.62,8247952.62,63883.0
2017Q4,11643604.99,11643604.99,57380.0
2018Q1,17149674.79,17149674.79,64283.0
2018Q2,31097661.29,31097661.29,106930.0
2018Q3,38292071.12,38292071.12,412920.1
2018Q4,51963089.64,28265677.59,304183.0


In [11]:
final1['90+净坏账率'] = round(final1['当前逾期90+金额'] / final1.到期金额,3)
final1

Unnamed: 0_level_0,账单金额,到期金额,当前逾期90+金额,90+净坏账率
账单季度,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017Q3,8247952.62,8247952.62,63883.0,0.008
2017Q4,11643604.99,11643604.99,57380.0,0.005
2018Q1,17149674.79,17149674.79,64283.0,0.004
2018Q2,31097661.29,31097661.29,106930.0,0.003
2018Q3,38292071.12,38292071.12,412920.1,0.011
2018Q4,51963089.64,28265677.59,304183.0,0.011


In [12]:
bar = (
    Bar()
    .add_xaxis(list(final1.index.values.astype(str)))
    .add_yaxis(
        "账单金额",
        list(final1.账单金额),
        yaxis_index=0,
        color="#5793f3",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="90+净坏账率"),
    )
    .extend_axis(
        yaxis=opts.AxisOpts(
            name="90+净坏账率",
            type_="value",
            min_=0,
            max_=0.014,
            position="right",
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(color="#d14a61")
            ),
            axislabel_opts=opts.LabelOpts(formatter="{value}"),
        )
    )
)
line = (
    Line()
    .add_xaxis(list(final1.index.values.astype(str)))
    .add_yaxis(
        "90+净坏账率",
        list(final1['90+净坏账率']),
        yaxis_index=1,
        color="#675bba",
        label_opts=opts.LabelOpts(is_show=False),
    )
)


In [24]:
bar.overlap(line).render()

'/Users/vincent/Movies/render.html'

- 计算每个季度的60天账单入催金额，90天账单入催金额

In [14]:
#60天账期的账单金额
df4 = df3[(df3.账期 == 60)&(df3.是否到期 == 1)]
fn1 = df4.groupby('账单季度')[['账单金额']].sum()
fn1.columns = ['60天账期的账单金额']
#60天账期的入催金额
df4 = df3[(df3.账期 == 60)&(df3.是否到期 == 1)&(df3.历史逾期天数>0)]
fn2 = df4.groupby('账单季度')[['未收金额2']].sum()
fn2.columns = ['60天账期的入催金额']
#90天账期的账单金额
df4 = df3[(df3.账期 == 90)&(df3.是否到期 == 1)]
fn3 = df4.groupby('账单季度')[['账单金额']].sum()
fn3.columns = ['90天账期的账单金额']
#90天账期的入催金额
df4 = df3[(df3.账期 == 90)&(df3.是否到期 == 1)&(df3.历史逾期天数>0)]
fn4 = df4.groupby('账单季度')[['未收金额2']].sum()
fn4.columns = ['90天账期的入催金额']

- 计算入催率

In [15]:
dfs = [fn1,fn2,fn3,fn4]
final2 = pd.concat(dfs,axis=1)
# final2 = fn1.merge(fn2,on='账单季度').merge(fn3,on='账单季度',how='left').merge(fn4,on='账单季度')
final2['60天账期入催率'] = round(final2['60天账期的入催金额'] / final2['60天账期的账单金额'],3)
final2['90天账期入催率'] = round(final2['90天账期的入催金额']/final2['90天账期的账单金额'],3)
final2

Unnamed: 0_level_0,60天账期的账单金额,60天账期的入催金额,90天账期的账单金额,90天账期的入催金额,60天账期入催率,90天账期入催率
账单季度,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017Q3,4854770.94,36983.0,2769264.0,1900.0,0.008,0.001
2017Q4,6737327.99,52750.0,3921491.0,0.0,0.008,0.0
2018Q1,12106356.79,62460.0,4244304.0,800.0,0.005,0.0
2018Q2,19234086.87,13590.0,8427775.0,0.0,0.001,0.0
2018Q3,22830710.42,380265.1,9835629.0,8235.0,0.017,0.001
2018Q4,26337959.52,584789.5,17706430.0,325141.0,0.022,0.018


In [25]:
line = (
    Line()
    .add_xaxis(list(final1.index.values.astype(str)))
    .add_yaxis(
        "60天账期入催率",
        list(final2['60天账期入催率']),
        yaxis_index=0,
        color="#675bba",
        label_opts=opts.LabelOpts(is_show=False),
    )
    .set_global_opts(
    title_opts=opts.TitleOpts(title="不同账期入催率"),
    )
    .add_xaxis(list(final1.index.values.astype(str)))
    .add_yaxis(
        "90天账期入催率",
        list(final2['90天账期入催率']),
        yaxis_index=0,
        color="#d14a61",
        label_opts=opts.LabelOpts(is_show=False),
    )
)
line.render()

'/Users/vincent/Movies/render.html'

- 不同逾期天数的回收情况

In [17]:
df6 = df3[(df3.未收金额2 == 0)&(df3.是否到期 == 1)].copy()
#使用cut，讲数据按照逾期天数分箱，然后添加分箱之后结果标签
df6['历史逾期天数'] = pd.cut(df6['历史逾期天数'],bins=[-999,0,5,10,15,20,30,60,90,999],right=True,
                       labels=['0','1-5','6-10','11-15','16-20','21-30','31-60','61-90','91+'])
final3 = df6.groupby('历史逾期天数')[['账期']].count()
final3.columns = ['回收账单数']
final3

Unnamed: 0_level_0,回收账单数
历史逾期天数,Unnamed: 1_level_1
0,2400
1-5,358
6-10,235
11-15,215
16-20,92
21-30,189
31-60,156
61-90,60
91+,88


In [26]:
ydata = final3['回收账单数'].values.tolist()
bar = (
    Bar()
    .add_xaxis(list(final3.index.values.tolist()))
    .add_yaxis("收回账单数",ydata,yaxis_index=0,color="#675bba")
    .set_global_opts(
    title_opts=opts.TitleOpts(title="不同逾期天数的已收回账单数"),
    )
)
bar.render()

'/Users/vincent/Movies/render.html'