In [201]:
import numpy as np
import pandas as pd
import plotly_express as px
import matplotlib.pyplot as plt
from Invoice import Invoice
from Enterprise import Enterprise

In [None]:
# Read ./C/1_info.csv into `enterprise_info` and remember how many rows it has.
enterprise_info = pd.read_csv('./C/1_info.csv')
N_enterprise = len(enterprise_info)

In [None]:
# Build `enterprise_dic`: one Enterprise object per row of `enterprise_info`,
# keyed by the value in the 企业代号 column.
enterprise_dic = {}
codes = enterprise_info['企业代号']
names = enterprise_info['企业名称']
ratings = enterprise_info['信誉评级']
default_flags = enterprise_info['是否违约']
for row in range(N_enterprise):
    # Only the exact text '是' ("yes") counts as True; anything else is False.
    has_defaulted = bool(default_flags[row] == '是')
    enterprise_dic[codes[row]] = Enterprise(codes[row], names[row], ratings[row], has_defaulted)

In [None]:
# Read ./C/1_in.csv into `invoice_in` and remember how many rows it has.
invoice_in = pd.read_csv('./C/1_in.csv')
N_in = len(invoice_in)

In [None]:
from datetime import date
def get_date(date_str):
    """Parse a ``'YYYY/M/D'`` string into a :class:`datetime.date`.

    Month and day may be written with or without a leading zero
    (``'2017/7/18'`` and ``'2017/07/18'`` give the same result). Whitespace
    around the string or around a component is ignored.

    Parameters
    ----------
    date_str : str
        Date text with a four-digit year, using ``/`` as the separator.

    Returns
    -------
    datetime.date

    Raises
    ------
    ValueError
        If the text does not have exactly three ``/``-separated parts, the
        year is not four digits long, a part is not an integer, or the parts
        do not form a valid calendar date.
    """
    # Imported locally under a private alias so the function keeps working even
    # when the global name `date` is rebound (e.g. by `date = get_date(...)`
    # inside a loop elsewhere in the notebook).
    from datetime import date as _date

    parts = [part.strip() for part in date_str.strip().split('/')]
    if len(parts) != 3:
        raise ValueError('expected a date like YYYY/M/D, got %r' % (date_str,))
    if len(parts[0]) != 4:
        # Refuse two-digit years instead of silently reading '17' as year 17 AD.
        raise ValueError('expected a four-digit year in %r' % (date_str,))

    year, month, day = (int(part) for part in parts)
    return _date(year, month, day)

In [None]:
# Turn every row of `invoice_in` into an Invoice and attach it to the enterprise
# named in the row's 企业代号 column.
# The columns are looked up once, outside the loop, instead of once per row.
owner_codes = invoice_in['企业代号']
invoice_numbers = invoice_in['发票号码']
issue_dates = invoice_in['开票日期']
seller_codes = invoice_in['销方单位代号']
amounts = invoice_in['金额']
taxes = invoice_in['税额']
totals = invoice_in['价税合计']
statuses = invoice_in['发票状态']

for i in range(N_in):
    self_enterprise = enterprise_dic[owner_codes[i]]
    # Named `invoice_date`, not `date`: assigning to `date` would overwrite the
    # `datetime.date` class imported at module level.
    invoice_date = get_date(issue_dates[i])
    # Only the exact status text '有效发票' ("valid invoice") counts as available.
    state_available = bool(statuses[i] == '有效发票')
    # The trailing True is passed positionally; presumably it is the `buy_in`
    # flag read later in the notebook -- confirm against the Invoice class.
    invoice_object = Invoice(
        invoice_numbers[i],
        invoice_date,
        self_enterprise,
        seller_codes[i],
        amounts[i],
        taxes[i],
        totals[i],
        state_available,
        True,
    )
    self_enterprise.add_invoice(invoice_object)

In [None]:
# Read ./C/1_out.csv into `invoice_out` and remember how many rows it has.
invoice_out = pd.read_csv('./C/1_out.csv')
N_out = len(invoice_out)

In [None]:
# Turn every row of `invoice_out` into an Invoice and attach it to the enterprise
# named in the row's 企业代号 column.
# The columns are looked up once, outside the loop, instead of once per row.
owner_codes = invoice_out['企业代号']
invoice_numbers = invoice_out['发票号码']
issue_dates = invoice_out['开票日期']
buyer_codes = invoice_out['购方单位代号']
amounts = invoice_out['金额']
taxes = invoice_out['税额']
totals = invoice_out['价税合计']
statuses = invoice_out['发票状态']

for i in range(N_out):
    self_enterprise = enterprise_dic[owner_codes[i]]
    # Named `invoice_date`, not `date`: assigning to `date` would overwrite the
    # `datetime.date` class imported at module level.
    invoice_date = get_date(issue_dates[i])
    # Only the exact status text '有效发票' ("valid invoice") counts as available.
    state_available = bool(statuses[i] == '有效发票')
    # The trailing False is passed positionally; presumably it is the `buy_in`
    # flag read later in the notebook -- confirm against the Invoice class.
    invoice_object = Invoice(
        invoice_numbers[i],
        invoice_date,
        self_enterprise,
        buyer_codes[i],
        amounts[i],
        taxes[i],
        totals[i],
        state_available,
        False,
    )
    self_enterprise.add_invoice(invoice_object)

In [None]:
# Put each enterprise's invoices into ascending `date` order (sorted in place).
for ent in enterprise_dic.values():
    ent.invoice_list.sort(key=lambda inv: inv.date)

In [None]:
# Build `frame`: for every enterprise, one row per distinct invoice date holding
# the running balance (资金) after all invoices of that date have been applied.
# Rows are collected in a list and converted once at the end; growing a
# DataFrame with `DataFrame.append` inside a loop is quadratic, and the method
# was removed in pandas 2.0.
#
# NOTE(review): purchases subtract `sum_money` while sales add `amount`
# (presumably tax-inclusive vs. tax-exclusive figures) -- confirm this asymmetry
# is intended. `state_available` is not consulted here, so invoices that are not
# marked '有效发票' are included in the balance as well.
balance_records = []
for enterprise in enterprise_dic.values():
    if not enterprise.invoice_list:
        # No invoices means nothing to accumulate; indexing [0] would raise.
        continue
    balance = 0  # deliberately not called `sum`, which would shadow the builtin
    current_date = enterprise.invoice_list[0].date
    for invoice in enterprise.invoice_list:
        if invoice.date != current_date:
            # The date changed: record the balance reached on the previous date.
            balance_records.append({
                '日期': current_date,
                '资金': balance,
                '企业代号': enterprise.number,
                '信誉评级': enterprise.credit_rating,
            })
            current_date = invoice.date
        # `== True` is kept as written because Invoice is defined outside this
        # notebook and the stored type of `buy_in` is not visible here.
        if invoice.buy_in == True:
            balance = balance - invoice.sum_money
        else:
            balance = balance + invoice.amount
    # Record the balance for the last date, which the loop above never emits.
    balance_records.append({
        '日期': current_date,
        '资金': balance,
        '企业代号': enterprise.number,
        '信誉评级': enterprise.credit_rating,
    })

frame = pd.DataFrame(balance_records, columns=['日期', '资金', '企业代号', '信誉评级'])


In [None]:
# Line chart of 资金 against 日期, one coloured line per 企业代号, saved as HTML.
rating_order = {"信誉评级": ["A", "B", "C", "D"]}
graph = px.line(
    frame,
    x="日期",
    y="资金",
    color='企业代号',
    category_orders=rating_order,
    render_mode="auto",
)
graph.write_html('./1_graph/total.html')

In [74]:
# Build `frame2`: for each pair of consecutive rows of `frame` that belong to the
# same enterprise, record how many days (天数) separate the two dates together
# with the balance, code and rating of the earlier row.
# Rows are collected in a list and converted once; `DataFrame.append` was removed
# in pandas 2.0 and made the original loop quadratic.
dates = frame['日期'].tolist()
funds = frame['资金'].tolist()
codes = frame['企业代号'].tolist()
ratings = frame['信誉评级'].tolist()

gap_records = [
    {
        '天数': (dates[i + 1] - dates[i]).days,
        '资金': funds[i],
        '企业代号': codes[i],
        '信誉评级': ratings[i],
    }
    for i in range(len(frame) - 1)
    if codes[i + 1] == codes[i]  # never pair rows across two enterprises
]
frame2 = pd.DataFrame(gap_records, columns=['天数', '资金', '企业代号', '信誉评级'])


In [75]:
# Order rows by 资金, then 企业代号 (both ascending), and renumber them from 0.
# The cell that builds frame3 accumulates 天数 in this row order.
frame2 = frame2.sort_values(by=['资金', '企业代号'], ascending=True).reset_index(drop=True)

In [None]:
# Scatter plot of 天数 against 资金, coloured by 企业代号, saved as HTML.
rating_order = {"信誉评级": ["A", "B", "C", "D"]}
graph2 = px.scatter(
    frame2,
    x="资金",
    y="天数",
    color='企业代号',
    category_orders=rating_order,
)
graph2.write_html('./1_graph/money.html')

In [None]:
# Number of enterprises whose 信誉评级 is 'D'.
# Counts noted by the author for each rating: A 27, B 38, C 34, D 24.
len(enterprise_info.loc[enterprise_info['信誉评级'] == 'D'])

In [76]:
# Show frame2: one row per gap between consecutive balance dates of an enterprise
# (天数 = length of the gap in days, 资金 = balance at the start of the gap).
frame2

Unnamed: 0,天数,资金,企业代号,信誉评级
0,3,-2.772872e+09,E1,A
1,1,-2.770322e+09,E1,A
2,1,-2.739831e+09,E1,A
3,3,-2.739830e+09,E1,A
4,12,-2.739700e+09,E1,A
...,...,...,...,...
49675,6,1.641354e+09,E4,C
49676,1,1.642816e+09,E4,C
49677,5,1.642823e+09,E4,C
49678,12,1.642825e+09,E4,C


In [218]:
# Build `frame3`: for each enterprise, walk its frame2 rows in their current order
# and replace 天数 with the running total of days so far; also add a 是否违约
# column ('是' / '否') taken from the enterprise's `break_contract` flag.
# Rows are collected in a list and converted once; `DataFrame.append` was removed
# in pandas 2.0 and made the original loop quadratic.
cumulative_records = []
for enterprise in enterprise_dic.values():
    rows = frame2[frame2['企业代号'] == enterprise.number]
    # `== True` is kept as written because Enterprise is defined outside this
    # notebook and the stored type of `break_contract` is not visible here.
    default_label = '是' if enterprise.break_contract == True else '否'
    elapsed_days = 0  # deliberately not called `sum`, which would shadow the builtin
    for days, funds, code, rating in zip(rows['天数'], rows['资金'], rows['企业代号'], rows['信誉评级']):
        elapsed_days = elapsed_days + days
        cumulative_records.append({
            '天数': elapsed_days,
            '资金': funds,
            '企业代号': code,
            '信誉评级': rating,
            '是否违约': default_label,
        })

frame3 = pd.DataFrame(cumulative_records, columns=['天数', '资金', '企业代号', '信誉评级', '是否违约'])

In [223]:
# Show frame3: per enterprise, 天数 is the running total of days over its frame2 rows.
frame3

Unnamed: 0,天数,资金,企业代号,信誉评级,是否违约
0,3,-2.772872e+09,E1,A,否
1,4,-2.770322e+09,E1,A,否
2,5,-2.739831e+09,E1,A,否
3,8,-2.739830e+09,E1,A,否
4,20,-2.739700e+09,E1,A,否
...,...,...,...,...,...
49675,772,3.511289e+05,E99,D,是
49676,774,3.523300e+05,E99,D,是
49677,775,3.527300e+05,E99,D,是
49678,777,3.529300e+05,E99,D,是


In [222]:
# Order rows by 企业代号, then 天数 (both ascending), and renumber them from 0.
frame3 = frame3.sort_values(by=['企业代号', '天数'], ascending=True).reset_index(drop=True)

In [224]:
# Line chart of cumulative 天数 against 资金, one line per 企业代号, saved as HTML.
rating_order = {"信誉评级": ["A", "B", "C", "D"]}
graph3 = px.line(
    frame3,
    x="资金",
    y="天数",
    color='企业代号',
    category_orders=rating_order,
    render_mode="auto",
)
graph3.write_html('./1_graph/money2.html')

In [227]:
# Build `frame4`: per enterprise, divide every cumulative 天数 value of frame3 by
# that enterprise's largest cumulative value, giving a 频率 column.
# Rows are collected in a list and converted once; `DataFrame.append` was removed
# in pandas 2.0 and made the original loop quadratic.
frequency_records = []
for enterprise in enterprise_dic.values():
    rows = frame3[frame3['企业代号'] == enterprise.number]
    if rows.empty:
        # An enterprise without rows has nothing to normalise.
        continue
    # Take the maximum explicitly rather than reading the last row, so the result
    # does not depend on frame3 having been sorted first. Deliberately not called
    # `max`, which would shadow the builtin.
    total_days = rows['天数'].max()
    # `== True` is kept as written because Enterprise is defined outside this
    # notebook and the stored type of `break_contract` is not visible here.
    default_label = '是' if enterprise.break_contract == True else '否'
    for days, funds, code, rating in zip(rows['天数'], rows['资金'], rows['企业代号'], rows['信誉评级']):
        frequency_records.append({
            '频率': days / total_days,
            '资金': funds,
            '企业代号': code,
            '信誉评级': rating,
            '是否违约': default_label,
        })

frame4 = pd.DataFrame(frequency_records, columns=['频率', '资金', '企业代号', '信誉评级', '是否违约'])

In [228]:
# Line chart of 频率 against 资金, one line per 企业代号, saved as HTML.
rating_order = {"信誉评级": ["A", "B", "C", "D"]}
graph4 = px.line(
    frame4,
    x="资金",
    y="频率",
    color='企业代号',
    category_orders=rating_order,
    render_mode="auto",
)
graph4.write_html('./1_graph/money3.html')

In [236]:
def poly(frame, degrees=(1, 2, 3), plot=False):
    """Fit polynomials of several degrees to one enterprise's (资金, 频率) points.

    For every degree in ``degrees`` a least-squares polynomial is fitted with
    ``np.polyfit``. The fit whose absolute residuals have the smallest
    variance is returned.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns ``企业代号``, ``资金`` (used as x) and ``频率``
        (used as y), and every row must carry the same ``企业代号``.
    degrees : iterable of int, optional
        Candidate polynomial degrees. Defaults to ``(1, 2, 3)``. On a tie the
        degree listed first wins.
    plot : bool, optional
        When true, additionally draw the data and the selected fit with
        matplotlib and print the per-degree scores. Defaults to ``False``.

    Returns
    -------
    numpy.poly1d
        The selected polynomial, callable on 资金 values.

    Raises
    ------
    ValueError
        If ``frame`` has no rows or ``degrees`` is empty.
    AssertionError
        If ``frame`` mixes rows of more than one enterprise.
    """
    if len(frame) == 0:
        raise ValueError('poly() needs at least one row to fit')

    codes = frame['企业代号']
    # Positional `.iloc[0]` rather than `[0]`: the latter is a label lookup and
    # fails on a frame whose index does not contain 0.
    assert not (codes != codes.iloc[0]).any(), 'poly() expects rows of a single enterprise'

    # Force float arrays so object-dtype columns cannot leak into the fit.
    x = np.asarray(frame['资金'], dtype=float)
    y = np.asarray(frame['频率'], dtype=float)

    poly1d_dic = {}
    var_error = {}
    for degree in degrees:
        coefficients = np.polyfit(x, y, degree)
        poly1d_dic[degree] = np.poly1d(coefficients)
        # Selection score: variance of the absolute residuals.
        # NOTE(review): this is not the mean squared error -- confirm that this
        # criterion is the intended one.
        var_error[degree] = np.var(np.abs(np.polyval(coefficients, x) - y))

    if not var_error:
        raise ValueError('poly() needs at least one candidate degree')

    best_degree = min(var_error, key=var_error.get)
    best_fit = poly1d_dic[best_degree]

    if plot:
        plt.plot(x, y, 's', label='original values')
        plt.plot(x, best_fit(x), 'r', label='polyfit values')
        plt.xlabel('x')
        plt.ylabel('y')
        plt.legend(loc=4)  # put the legend in the lower-right corner
        plt.title('polyfitting')
        plt.show()
        print(var_error.values())

    return best_fit

In [237]:

# Fit only the first enterprise in `enterprise_dic` and print its code together
# with the polynomial returned by poly().
for enterprise in list(enterprise_dic.values())[:1]:
    rows = frame4[frame4['企业代号'] == enterprise.number].reset_index(drop=True)
    print(enterprise.number, poly(rows))

E1            3            2
5.948e-29 x + 1.95e-19 x + 4.475e-10 x + 0.9603


In [148]:
# Scratch check: print the first 天数 value among the first enterprise's frame3 rows.
for enterprise in enterprise_dic.values():
    temp = frame3[frame3.企业代号 == enterprise.number].reset_index(drop=True)
    # Not called `max`: that name would shadow the builtin, and the value read
    # here is the first row's, not a maximum.
    first_days = temp['天数'][0]
    print(first_days)
    break

941


In [151]:
# Look at the 频率 value in the row labelled 1 of frame4.
frame4.loc[1, '频率']

0.9968119022316685