In [None]:
import numpy as np
import pandas as pd
import plotly_express as px
import matplotlib.pyplot as plt
from Invoice import Invoice
from Enterprise import Enterprise

In [None]:
# Load the enterprise master table (one row per enterprise)
enterprise_info = pd.read_csv(filepath_or_buffer='./C/1_info.csv')
# Number of enterprises = number of rows in the table
N_enterprise = len(enterprise_info.index)

In [None]:
# Wrap every row of the enterprise table in an Enterprise object
enterprise_dic = {}  # enterprise code -> Enterprise object
# Walk the four needed columns in parallel instead of indexing cell by cell
# (assumes the default 0..N-1 index produced by read_csv).
for number, name, credit_rating, break_contract_str in zip(
    enterprise_info['企业代号'].values,
    enterprise_info['企业名称'].values,
    enterprise_info['信誉评级'].values,
    enterprise_info['是否违约'].values,
):
    # '是' marks an enterprise that has broken a contract; anything else is False
    break_contract = bool(break_contract_str == '是')
    enterprise_object = Enterprise(number, name, credit_rating, break_contract)
    enterprise_dic[number] = enterprise_object

In [None]:
# Convert the date strings used in the data into datetime.date objects
from datetime import date
def get_date(date_str):
    """Parse a 'Y/M/D' string (month/day may be unpadded) into a datetime.date.

    Args:
        date_str: Date text such as '2017/7/18' or '2017/07/18'.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If the text does not have exactly three '/'-separated
            integer parts or does not describe a valid calendar date.
    """
    # Import locally: other cells rebind the global name `date`, so the
    # function must not depend on what that global currently refers to.
    import datetime

    y, m, d = date_str.split('/')
    # int() accepts unpadded ('7') and padded ('07') parts alike, so no manual
    # zero-padding is needed.
    return datetime.date(int(y), int(m), int(d))

In [None]:
# Load the purchase (input) invoices and attach each one to its enterprise
invoice_in = pd.read_csv('./C/1_in.csv')
N_in = invoice_in.shape[0]
# Iterate the needed columns in parallel; this avoids one label lookup per cell.
for (enterprise_name, number, date_str, partner,
     amount, tax, sum_money, state_available_str) in zip(
        invoice_in['企业代号'].values,
        invoice_in['发票号码'].values,
        invoice_in['开票日期'].values,
        invoice_in['销方单位代号'].values,   # seller code: the counterparty of a purchase
        invoice_in['金额'].values,
        invoice_in['税额'].values,
        invoice_in['价税合计'].values,
        invoice_in['发票状态'].values):
    # Use a dedicated name for the parsed date: assigning to `date` would
    # overwrite the `date` class imported from datetime for the whole notebook.
    invoice_date = get_date(date_str)
    self_enterprise = enterprise_dic[enterprise_name]
    # Only '有效发票' counts as a valid invoice
    state_available = bool(state_available_str == '有效发票')
    # Trailing True flags the invoice as a purchase (buy-in)
    invoice_object = Invoice(number, invoice_date, self_enterprise, partner,
                             amount, tax, sum_money, state_available, True)
    self_enterprise.add_invoice(invoice_object)

In [None]:
# Load the sales (output) invoices and attach each one to its enterprise
invoice_out = pd.read_csv('./C/1_out.csv')
N_out = invoice_out.shape[0]
# Iterate the needed columns in parallel; this avoids one label lookup per cell.
for (enterprise_name, number, date_str, partner,
     amount, tax, sum_money, state_available_str) in zip(
        invoice_out['企业代号'].values,
        invoice_out['发票号码'].values,
        invoice_out['开票日期'].values,
        invoice_out['购方单位代号'].values,   # buyer code: the counterparty of a sale
        invoice_out['金额'].values,
        invoice_out['税额'].values,
        invoice_out['价税合计'].values,
        invoice_out['发票状态'].values):
    # Use a dedicated name for the parsed date: assigning to `date` would
    # overwrite the `date` class imported from datetime for the whole notebook.
    invoice_date = get_date(date_str)
    self_enterprise = enterprise_dic[enterprise_name]
    # Only '有效发票' counts as a valid invoice
    state_available = bool(state_available_str == '有效发票')
    # Trailing False flags the invoice as a sale (not a buy-in)
    invoice_object = Invoice(number, invoice_date, self_enterprise, partner,
                             amount, tax, sum_money, state_available, False)
    self_enterprise.add_invoice(invoice_object)

In [None]:
# Put every enterprise's invoices into chronological order (in place)
for enterprise in enterprise_dic.values():
    invoices = enterprise.invoice_list
    invoices.sort(key=lambda invoice: invoice.date)

In [None]:
# Summarise each enterprise's customer relationships (sales per buyer) and chart them.
# Voided invoices ('作废发票') are excluded from every total.
valid_sales = invoice_out[invoice_out['发票状态'] != '作废发票']
for enterprise in enterprise_dic.values():
    own_sales = valid_sales[valid_sales['企业代号'] == enterprise.number]
    if own_sales.empty:
        # No non-voided sales invoice for this enterprise: nothing to chart.
        continue

    # One row per buyer: summed amounts plus the number of invoices.
    # Grouping attributes each total to the buyer it actually belongs to.
    by_buyer = own_sales.groupby('购方单位代号')
    frame8 = by_buyer[['金额', '税额', '价税合计']].sum()
    frame8['交易数'] = by_buyer.size()
    frame8 = frame8.reset_index()
    frame8.insert(0, '企业代号', enterprise.number)

    # Largest customers first
    frame8 = frame8.sort_values(by=['金额', '交易数'], axis=0, ascending=[False, False]).reset_index(drop=True)
    # Use the Series methods rather than the builtin sum(): another cell of this
    # notebook rebinds the name `sum`, which would break the builtin call on re-run.
    total_income = frame8['金额'].sum()
    total_trades = frame8['交易数'].sum()
    chart_title = enterprise.number+' 收入总额:'+str(total_income)+'  总交易数:'+str(total_trades)

    # Bar chart of income per buyer
    graph = px.histogram(frame8, x="购方单位代号", y="金额", title=chart_title)
    graph.write_html('./1_供求关系/'+enterprise.number+'.html')
    # Pie chart of each buyer's share of the income
    graph2 = px.pie(frame8, values='金额', names='购方单位代号', title=chart_title)
    graph2.update_traces(textposition='inside', textinfo='percent+label')
    graph2.write_html('./1_供求关系/'+enterprise.number+'_pie.html')

In [None]:
# Track every enterprise's running funds balance over time and plot it.
# Rows are collected in a list and turned into a DataFrame once, instead of
# growing the frame row by row.
fund_records = []
for enterprise in enterprise_dic.values():
    if not enterprise.invoice_list:
        # No invoices at all: there is no timeline to record.
        continue
    # `balance` (not `sum`) so the builtin sum() stays usable in other cells.
    balance = 0
    current_date = enterprise.invoice_list[0].date
    # Relies on invoice_list already being sorted by date.
    for invoice in enterprise.invoice_list:
        if invoice.date != current_date:
            # A new day starts: record the closing balance of the previous day.
            fund_records.append({'日期': current_date, '资金': balance,
                                 '企业代号': enterprise.number, '信誉评级': enterprise.credit_rating})
            current_date = invoice.date
        # NOTE(review): purchases subtract `sum_money` while sales add `amount`;
        # presumably tax-inclusive vs. pre-tax on purpose - confirm.
        # NOTE(review): invoice validity is not checked here - confirm voided
        # invoices are meant to be counted.
        if invoice.buy_in:
            balance = balance - invoice.sum_money
        else:
            balance = balance + invoice.amount
    # Closing balance of the last day
    fund_records.append({'日期': current_date, '资金': balance,
                         '企业代号': enterprise.number, '信誉评级': enterprise.credit_rating})
frame = pd.DataFrame(fund_records, columns=['日期', '资金', '企业代号', '信誉评级'])
graph = px.line(frame, x="日期", y="资金",color='企业代号',category_orders={"信誉评级": ["A","B", "C", "D"]},  render_mode="auto")
graph.write_html('./1_graph/total.html')

In [None]:
# Re-express each date as the number of days since the enterprise's first
# record (easier to plot and to regress on), and add the contract-breach flag.
day_records = []
for enterprise in enterprise_dic.values():
    own_rows = frame[frame['企业代号'] == enterprise.number]
    if own_rows.empty:
        # Enterprise has no funds history: nothing to convert.
        continue
    first_date = own_rows['日期'].iloc[0]
    breach_flag = '是' if enterprise.break_contract else '否'
    for day, funds, number, rating in zip(own_rows['日期'], own_rows['资金'],
                                          own_rows['企业代号'], own_rows['信誉评级']):
        day_records.append({'日期': (day - first_date).days, '资金': funds,
                            '企业代号': number, '信誉评级': rating, '是否违约': breach_flag})
# Build the frame once; '是否违约' is declared explicitly as a column.
frame1 = pd.DataFrame(day_records, columns=['日期', '资金', '企业代号', '信誉评级', '是否违约'])

In [None]:
# 根据数据预测未来一年企业资金走势
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

def predict(frame_predict):
    """Forecast how much an enterprise's funds change over the next 365 days.

    A straight line is fitted to funds ('资金') against day offset ('日期') and
    evaluated 365 days after the last record.

    Args:
        frame_predict: DataFrame for one enterprise with a numeric '日期'
            column (day offsets) and a '资金' column (funds), ordered by day.

    Returns:
        A one-element numpy array: predicted funds one year after the last
        record minus the funds of the last record.
    """
    last_day = frame_predict['日期'].iloc[-1]
    last_funds = frame_predict['资金'].iloc[-1]
    # Cast to float: keeps the fractional part of the funds and avoids
    # platform-dependent integer widths.
    x_data = np.asarray(frame_predict['日期'], dtype=float).reshape(-1, 1)
    y_data = np.asarray(frame_predict['资金'], dtype=float).reshape(-1, 1)

    # Fit on the full history. The held-out split was never evaluated, and
    # dropping a random, unseeded 10% only made the forecast vary between runs.
    model = LinearRegression()
    model.fit(x_data, y_data)

    forecast = model.predict(np.array([[last_day + 365]], dtype=float))
    # forecast has shape (1, 1); forecast[0] keeps the one-element array shape
    # that callers index with [0].
    return forecast[0] - last_funds

In [None]:
# Helper functions needed to compute the expected profit.
# Lookup table read by calc_alpha: a '贷款年利率' (annual loan rate) column plus
# one column per credit rating.
data_rate_and_loss = pd.read_csv(filepath_or_buffer="./C/rate_and_loss.csv")
def calc_alpha(r, credit_rating):
    """Look up the table value for annual rate ``r`` and ``credit_rating``.

    Finds the first row of ``data_rate_and_loss`` whose '贷款年利率' equals ``r``
    exactly and returns that row's value in the ``credit_rating`` column.
    Returns None when no row matches.
    """
    # Row labels whose rate matches exactly (assumes the default 0..N-1 index)
    matching_rows = data_rate_and_loss.index[data_rate_and_loss["贷款年利率"] == r]
    if len(matching_rows) == 0:
        return None
    return data_rate_and_loss[credit_rating][matching_rows[0]]

def calc_beta(A, F):
    """Return ``F`` evaluated at ``A``."""
    beta = F(A)
    return beta

def calc_gamma(credit_rating, break_contract):
    """Return the credit coefficient: rating weight times contract-breach weight.

    Raises KeyError for a rating other than "A", "B" or "C".
    """
    rating_weight = {
        "A": 1,
        "B": 0.9,
        "C": 0.7,
    }
    # A previous breach of contract halves the coefficient
    breach_weight = {
        True: 0.5,
        False: 1,
    }
    weight_for_rating = rating_weight[credit_rating]
    weight_for_breach = breach_weight[break_contract]
    return weight_for_rating * weight_for_breach

def calc_A_(A, F, f,enterprise: "Enterprise"):
    """Return (A*F(A) - lower*F(lower) - (I(A) - I(lower))) / (F(A) - F(lower)).

    ``I`` is ``F.integ()`` and ``lower`` is the first '资金' value of the
    enterprise's rows in ``frame4``. The parameter ``f`` is not used.

    NOTE(review): ``frame4`` is not defined in the visible part of this
    notebook - confirm where it comes from before calling this function.
    """
    own_rows = frame4[frame4.企业代号 == enterprise.number].reset_index(drop=True)
    lower = own_rows['资金'][0]
    # Difference of the antiderivative of F between lower and A
    integral_part = F.integ()(A) - F.integ()(lower)
    numerator = A * F(A) - lower * F(lower) - integral_part
    denominator = F(A) - F(lower)
    return numerator / denominator

def calc_expect_profit(A, r, enterprise: "Enterprise",F):
    """Return the expected profit of lending amount ``A`` at annual rate ``r``.

    Computes (beta * (A_ / gamma - A) + (1 - beta) * A * r) * (1 - alpha),
    with the terms supplied by calc_alpha, calc_beta, calc_gamma and calc_A_.
    """
    # Customer churn rate for this rate / credit rating
    churn = calc_alpha(r, enterprise.credit_rating)
    # Share of funds going to debt (F evaluated at A)
    default_share = calc_beta(A, F)
    # Enterprise credit coefficient
    credit_coeff = calc_gamma(enterprise.credit_rating, enterprise.break_contract)
    density = F.deriv()
    # Expected remaining funds when the debt cannot be repaid
    remaining = calc_A_(A, F, density, enterprise)
    loss_term = default_share * (remaining / credit_coeff - A)
    gain_term = (1 - default_share) * (A * r)
    return (loss_term + gain_term) * (1 - churn)
from scipy.stats import norm
from Enterprise import Enterprise

def calc(enterprise, A, r):
    """Return the expected profit of lending ``A`` to ``enterprise`` at rate ``r``.

    Uses the one-year funds change forecast by ``predict`` on the enterprise's
    rows of ``frame1``, the credit coefficient from ``calc_gamma`` and the
    table value from ``calc_alpha``.
    """
    history = frame1[frame1.企业代号 == enterprise.number].reset_index(drop=True)
    remain = predict(history)
    forecast_gain = remain[0]
    # Weight of the loss branch: half the ratio of the loan to the forecast gain
    beta = 1.0 * A / forecast_gain * 0.5

    gamma = calc_gamma(enterprise.credit_rating, enterprise.break_contract)
    loss_outcome = forecast_gain / gamma - A
    alpha = calc_alpha(r, enterprise.credit_rating)

    interest_part = A * r * (1 - beta)
    loss_part = loss_outcome * beta
    return 1.0 * (interest_part + loss_part) * (1 - alpha)

In [None]:
# Estimate a loan quota per enterprise: one third of the funds gained since
# the record closest to 365 days before the last record.
As = {}
for enterprise in enterprise_dic.values():
    history = frame1[frame1.企业代号 == enterprise.number].reset_index(drop=True)
    n_rows = history.shape[0]
    last_day = history['日期'][n_rows - 1]
    # Distance of every record from the "one year before the last record" mark
    gaps = [abs(last_day - history['日期'][k] - 365) for k in range(n_rows)]
    smallest_gap = min(gaps)
    # Fall back to the first record unless some record is strictly closer than 365 days
    year_ago_index = gaps.index(smallest_gap) if smallest_gap < 365 else 0
    gain = history['资金'][n_rows - 1] - history['资金'][year_ago_index]
    As[enterprise.number] = gain / 3
# Clamp every quota into the range [0, 1000000]
for en_num, quota in As.items():
    As[en_num] = min(max(quota, 0), 1000000)

In [None]:
# Sweep the annual interest rates and report each enterprise's best expected profit
rates = list(data_rate_and_loss["贷款年利率"])
for enterprise in enterprise_dic.values():
    # Rating "D" and enterprises with a zero quota get no loan
    if enterprise.credit_rating == "D" or As[enterprise.number]==0:
        continue
    A = As[enterprise.number]
    best_profit = 0
    best_rate = 0
    for r in rates:
        profit = calc(enterprise, A, r)
        # Keep only strict improvements, so the first rate wins on ties
        if profit > best_profit:
            best_profit, best_rate = profit, r
    print(enterprise.number, A, best_profit, best_rate)

In [None]:
# Compute the mean and variance of each enterprise's (roughly) monthly funds growth rate
def _growth_rates(day_offsets, funds, window=30):
    """Return funds growth per day over consecutive ~`window`-day steps.

    Starting from the first record, repeatedly jump to the record whose day
    offset is closest to `window` days after the current one and record
    (funds change) / (days elapsed) for that step. Stops once the current
    record is within `window` days of the last record.

    Args:
        day_offsets: Day offsets, assumed strictly increasing.
        funds: Funds values aligned with `day_offsets`.
        window: Target step length in days.

    Returns:
        List of per-step growth rates; empty when the history is empty or
        spans no more than `window` days.
    """
    n_rows = len(day_offsets)
    rates = []
    if n_rows == 0:
        return rates
    last_day = day_offsets[n_rows - 1]
    start = 0
    while day_offsets[start] + window < last_day:
        # Start from infinity, not a fixed cap: with a finite cap, a long gap
        # between records selects no later record and the loop never advances.
        best_index = start
        best_gap = float('inf')
        for j in range(start + 1, n_rows):
            overshoot = day_offsets[j] - day_offsets[start] - window
            if abs(overshoot) < best_gap:
                best_gap = abs(overshoot)
                best_index = j
            if overshoot > 0:
                # Later records are only further away from the target
                break
        rates.append((funds[best_index] - funds[start]) / (day_offsets[best_index] - day_offsets[start]))
        start = best_index
    return rates

k_mean_var_dic = {}
for enterprise in enterprise_dic.values():
    temp = frame1[frame1.企业代号 == enterprise.number].reset_index(drop=True)
    ks = _growth_rates(temp['日期'].tolist(), temp['资金'].tolist())
    # No step available (short or empty history): store NaN explicitly
    k_mean_var_dic[enterprise.number] = [np.mean(ks), np.var(ks)] if ks else [np.nan, np.nan]
frame7 = pd.DataFrame.from_dict(k_mean_var_dic,orient='index',columns=['增长率均值','增长率方差'])
frame7 = frame7.reset_index().rename(columns={'index':'企业代号'})
frame7.to_csv("./C/1_资金增长率均值方差.csv",index=False,sep=',',encoding='utf_8_sig')