## 实现灰色预测

In [1]:
import pandas as pd
import numpy as np
from math import exp
from docx import Document
from lxml import etree
import latex2mathml.converter


def latex_to_word(latex_input):
    """Convert a LaTeX math string into an XML element for a Word paragraph.

    The LaTeX source is first converted to MathML with ``latex2mathml``; the
    MathML is then run through the ``MML2OMML.XSL`` stylesheet, which is
    looked up relative to the current working directory on every call.

    Args:
        latex_input: LaTeX math source.

    Returns:
        The root element of the transformed document (callers in this file
        append it to a python-docx paragraph's ``_element``).
    """
    mathml_root = etree.fromstring(latex2mathml.converter.convert(latex_input))
    to_omml = etree.XSLT(etree.parse('MML2OMML.XSL'))
    return to_omml(mathml_root).getroot()

In [2]:
# Module-level python-docx document. NOTE(review): nothing visible in this
# file reads it -- GRAYPRE creates its own Document in __init__.
document = Document()

In [3]:
# Build the gray-prediction (GM(1,1)) results and write them into a Word report
class GRAYPRE:
    """Build a Word report of GM(1,1) gray-prediction results.

    One instance owns one python-docx ``Document`` (``self.document``).
    Constructing the object writes the report title and the notation section;
    ``makelastreport`` then appends one chapter per entry of ``datadict``.
    """

    def __init__(self, datadict, predictn):
        """Store the inputs, create the report document and write its preamble.

        Args:
            datadict: Mapping of series name -> DataFrame. Each frame is read
                positionally: the first column is the period label (must be
                convertible with ``int``) and the last column holds the
                observations. The intermediate tables are built with three
                or four fixed column names, so two-column frames are expected.
            predictn: Number of periods to forecast beyond the observed data.
        """
        # Data dictionary
        self.dd = datadict
        # Number of periods to forecast
        self.pn = predictn

        # Report document
        self.document = Document()

        self.start()

    def transtable(self, df):
        """Append a DataFrame to the report as a Word table.

        The first table row holds the column labels, followed by one row per
        DataFrame record. Every label and value is written via ``str`` so
        that non-string column labels do not break the cell assignment.
        """
        labels = list(df.keys())
        table = self.document.add_table(rows=1, cols=len(labels), style='Medium Grid 1 Accent 1')
        # Header row
        for cell, label in zip(table.rows[0].cells, labels):
            cell.text = str(label)
        # Data rows
        for record in df.values:
            for cell, value in zip(table.add_row().cells, record):
                cell.text = str(value)

    def start(self):
        """Write the report title and the "symbols and variables" section."""
        self.document.add_heading('灰色预测模型结果', level=0)
        self.document.add_heading('0 符号以及变量说明：', level=1)

        # Bullets that carry a formula: (caption, LaTeX source)
        formula_bullets = (
            ('原始数据序列：', r'x^{(0)}(k), k=1, 2, ……,n；其中n为数据的个数'),
            ('一次累加生成序列：', r'{x}^{(1)}(k)= \sum_{i=1}^k{x^{(0)}(i)}, k=1, 2, …… ,n'),
            ('预测的一次累加生成序列：', r'\hat{x}^{(1)}(k), k=1, 2, …… ,n'),
            ('级比值区间：', r'\left[e^{\frac{-2}{n+1}}, e^{\frac{2}{n+1}}\right]'),
        )
        for caption, latex in formula_bullets:
            p = self.document.add_paragraph(caption, style='List Bullet')
            p._element.append(latex_to_word(latex))

        # Plain-text definitions
        for text in (
            '残差=原始数据-预测数据',
            '后验差比C值=残差方差/原始数据方差',
            '相对误差=残差/原始数据',
            '平均相对误差=相对误差绝对值的均值',
        ):
            self.document.add_paragraph(text, style='List Bullet')

    @staticmethod
    def _as_column(values):
        """Return *values* as an (n, 1) object array for stacking into a table."""
        return np.array(values, dtype=object).reshape(-1, 1)

    @staticmethod
    def _ratio_bounds(n):
        """Return the level-ratio interval [e^(-2/(n+1)), e^(2/(n+1))]."""
        return [exp(-2 / (n + 1)), exp(2 / (n + 1))]

    @staticmethod
    def _level_ratios(series):
        """Return x(k-1)/x(k) for consecutive items, rounded to 4 decimals."""
        series = np.asarray(series, dtype=float)
        return [round(h, 4) for h in series[:-1] / series[1:]]

    @staticmethod
    def _ratios_within(ratios, bounds):
        """Return True when every ratio lies inside *bounds* (NaN counts as outside)."""
        low, high = bounds
        return all(low <= k <= high for k in ratios)

    @staticmethod
    def _find_shift(series, bounds):
        """Find the translation constant c that makes the level-ratio test pass.

        Candidates are ``round(mean * t)`` for t = 0, 0.01, 0.02, ...; the
        first candidate whose shifted series passes is returned together with
        the shifted level ratios.

        Raises:
            ValueError: If the series mean is zero or not finite; every
                candidate would then be identical and the search could never
                terminate.
        """
        series = np.asarray(series, dtype=float)
        mean = float(np.mean(series))
        if mean == 0 or not np.isfinite(mean):
            raise ValueError(
                'cannot search for a translation constant: series mean is %r' % mean)
        factor = 0
        while True:
            shift = round(mean * factor)
            ratios = GRAYPRE._level_ratios(series + shift)
            if GRAYPRE._ratios_within(ratios, bounds):
                return shift, ratios
            factor += 0.01

    @staticmethod
    def _fit_gm11(series):
        """Least-squares fit of x0(k) = -a * z1(k) + b.

        ``z1`` is the mean of consecutive items of the cumulative sum of
        *series*.

        Returns:
            ``(a, b, fitted, c_ratio)`` where ``fitted`` is the first
            observation followed by the regression values and ``c_ratio`` is
            var(series - fitted) / var(series).
        """
        series = np.asarray(series, dtype=float)
        accumulated = np.cumsum(series)
        background = (accumulated[:-1] + accumulated[1:]) / 2
        design = np.column_stack((-background, np.ones(len(background))))
        # lstsq avoids explicitly inverting the normal equations
        (a, b), *_ = np.linalg.lstsq(design, series[1:], rcond=None)
        fitted = np.hstack((series[:1], design @ np.array([a, b])))
        c_ratio = np.var(series - fitted) / np.var(series)
        return a, b, fitted, c_ratio

    def generatereport(self, data, name, sign):
        """Append the full GM(1,1) chapter for one series to the report.

        Sections written, in order: raw data, level-ratio test (with a
        translation of the series when the test fails), model parameters,
        forecast table and error analysis. ``data`` is never modified.

        Args:
            data: DataFrame whose first column is the period label and whose
                last column holds the observations.
            name: Series name, used in headings and as a table column label.
            sign: Chapter number used as heading prefix.

        Raises:
            ValueError: If the series has fewer than 3 observations, or if a
                translation is required but the series mean is zero or not
                finite.
        """
        # Private copy of the observations in their original dtype (used for display)
        observed = data.values[:, -1].copy()
        labels = data.values[:, 0]
        n = len(observed)
        if n < 3:
            raise ValueError(
                'GM(1,1) needs at least 3 observations, got %d for %r' % (n, name))
        # Float copies used for all numeric work
        observed_f = np.asarray(observed, dtype=float)
        model_series = observed_f

        # Translation constants, keyed by series name
        self.pingyidict = {}

        self.document.add_heading('%s %s灰色预测结果' % (sign, name), level=1)
        # Raw data
        self.document.add_heading('%s.1 原始数据' % sign, level=2)
        self.transtable(data)

        # Level-ratio test
        self.document.add_heading('%s.2 级比检验结果' % sign, level=2)
        bounds = self._ratio_bounds(n)
        shown_bounds = (round(bounds[0], 5), round(bounds[1], 5))
        ratios = self._level_ratios(observed_f)
        base_cells = data.values.astype(object)
        # The first period has no predecessor, hence the leading empty cell
        self.transtable(pd.DataFrame(
            np.hstack((base_cells, self._as_column([''] + ratios))),
            columns=['年份', name, '级比值']))

        shift = 0
        working = observed
        if not self._ratios_within(ratios, bounds):
            self.document.add_paragraph('级比值不是全部在区间[%s,%s]内，不满足级比检验。需要对原始数据序列进行平移，下面计算平移转换常数c。'
                                        % shown_bounds)
            shift, shifted_ratios = self._find_shift(observed_f, bounds)
            # New arrays: the caller's data stays untouched
            working = observed + shift
            model_series = observed_f + shift
            self.pingyidict[name] = shift
            self.document.add_paragraph('计算得到的平移转换常数c=%s，下面给出平移后的数据表' % str(shift))
            self.transtable(pd.DataFrame(
                np.hstack((base_cells,
                           self._as_column(working),
                           self._as_column([''] + shifted_ratios))),
                columns=['年份', name, '平移后', '级比值']))
        self.document.add_paragraph('级比值全部在区间[%s,%s]内，满足级比检验,可以建立GM(1,1)模型。' % shown_bounds)

        # Model fit
        self.document.add_heading('%s.3 构建GM(1,1)模型' % sign, level=2)
        a, b, _, caicha = self._fit_gm11(model_series)
        modeldf = pd.DataFrame(data=[[round(a, 4), round(b, 4), round(caicha, 4)]],
                               columns=['发展系数a', '灰色作用量b', '后验差比C值'])
        self.transtable(modeldf)

        p = self.document.add_paragraph('预测时间响应序列为：\n')
        word_math = latex_to_word(r' \hat{x}^{(1)}(k+1) = (x^{(0)}(1) - \frac{b}{a}) \times e^{-ak}+\frac{b}{a}\\=(%s-%s) \times e^{-%sk}+ %s ,k=0,1,……n-1'% (working[0], round(b/a,4), round(a,4), round(b/a,4)))
        p._element.append(word_math)

        # Forecast: difference the cumulative response, then undo the translation
        self.document.add_heading('%s.4 模型预测' % sign, level=2)
        first_label = int(labels[0])
        rows = []
        previous = 0
        for i in range(n + self.pn):
            cumulative = (model_series[0] - b / a) * exp(-1 * a * i) + b / a
            actual = observed[i] if i < n else '—'
            rows.append([first_label + i, actual, round(cumulative - previous - shift, 4)])
            previous = cumulative

        predictdf = pd.DataFrame(rows, columns=['年份', '原始值', '预测值'])
        self.transtable(predictdf)

        # Error analysis over the observed periods only
        self.document.add_heading('%s.5 模型预测结果分析' % sign, level=2)
        # .copy() so the new columns are not written onto a slice of predictdf
        fenxidf = predictdf[:n].copy()
        residual = observed_f - fenxidf['预测值'].to_numpy(dtype=float)
        relative = residual / observed_f
        fenxidf['残差'] = ['%.4f' % gy if gy != 0 else '0' for gy in residual]
        fenxidf['相对误差'] = ['%.3f%%' % (float(fgh) * 100) if fgh != 0 else '0%' for fgh in relative]
        self.transtable(fenxidf)

        # Mean relative error from the numeric values (not the rounded strings);
        # the first period is skipped, as it is forecast from the observation itself
        ssd = np.mean(np.abs(relative[1:]))
        self.document.add_paragraph('模型的平均相对误差为%.4f%%，模型预测结果精准。' % (ssd * 100))

    def makelastreport(self):
        """Append one chapter per series in ``self.dd``, numbered from 1."""
        for number, (series_name, frame) in enumerate(self.dd.items(), start=1):
            self.generatereport(frame, series_name, number)
        
        
  
    

### Step0 读取数据

In [4]:
# Excel workbook with the input series; a relative path, so it is resolved
# against the current working directory when it is read.
datafile = '灰度预测数据.xlsx'

In [5]:
# Build the data dictionary: sheet_name=None loads every sheet of the
# workbook and returns a dict mapping sheet name -> DataFrame.
f = pd.read_excel(datafile, sheet_name=None)

FileNotFoundError: [Errno 2] No such file or directory: '灰度预测数据.xlsx'

### Step1 数据级比检验

In [6]:
# Create the report object for the sheets loaded into f, forecasting 5 periods
# ahead, append one chapter per sheet, then write the Word file to
# result.docx in the current working directory.
hh = GRAYPRE(f, 5)
hh.makelastreport()
hh.document.save('result.docx')
    


NameError: name 'f' is not defined

### Step2 一次累加构造XY