# 使用不同方法求解最优化问题

# In [None]:
import numpy as np
from numpy import dot
import pandas as pd
import matplotlib.pyplot as plt
import time
import math
'''
def f(x,ts):# x是一个4维向量，t是一个标量
    return (x[0,0]*(ts**2+x[1,0]*ts))/(ts**2+x[2,0]*ts+x[3,0])

def targetfunction(x,y,t):# x,y,t都是列向量
    result=0
    for i in range(11):
        result+=(y[i,0]-f(x,t[i,0]))**2
    return result'''

def targetfunction(x,y,t):
    """Return the least-squares objective of the rational model.

    The model value at a sample point ``ti`` is
    ``x1*(ti**2 + x2*ti) / (ti**2 + x3*ti + x4)`` and the objective is the sum
    of ``(y_i - model(t_i))**2`` over all samples.

    :param x: parameter column vector, read as ``x[k, 0]`` for k = 0..3
    :param y: observed values as a column vector, read as ``y[i, 0]``
    :param t: sample points as a column vector, read as ``t[i, 0]``
    :return: the sum of squared residuals
    """
    x1 = x[0, 0]
    x2 = x[1, 0]
    x3 = x[2, 0]
    x4 = x[3, 0]
    total = 0.0
    # Walk over every observation that is present instead of assuming that
    # the data set always holds exactly 11 samples.
    for i in range(y.shape[0]):
        ti = t[i, 0]
        model = x1 * (ti**2 + x2*ti) / (ti**2 + x3*ti + x4)
        total += (y[i, 0] - model)**2
    return total
'''
def asgrad(x,y,t,epsilon):# 估计梯度值
    grad = np.zeros((4,1))
    for i in range(4):
        x1 = x.copy()
        # x2 = x.copy()
        # x2[i] = x2[i]-epsilon
        # grad[i] = (targetfunction(x1,y,t)-targetfunction(x2,y,t))/(2*epsilon)
        x1[i] += epsilon
        grad[i] = (targetfunction(x1,y,t)-targetfunction(x,y,t))/epsilon
    return np.mat(grad)

def asHessian(x,y,t,epsilon):# 估计海森矩阵
    Hessian = np.zeros((4,4))
    for i in range(4):
        for j in range(4):
            x1 = x.copy()
            x2 = x.copy()
            x3 = x.copy()
            x4 = x.copy()
            x1[i] = x1[i]+epsilon
            x1[j] = x1[j]+epsilon
            x2[i] = x2[i]+epsilon
            x2[j] = x2[j]-epsilon
            x3[i] = x3[i]-epsilon
            x3[j] = x3[j]+epsilon
            x4[i] = x4[i]-epsilon
            x4[j] = x4[j]-epsilon
            Hessian[i,j] = (targetfunction(x1,y,t)-targetfunction(x2,y,t)-targetfunction(x3,y,t)+targetfunction(x4,y,t))/(4*epsilon**2)
    return np.mat(Hessian)
'''
def asgrad(x,y,t,epsilon):
    '''
    Estimate the gradient of targetfunction by forward differences.

    Each entry of x is increased by epsilon in turn (in place), the objective
    is re-evaluated, and the entry is put back before moving on.

    :param x: point at which the gradient is estimated
    :param y: observed values, forwarded to targetfunction
    :param t: sample points, forwarded to targetfunction
    :param epsilon: finite-difference step
    :return: array with the same shape as x holding the estimated gradient
    '''
    estimate = np.zeros(x.shape)
    base_value = targetfunction(x, y, t)
    for position in np.ndindex(*x.shape):
        saved = np.float64(x[position])
        # Perturb a single coordinate, measure the change, then restore it.
        x[position] = np.float64(saved + epsilon)
        shifted_value = targetfunction(x, y, t)
        estimate[position] = (shifted_value - base_value) / epsilon
        x[position] = saved
    return estimate

def asHessian(x,y,t,epsilon):
    '''
    Estimate the Hessian of targetfunction by differencing asgrad.

    Row i is the forward difference of the estimated gradient after entry
    ``x[i, 0]`` has been increased by epsilon; the entry is restored afterwards.

    :param x: column vector at which the Hessian is estimated
    :param y: observed values, forwarded to asgrad
    :param t: sample points, forwarded to asgrad
    :param epsilon: finite-difference step (also used inside asgrad)
    :return: (n, n) array, where n is the number of entries of x
    '''
    dim = np.size(x)
    hessian = np.zeros((dim, dim))
    base_grad = asgrad(x, y, t, epsilon)
    for row in range(dim):
        saved = np.float64(x[row, 0])
        x[row, 0] = np.float64(saved + epsilon)
        shifted_grad = asgrad(x, y, t, epsilon)
        hessian[row, :] = [
            np.float64((shifted_grad[col, 0] - base_grad[col, 0]) / epsilon)
            for col in range(dim)
        ]
        x[row, 0] = saved
    return hessian

def J(params=None, times=None): # compute the Jacobian matrix
    """Return the Jacobian of the rational model with respect to its parameters.

    For the model ``f = x1*(t**2 + x2*t) / (t**2 + x3*t + x4)`` row i holds
    ``[df/dx1, df/dx2, df/dx3, df/dx4]`` evaluated at ``t_i``.  The observed
    values do not appear because they are constant with respect to the
    parameters, so this is also the Jacobian of the residuals ``f - y``.

    :param params: the four model parameters (any array-like with 4 entries);
        when omitted the module-level ``x`` is used, as the zero-argument
        form of this function always did
    :param times: sample points (any array-like); when omitted the
        module-level ``t`` is used
    :return: ``np.matrix`` of shape (number of sample points, 4)
    """
    if params is None:
        params = x
    if times is None:
        times = t
    p = np.asarray(params, dtype=float).ravel()
    tt = np.asarray(times, dtype=float).ravel()
    x1, x2, x3, x4 = p[0], p[1], p[2], p[3]
    num = tt**2 + x2*tt
    den = tt**2 + x3*tt + x4
    jac = np.column_stack((
        num / den,                # d f / d x1
        x1 * tt / den,            # d f / d x2
        -x1 * num * tt / den**2,  # d f / d x3
        -x1 * num / den**2,       # d f / d x4
    ))
    # np.asmatrix keeps the matrix return type without relying on the
    # np.mat alias.
    return np.asmatrix(jac)

## 线搜索armijo方法
'''def armijo(x,y,t,epsilon,dk):
    alpha = 1
    beta = 0.5
    sigma = 0.1
    grad = asgrad(x,y,t,epsilon)
    while targetfunction(x+alpha*dk,y,t) > targetfunction(x,y,t)+sigma*grad.T*dk*alpha:
        alpha *= beta
    return alpha'''
def armijo(xk,y,t,epsilon,dk):
    '''
    Choose a step length by Armijo backtracking.

    Starting from alpha = 1, alpha is multiplied by rho = 0.5 until
    ``f(xk + alpha*dk) <= f(xk) + rho*alpha*g.T.dot(dk)``, where f is
    targetfunction and g is the finite-difference gradient from asgrad.
    The same constant rho serves as shrink factor and as sufficient-decrease
    coefficient.

    :param xk: current point (column vector)
    :param y: observed values, forwarded to targetfunction / asgrad
    :param t: sample points, forwarded to targetfunction / asgrad
    :param epsilon: finite-difference step used by asgrad
    :param dk: search direction, same shape as xk
    :return: the accepted step length alpha
    '''
    rho = 0.5
    alpha = 1.0
    # The objective and the directional derivative at xk do not depend on
    # alpha, so evaluate them once instead of on every backtracking step.
    f_start = targetfunction(xk, y, t)
    slope = np.asarray(asgrad(xk, y, t, epsilon).T.dot(dk)).item()
    while targetfunction(xk + alpha * dk, y, t) > f_start + rho * alpha * slope:
        alpha = rho * alpha
    return alpha

## 不同方法的函数

# 最速下降法：
def graddecent(x,y,t,epsilon):
    """Minimise targetfunction by steepest descent with an Armijo step length.

    Iterates until the norm of the estimated gradient falls below 1e-6 or
    100000 steps have been taken.

    :param x: starting point (column vector)
    :param y: observed values
    :param t: sample points
    :param epsilon: finite-difference step handed to asgrad / armijo
    :return: tuple ``(x, elapsed_seconds, iteration_count)``
    """
    started = time.time()
    iterations = 0
    while iterations != 100000:
        gradient = asgrad(x, y, t, epsilon)
        direction = -gradient
        if np.linalg.norm(gradient) < 1e-6:
            break
        step = armijo(x, y, t, epsilon, direction)
        x = x + step * direction
        iterations += 1
    elapsed = time.time() - started
    return x, elapsed, iterations

# 牛顿法：
def newtonmethod(x,y,t,epsilon):
    """Minimise targetfunction with undamped Newton steps.

    Each iteration estimates gradient and Hessian by finite differences and
    moves by the full Newton step.  When the determinant of the Hessian is
    below epsilon the negative gradient is used as the step instead.
    Iteration stops when the gradient norm drops below 1e-6, when the
    gradient is no longer finite, or after 100000 steps.

    :param x: starting point (column vector)
    :param y: observed values
    :param t: sample points
    :param epsilon: finite-difference step; also the determinant threshold
    :return: tuple ``(x, elapsed_seconds, iteration_count)``
    """
    start=time.time()
    cnt=0
    while cnt < 100000:
        grad = asgrad(x,y,t,epsilon)
        if np.linalg.norm(grad) < 1e-6:
            break
        if not np.all(np.isfinite(grad)):
            # The iteration has diverged; further steps cannot recover.
            break
        Hessian = asHessian(x,y,t,epsilon)
        if np.linalg.det(Hessian) < epsilon:
            dk = -grad
        else:
            # asgrad/asHessian return plain ndarrays, for which ``*`` is an
            # element-wise product; solve the linear system H dk = -grad
            # to obtain the actual Newton step.
            dk = -np.linalg.solve(Hessian, grad)
        x = x + dk
        cnt+=1
    end=time.time()-start
    return x,end,cnt

# 阻尼牛顿法：
def dampednewton(x,y,t,epsilon):
    """Minimise targetfunction with Newton directions and an Armijo step length.

    The direction is the Newton direction when the determinant of the
    estimated Hessian is at least epsilon and that direction points downhill;
    otherwise the negative gradient is used.  Iteration stops when the
    gradient norm drops below 1e-6 or after 100000 steps.

    :param x: starting point (column vector)
    :param y: observed values
    :param t: sample points
    :param epsilon: finite-difference step; also the determinant threshold
    :return: tuple ``(x, elapsed_seconds, iteration_count)``
    """
    start=time.time()
    cnt=0
    while cnt < 100000:
        grad = asgrad(x,y,t,epsilon)
        if np.linalg.norm(grad) < 1e-6:
            break
        Hessian = asHessian(x,y,t,epsilon)
        if np.linalg.det(Hessian) < epsilon:
            dk = -grad
        else:
            # ``*`` on the ndarrays returned by asgrad/asHessian would be an
            # element-wise product, so solve H dk = -grad explicitly.
            dk = -np.linalg.solve(Hessian, grad)
            # An indefinite Hessian can still have a positive determinant;
            # backtracking only makes sense along a descent direction.
            if grad.T.dot(dk).item() >= 0:
                dk = -grad
        alpha=armijo(x,y,t,epsilon,dk)
        x = x + alpha*dk
        cnt+=1
    end=time.time()-start
    return x,end,cnt

# 拟牛顿法：
def BFGS(x,y,t,epsilon):
    """Minimise targetfunction with the BFGS quasi-Newton method.

    ``H`` approximates the inverse Hessian, starts as the identity and is
    updated after every step with
    ``H + (1 + y'Hy/y's) ss'/y's - (s y'H + H y s')/y's``.
    Step lengths come from armijo.  Iteration stops when the gradient norm
    drops below 1e-6 or after 100000 steps.

    :param x: starting point (column vector)
    :param y: observed values
    :param t: sample points
    :param epsilon: finite-difference step handed to asgrad / armijo
    :return: tuple ``(x, elapsed_seconds, iteration_count)``
    """
    start=time.time()
    cnt=0
    H = np.eye(np.size(x))
    grad = asgrad(x,y,t,epsilon)
    while cnt < 100000:
        if np.linalg.norm(grad) < 1e-6:
            break
        # Explicit matrix products: ``*`` is element-wise for ndarrays.
        dk = -H.dot(grad)
        alpha=armijo(x,y,t,epsilon,dk)
        x1 = x + alpha*dk
        grad1 = asgrad(x1,y,t,epsilon)
        sk = np.asarray(x1 - x)
        yk = np.asarray(grad1 - grad)
        ys = yk.T.dot(sk).item()
        # Only update when the curvature condition y's > 0 holds with some
        # margin; otherwise H could lose positive definiteness.
        if ys > 1e-10 * np.linalg.norm(sk) * np.linalg.norm(yk):
            yHy = yk.T.dot(H).dot(yk).item()
            H = (H
                 + (1 + yHy/ys) * sk.dot(sk.T) / ys
                 - (sk.dot(yk.T.dot(H)) + H.dot(yk).dot(sk.T)) / ys)
        x = x1
        # The gradient at the new point is already known; reuse it.
        grad = grad1
        cnt+=1
    end=time.time()-start
    return x,end,cnt

# 共轭梯度法：
def conjugategradient(x,y,t,epsilon):
    """Minimise targetfunction with a nonlinear conjugate-gradient iteration.

    The direction update is ``d_new = -g_new + beta*d`` with
    ``beta = g_new'g_new / (-d'g)``; step lengths come from armijo.  When the
    previous direction was not a descent direction (``-d'g <= 0``) the
    search restarts from the negative gradient.  Iteration stops when the
    gradient norm drops below epsilon or after 100000 steps.

    :param x: starting point (column vector)
    :param y: observed values
    :param t: sample points
    :param epsilon: finite-difference step; also the gradient-norm tolerance
    :return: tuple ``(x, elapsed_seconds, iteration_count)``, matching the
        other solvers
    """
    start=time.time()
    cnt=0
    gk = asgrad(x,y,t,epsilon)
    dk = -gk
    while cnt < 100000:
        # gk always holds the gradient at the current x.
        if np.linalg.norm(gk) < epsilon:
            break
        alpha=armijo(x,y,t,epsilon,dk)
        x1 = x + alpha*dk
        gk1 = asgrad(x1,y,t,epsilon)
        # Scalar inner products: ``*`` on ndarrays would broadcast
        # element-wise into 4x4 arrays.
        denom = -dk.T.dot(gk).item()
        if denom > 0:
            betak = gk1.T.dot(gk1).item() / denom
            dk = -gk1 + betak*dk
        else:
            dk = -gk1
        x = x1
        gk = gk1
        cnt+=1
    end=time.time()-start
    return x,end,cnt

# LMF方法：
def LMF(x,y,t,epsilon):
    """Minimise targetfunction with a damped (Levenberg-Marquardt style) Newton step.

    Each outer iteration solves ``(Hessian + v*I) dk = -grad`` and doubles the
    damping ``v`` until the step lowers the objective; after an accepted step
    ``v`` is halved.  The outer loop runs at most 1000 times and stops early
    when the gradient norm drops below 1e-6 or when no damping value yields
    an improving step.

    :param x: starting point (column vector)
    :param y: observed values
    :param t: sample points
    :param epsilon: finite-difference step handed to asgrad / asHessian
    :return: tuple ``(x, elapsed_seconds)``
    """
    start=time.time()
    max_iter = 1000
    v = 2.0
    identity = np.eye(np.size(x))
    for _ in range(max_iter):
        grad = asgrad(x,y,t,epsilon)
        if np.linalg.norm(grad) < 1e-6:
            break
        Hessian = asHessian(x,y,t,epsilon)
        f_current = targetfunction(x,y,t)
        dk = None
        # Bounded search over the damping: once v overflows to infinity no
        # improving step exists and looping further would never terminate.
        while np.isfinite(v):
            try:
                # Solve the damped system; ``*`` on ndarrays is element-wise
                # and would not give the matrix-vector product.
                trial = -np.linalg.solve(Hessian + v*identity, grad)
            except np.linalg.LinAlgError:
                trial = None  # singular system: treat as a rejected step
            if trial is not None and targetfunction(x+trial,y,t) < f_current:
                dk = trial
                break
            v *= 2
        if dk is None:
            break
        x = x + dk
        v /= 2
    end=time.time()-start
    return x,end

# 定义数据及求解：
# np.asmatrix builds the same np.matrix column vectors as the np.mat alias,
# which is no longer available in NumPy 2.x.
X = np.asmatrix([1.0,1.0,1.0,1.0]).T
Y = np.asmatrix([0.1957,0.1947,0.1735,0.1600,0.0844,0.0627,0.0456,0.0342,0.0323,0.0235,0.0246]).T
T = np.asmatrix([4.0000,2.0000,1.0000,0.5000,0.2500,0.1670,0.1250,0.1000,0.0833,0.0714,0.0625]).T
epsilon = 1e-4
# Run every solver from the same starting point; each returns the final
# point, the elapsed time and the iteration count.
x1,t1,k1 = graddecent(X,Y,T,epsilon)
x2,t2,k2 = newtonmethod(X,Y,T,epsilon)
x3,t3,k3 = dampednewton(X,Y,T,epsilon)
x4,t4,k4 = BFGS(X,Y,T,epsilon)
x5,t5,k5 = conjugategradient(X,Y,T,epsilon)
# x6,t6,k6 = LMF(x,y,t,epsilon)
# Print, per solver: final point, objective value, elapsed time, iterations.
y1 = targetfunction(x1,Y,T)
print(x1,y1,t1,k1)
y2 = targetfunction(x2,Y,T)
print(x2,y2,t2,k2)
y3 = targetfunction(x3,Y,T)
print(x3,y3,t3,k3)
y4 = targetfunction(x4,Y,T)
print(x4,y4,t4,k4)
y5 = targetfunction(x5,Y,T)
print(x5,y5,t5,k5)
# y6 = targetfunction(x6,y,t)
# print(x6,y6,t6,k6)