In [5]:
from sympy import *
import numpy as np

In [6]:
def gradient_descent(f, sym, x0, alpha = 0.1, eps = 1e-4):
    df = np.array([100])
    i = 1
    it = 1
    while eps < np.linalg.norm(df):
        df = np.array([f.diff(sym[0]).subs(zip(sym, x0)).evalf(), f.diff(sym[1]).subs(zip(sym, x0)).evalf()]).astype(np.float64)
        y = x0 - alpha*df
        if f.subs(zip(sym,y)).evalf()<f.subs(zip(sym, x0)).evalf():
            x0 = y
        else:
            alpha = alpha/2
        i = i + 4
        it = it + 1
    return f.subs(zip(sym,x0)).evalf(), x0, i, it

def gradient_descent(f, sym, x0, alpha = 0.1, eps = 1e-4):
    df = np.array([100,133])
    i = 1
    it = 1
    x0 = ImmutableDenseNDimArray(x0)
    while eps < df[0]**2 + df[1]**2:
        df = tensor.derive_by_array(f,sym).subs(zip(sym,x0))
        y = x0 - df.applyfunc(lambda x: x*alpha)
        if f.subs(zip(sym,y)).evalf() < f.subs(zip(sym, x0)).evalf():
            x0 = y
        else:
            alpha = alpha/2
        i = i + 4
        it = it + 1
    return f.subs(zip(sym,x0)).evalf(), x0, i, it

In [7]:
def newton_search(f, sym, x0, eps=1e-6):
    it = 0 
    d = diff(f, sym).subs({sym : x0}).evalf()
    while eps < abs(d):
        x0 = x0 - d/diff(f, sym, 2).subs({sym : x0}).evalf()
        it = it + 2
        if it>200:
            print("Method not coverage from current start point")
            raise
        d = diff(f, sym).subs({sym : x0}).evalf()
    return f.subs({sym : x0}).evalf(), x0, it

In [8]:
def iterative_search(f, sym, a, b, eps=1e-6, delta = 1000):
    eps = eps
    x0 = a
    x1 = a+delta
    i = 0
    while eps<abs(delta):
        x1 = a+delta
        while f.subs({sym : x1}).evalf() < f.subs({sym : x0}).evalf() and x1>a and x1<b:
            x0 = x1
            x1 = x1+delta
            i = i + 1
        delta = -delta/4
    return f.subs({sym : x0}).evalf(), x0, i

In [9]:
def full_descent(f, sym, x0, eps=1e-4, minimizer = newton_search):
    theta = symbols('theta')
    i = 0
    it = 0
    df = np.array([f.diff(sym[0]).subs(zip(sym, x0)).evalf(), f.diff(sym[1]).subs(zip(sym, x0)).evalf()]).astype(np.float64)
    while eps < np.linalg.norm(df):
        df = np.array([f.diff(sym[0]).subs(zip(sym, x0)).evalf(), f.diff(sym[1]).subs(zip(sym, x0)).evalf()]).astype(np.float64)
        f_ = f.subs(zip(sym, x0-theta*df))
        theta_min = minimizer(f_, theta, 0.4+np.random.rand())
        x0 = x0 - theta_min[1]*df
        i = i + 2 + theta_min[2]
        it = it + 1
    return f.subs(zip(sym,x0)).evalf(), x0, i, it

In [62]:
def ortogonal_descent(f, sym, x0, eps=1e-4, restart = 0, minimizer = newton_search, beta = 'std'):
    i = 0
    it = 0
    df = np.array([f.diff(sym[0]).subs(zip(sym, x0)).evalf(), f.diff(sym[1]).subs(zip(sym, x0)).evalf()]).astype(np.float64)
    while eps < np.linalg.norm(df):
        f_ = f.subs(zip(sym, x0-theta*df))
        theta_min = minimizer(f_, theta, 1)
        x_ = np.array(x0)
        x0 = x0 - theta_min[1]*df
        i = i + 2 + theta_min[2]
        df_1 = df
        s = np.array([f.diff(sym[0]).subs(zip(sym, x0)).evalf(), f.diff(sym[1]).subs(zip(sym, x0)).evalf()]).astype(np.float64)
        if beta == 'std':
            b = (np.linalg.norm(s)/np.linalg.norm(df_1))**2
        elif beta == 'v1':
            b = float(np.dot(x_,s)/np.dot(x_, df_1))
        elif beta == 'v2':
            b = float(np.dot(x_,s)/(np.linalg.norm(df_1)**2))
        else:
            raise Exception("Unknown beta type")
        df = np.array(-1*s + df_1*b)
        it = it+1
        if restart and it%restart == 0:
            df = np.array([f.diff(sym[0]).subs(zip(sym, x0)).evalf(), f.diff(sym[1]).subs(zip(sym, x0)).evalf()]).astype(np.float64)
    return f.subs(zip(sym, x0)).evalf(), x0, i, it

In [69]:
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps, restart=4, beta = 'v2', minimizer = lambda x,y,z: iterative_search(x,y,0, oo))

(4.07988397757211e-7, array([0.9993617 , 0.99936052]), 3570, 1220)

In [67]:
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps, restart=3, beta = 'v1')

(7.65170066708452e-8,
 array([0.999723469333913, 0.999723123559974], dtype=object),
 94,
 6)

In [68]:
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps, restart=3)

(5.16335965377706e-11,
 array([1.00000669781549, -1.00000656769272], dtype=object),
 218,
 15)

# Задание 2

In [11]:
x1 = symbols('x1')
x2 = symbols('x2')
theta = symbols('theta')

def f(a):
    return x1**2+a*x2**2

In [12]:
a = 1
eps = 1e-3
gradient_descent(f(a), [x1,x2], [1,-5], eps=eps), full_descent(f(a), [x1,x2], [1,-5], eps=eps),\
ortogonal_descent(f(a), [x1,x2], [1,-5], eps=eps)

((1.20423772806809e-7, array([ 6.80564734e-05, -3.40282367e-04]), 173, 44),
 (0, array([0, 0], dtype=object), 6, 2),
 (0, array([0, 0], dtype=object), 4, 1))

In [13]:
a = 1
eps = 1e-5
gradient_descent(f(a), [x1,x2], [1,-5], eps=eps), full_descent(f(a), [x1,x2], [1,-5], eps=eps),\
ortogonal_descent(f(a), [x1,x2], [1,-5], eps=eps)

((1.02445216110626e-11, array([ 6.27710174e-07, -3.13855087e-06]), 257, 65),
 (0, array([0, 0], dtype=object), 6, 2),
 (0, array([0, 0], dtype=object), 4, 1))

In [14]:
a = 250
eps = 1e-3
gradient_descent(f(a), [x1,x2], [1,-5], eps=eps), full_descent(f(a), [x1,x2], [1,-5], eps=eps),\
ortogonal_descent(f(a), [x1,x2], [1,-5], eps=eps)

((2.44849917188919e-7,
  array([ 4.94823117e-004, -2.22698617e-303]),
  4877,
  1220),
 (4.92597918719931e-10,
  array([2.21284629224981e-5, -1.08241348985654e-7], dtype=object),
  14,
  4),
 (1.59946761057117e-12,
  array([-1.26429609553595e-6, -2.02286270785217e-9], dtype=object),
  12,
  3))

In [15]:
a = 250
eps = 1e-5
gradient_descent(f(a), [x1,x2], [1,-5], eps=eps), full_descent(f(a), [x1,x2], [1,-5], eps=eps),\
ortogonal_descent(f(a), [x1,x2], [1,-5], eps=eps)

((2.46458570399349e-11,
  array([ 4.96445939e-006, -4.94065646e-324]),
  7813,
  1954),
 (3.69579721928949e-11,
  array([6.07814249839727e-6, 7.52488194508648e-9], dtype=object),
  230,
  61),
 (1.59946761057117e-12,
  array([-1.26429609553595e-6, -2.02286270785217e-9], dtype=object),
  12,
  3))

In [16]:
a = 1000
eps = 1e-3
gradient_descent(f(a), [x1,x2], [1,-5], 2, eps=eps), full_descent(f(a), [x1,x2], [1,-5], eps=eps),\
ortogonal_descent(f(a), [x1,x2], [1,-5], eps=eps)

((2.48888469898506e-7, array([4.98887232e-04, 4.09995923e-81]), 15601, 3901),
 (3.20626497248296e-9,
  array([-5.66206039729551e-5, -1.92919211260761e-8], dtype=object),
  16,
  4),
 (6.36169541267140e-15,
  array([-7.97538569561858e-8, -3.19016188249019e-11], dtype=object),
  14,
  3))

In [17]:
a = 1000
eps = 1e-5
gradient_descent(f(a), [x1,x2], [1,-5], eps=eps), full_descent(f(a), [x1,x2], [1,-5], eps=eps),\
ortogonal_descent(f(a), [x1,x2], [1,-5], eps=eps)

((2.49049400608615e-11,
  array([4.99048495e-006, 4.94065646e-324]),
  31257,
  7815),
 (1.42635747024967e-12,
  array([1.19425800332390e-6, 3.24486897225892e-10], dtype=object),
  24,
  7),
 (6.36169541267140e-15,
  array([-7.97538569561858e-8, -3.19016188249019e-11], dtype=object),
  14,
  3))

# Задание 3

In [18]:
f = 64*x1**2 + 126*x1*x2 + 64*x2**2 + -10*x1+30*x2 + 13

In [19]:
eps = 1e-3
gradient_descent(f, [x1,x2], [1,-5], eps=eps), full_descent(f, [x1,x2], [1,-5], eps=eps),\
ortogonal_descent(f, [x1,x2], [1,-5], eps=eps)

((-187.393700546578, array([  9.96028292, -10.03902307]), 3169, 793),
 (-187.393700781433,
  array([9.96067457812718, -10.0394196445261], dtype=object),
  46,
  12),
 (-187.393700787401,
  array([9.96062946783659, -10.0393696250020], dtype=object),
  24,
  6))

# Задание 5

In [20]:
f = 100*(x1**2-x2**2)**2 + (x1-1)**2
eps = 1e-3
ortogonal_descent(f, [x1,x2], [-1,1], eps=eps)

(8.55017195753593e-20, array([1.00000000001460, 1], dtype=object), 16, 1)

In [21]:
eps = 1e-5
ortogonal_descent(f, [x1,x2], [-1,1], eps=eps)

(8.55017195753593e-20, array([1.00000000001460, 1], dtype=object), 16, 1)

# Задание 6

In [22]:
eps = 1e-3
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps,restart=0)

(6.28696244350773e-12,
 array([0.999997523040986, 0.999997503570673], dtype=object),
 560,
 26)

In [23]:
eps = 1e-3
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps, restart=1)#, minimizer = lambda x,y,z: iterative_search(x,y,0, oo))

(3.70999041248751e-7,
 array([0.999391219896092, -0.999390237174221], dtype=object),
 23850,
 2655)

In [24]:
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps,restart=2)

(2.11970680873075e-7,
 array([0.999539653798858, -0.999539292883541], dtype=object),
 1330,
 87)

In [25]:
eps = 1e-3
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps,restart=3)

(5.16335965377706e-11,
 array([1.00000669781549, -1.00000656769272], dtype=object),
 218,
 15)

In [26]:
eps = 1e-3
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps,restart=4)

(9.07357803690489e-11,
 array([1.00000545611457, -1.00000584651790], dtype=object),
 716,
 31)

In [27]:
eps = 1e-3
ortogonal_descent(f, [x1,x2], [-2, 1], eps=eps,restart=5)

(2.62639633503213e-16,
 array([0.999999984008835, 0.999999983877284], dtype=object),
 298,
 15)