In [None]:
##imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import time

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.model_selection import train_test_split

from torchmetrics import Accuracy
import seaborn as sns

In [None]:
def func3d(x1 , x2):
    """Evaluate the quadratic bowl ``5*x1**2 + 2*x2**2``.

    Works element-wise on anything supporting ``**``, ``*`` and ``+``
    (Python numbers, tensors, arrays).
    """
    x1_term = 5 * x1 ** 2
    x2_term = 2 * x2 ** 2
    return x1_term + x2_term

In [None]:
def gradient_descent(func, xi, yi, eta, N):
    """Minimise ``func(xi, yi)`` with fixed-step gradient descent.

    Parameters
    ----------
    func : callable
        Called as ``func(xi, yi)``; the result must support ``.backward()``.
    xi, yi : torch.Tensor
        Tensors whose ``.grad`` is filled by ``func(xi, yi).backward()``.
        Both are updated IN PLACE, so the caller's tensors end up at the
        final iterate.
    eta : float
        Step size (learning rate).
    N : int
        Number of update steps.

    Returns
    -------
    tuple[list, list, list]
        ``(xis, yis, zis)``: detached copies of ``xi`` and ``yi`` after each
        step, and ``func`` evaluated at those copies. Each list has ``N``
        entries.
    """
    xis, yis, zis = [], [], []

    for _ in range(N):
        func(xi, yi).backward()

        # Update outside of autograd tracking so the step itself is not
        # recorded in the graph.
        with torch.no_grad():
            xi -= eta * xi.grad
            yi -= eta * yi.grad

        # Gradients accumulate across backward() calls; reset for next step.
        xi.grad.zero_()
        yi.grad.zero_()

        # Snapshot the current point; clones are independent of later updates.
        x_snapshot = xi.detach().clone()
        y_snapshot = yi.detach().clone()

        xis.append(x_snapshot)
        yis.append(y_snapshot)
        zis.append(func(x_snapshot, y_snapshot))

    return xis, yis, zis

In [None]:
def adagrad(func, xi, yi, eta, N, eps=0.00001):
    """Minimise ``func(xi, yi)`` with the AdaGrad update rule.

    Each coordinate keeps a running SUM of its squared gradients and the
    step is ``eta * grad / (sqrt(sum) + eps)``.

    Parameters
    ----------
    func : callable
        Called as ``func(xi, yi)``; the result must support ``.backward()``.
    xi, yi : torch.Tensor
        Tensors whose ``.grad`` is filled by ``func(xi, yi).backward()``.
        Both are updated IN PLACE.
    eta : float
        Base learning rate.
    N : int
        Number of update steps.
    eps : float, optional
        Small constant added to the denominator to avoid division by zero.

    Returns
    -------
    tuple[list, list, list]
        ``(xis, yis, zis)``: detached copies of ``xi`` and ``yi`` after each
        step, and ``func`` evaluated at those copies.
    """
    xis, yis, zis = [], [], []

    # Accumulated squared gradients, one accumulator per coordinate.
    sq_sum_x = 0
    sq_sum_y = 0

    for _ in range(N):
        func(xi, yi).backward()

        sq_sum_x = sq_sum_x + xi.grad ** 2
        sq_sum_y = sq_sum_y + yi.grad ** 2

        # eps belongs inside the denominator. Written as `a / b + eps` it
        # would be added to the step itself, shifting the parameter by a
        # constant every iteration and yielding 0/0 = NaN on a zero gradient.
        with torch.no_grad():
            xi -= eta * xi.grad / (torch.sqrt(sq_sum_x) + eps)
            yi -= eta * yi.grad / (torch.sqrt(sq_sum_y) + eps)

        # Gradients accumulate across backward() calls; reset for next step.
        xi.grad.zero_()
        yi.grad.zero_()

        x_snapshot = xi.detach().clone()
        y_snapshot = yi.detach().clone()

        xis.append(x_snapshot)
        yis.append(y_snapshot)
        zis.append(func(x_snapshot, y_snapshot))

    return xis, yis, zis

In [None]:
def RMSProb(func, xi, yi, eta, N, beta2=0.999, eps=0.00001):
    """Minimise ``func(xi, yi)`` with the RMSProp update rule.

    Each coordinate keeps an exponential moving average ``v`` of its squared
    gradients (decay ``beta2``) and the step is
    ``eta * grad / (sqrt(v) + eps)``. No bias correction is applied.

    Parameters
    ----------
    func : callable
        Called as ``func(xi, yi)``; the result must support ``.backward()``.
    xi, yi : torch.Tensor
        Tensors whose ``.grad`` is filled by ``func(xi, yi).backward()``.
        Both are updated IN PLACE.
    eta : float
        Base learning rate.
    N : int
        Number of update steps.
    beta2 : float, optional
        Decay factor of the squared-gradient moving average.
    eps : float, optional
        Small constant added to the denominator to avoid division by zero.

    Returns
    -------
    tuple[list, list, list]
        ``(xis, yis, zis)``: detached copies of ``xi`` and ``yi`` after each
        step, and ``func`` evaluated at those copies.
    """
    xis, yis, zis = [], [], []

    # Moving averages of squared gradients, one per coordinate.
    sq_avg_x = 0
    sq_avg_y = 0

    for _ in range(N):
        func(xi, yi).backward()

        sq_avg_x = (beta2 * sq_avg_x) + (1 - beta2) * xi.grad ** 2
        sq_avg_y = (beta2 * sq_avg_y) + (1 - beta2) * yi.grad ** 2

        # eps belongs inside the denominator. Written as `a / b + eps` it
        # would be added to the step itself, shifting the parameter by a
        # constant every iteration and yielding 0/0 = NaN on a zero gradient.
        with torch.no_grad():
            xi -= eta * xi.grad / (torch.sqrt(sq_avg_x) + eps)
            yi -= eta * yi.grad / (torch.sqrt(sq_avg_y) + eps)

        # Gradients accumulate across backward() calls; reset for next step.
        xi.grad.zero_()
        yi.grad.zero_()

        x_snapshot = xi.detach().clone()
        y_snapshot = yi.detach().clone()

        xis.append(x_snapshot)
        yis.append(y_snapshot)
        zis.append(func(x_snapshot, y_snapshot))

    return xis, yis, zis

In [None]:
def adam(func, xi, yi, eta, N, beta=0.9, beta2=0.999, eps=0.00001):
    """Minimise ``func(xi, yi)`` with the Adam update rule.

    Each coordinate keeps exponential moving averages of its gradient
    (``m``, decay ``beta``) and squared gradient (``v``, decay ``beta2``).
    Both are bias-corrected by ``1 - decay**t`` and the step is
    ``eta * m_hat / (sqrt(v_hat) + eps)``.

    Parameters
    ----------
    func : callable
        Called as ``func(xi, yi)``; the result must support ``.backward()``.
    xi, yi : torch.Tensor
        Tensors whose ``.grad`` is filled by ``func(xi, yi).backward()``.
        Both are updated IN PLACE.
    eta : float
        Base learning rate.
    N : int
        Number of update steps.
    beta : float, optional
        Decay factor of the gradient (first-moment) average.
    beta2 : float, optional
        Decay factor of the squared-gradient (second-moment) average.
    eps : float, optional
        Small constant added to the denominator to avoid division by zero.

    Returns
    -------
    tuple[list, list, list]
        ``(xis, yis, zis)``: detached copies of ``xi`` and ``yi`` after each
        step, and ``func`` evaluated at those copies.
    """
    xis, yis, zis = [], [], []

    # First- and second-moment moving averages, one pair per coordinate.
    m_x = 0
    m_y = 0
    v_x = 0
    v_y = 0

    # Step counter t starts at 1 so the bias-correction divisor is non-zero.
    for t in range(1, N + 1):
        func(xi, yi).backward()

        m_x = beta * m_x + (1 - beta) * xi.grad
        m_y = beta * m_y + (1 - beta) * yi.grad

        v_x = (beta2 * v_x) + (1 - beta2) * xi.grad ** 2
        v_y = (beta2 * v_y) + (1 - beta2) * yi.grad ** 2

        # Bias correction compensates for the zero initialisation.
        m_x_hat = m_x / (1 - beta ** t)
        m_y_hat = m_y / (1 - beta ** t)
        v_x_hat = v_x / (1 - beta2 ** t)
        v_y_hat = v_y / (1 - beta2 ** t)

        # eps belongs inside the denominator. Written as `a / b + eps` it
        # would be added to the step itself, shifting the parameter by a
        # constant every iteration and yielding 0/0 = NaN on a zero gradient.
        with torch.no_grad():
            xi -= eta * m_x_hat / (torch.sqrt(v_x_hat) + eps)
            yi -= eta * m_y_hat / (torch.sqrt(v_y_hat) + eps)

        # Gradients accumulate across backward() calls; reset for next step.
        xi.grad.zero_()
        yi.grad.zero_()

        x_snapshot = xi.detach().clone()
        y_snapshot = yi.detach().clone()

        xis.append(x_snapshot)
        yis.append(y_snapshot)
        zis.append(func(x_snapshot, y_snapshot))

    return xis, yis, zis

In [None]:
# Grid on which the objective surface is drawn; shared by all four panels.
x1 = torch.arange(-5, 5, 0.25)
x2 = torch.arange(-5, 5, 0.25)
X1, X2 = torch.meshgrid(x1, x2)
# The surface is identical in every panel, so evaluate it once.
Z = func3d(X1, X2)

fig = plt.figure(figsize=(16,14))

# One row per subplot:
# (subplot position, optimizer, title label, learning rate, steps, extra kwargs)
panels = [
    (221, gradient_descent, 'SGD', 0.01, 100, {}),
    (222, adagrad, 'AdaGrad', 0.15, 200, {'eps': 0.00001}),
    (223, RMSProb, 'RMSProb', 0.01, 100, {'eps': 0.00001}),
    (224, adam, 'Adam', 0.05, 100, {'eps': 0.00001}),
]

panel_axes = []
for position, optimizer, label, eta, N, extra_kwargs in panels:
    # Every optimizer starts from the same point; fresh tensors are needed
    # because the optimizers update their arguments in place.
    x = torch.tensor(-4., requires_grad=True)
    y = torch.tensor(4., requires_grad=True)

    panel_ax = fig.add_subplot(position, projection='3d')
    surf = panel_ax.plot_surface(X1, X2, Z, alpha=0.7)

    # Overlay the optimizer's trajectory on the surface.
    xs, ys, zs = optimizer(func3d, x, y, eta, N, **extra_kwargs)
    panel_ax.plot(xs, ys, zs, c='r', marker="o", ms=4)
    panel_ax.set_title(f'{label} with {N} Epoch & LR={eta}', fontfamily='Serif')

    panel_axes.append(panel_ax)

# Keep the per-panel axes names available to any later cells.
ax, ax2, ax3, ax4 = panel_axes