In [2]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
#from matplotlib.ticker import LinearLocator, FormatStrFormatter,
import matplotlib.ticker as ticker

from sklearn.linear_model import LinearRegression

import seaborn as sns
import autograd.numpy as np
from autograd import grad, elementwise_grad
import pandas as pd
from random import random, seed
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.utils import resample
from sklearn.model_selection import KFold, cross_val_score
plt.rcParams['font.size'] = 14


In [16]:
def FrankeFunction(x,y):
    term1 = 0.75*np.exp(-(0.25*(9*x-2)**2) - 0.25*((9*y-2)**2))
    term2 = 0.75*np.exp(-((9*x+1)**2)/49.0 - 0.1*(9*y+1))
    term3 = 0.5*np.exp(-(9*x-7)**2/4.0 - 0.25*((9*y-3)**2))
    term4 = -0.2*np.exp(-(9*x-4)**2 - (9*y-7)**2)

    return term1 + term2 + term3 + term4 

# Creating the design matrix, from lecture notes
def create_X(x, y):
    if len(x.shape) > 1:
        x = np.ravel(x)
        y = np.ravel(y)

    N = len(x)
    l = 2 # Number of elements in beta
    X = np.ones((N,l))
    X[:,0] = x
    X[:,1] = y
    return X


# Defining the Mean square error, from lecture notes
def CostFunction(y,ytilde):
    n = len(y)
    return 1/n * np.sum(np.abs(y-ytilde)**2)

def CostFunctionClassification(ao,target):
    n= len(ao)
    return -1/n*np.sum(target*np.log(ao) + (1-target)*np.log(1-ao))
    

def MSE(y,ytilde):
    n = len(y)
    return 1/n * np.sum(np.abs(y-ytilde)**2)

# Defining the R2 function, from lecture notes
def R2(y_data, y_model):
    return 1 - np.sum((y_data - y_model) ** 2) / np.sum((y_data - np.mean(y_data)) ** 2)

def DerivariveCostFunc(y,ytilde):
    n = len(y)
    return 2/n*(y-ytilde)

def Sigmoid(y):
    return np.exp(y)/(1+np.exp(y))

def RELU(y):
    return np.maximum(0,y)

def lexyRelu(y):
    return np.maximum(0.01*y,y)

def Identity(y):
    return y

### note n_hidden is assumed to be a list in this case
def Set_weights_and_bias(n_in, n_hidden,n_out):
    W = []
    b = []
    W_grad =[]
    b_grad = []
    #### Setting hiden weights
    W.append(np.random.randn(n_in, n_hidden[0]))
    b.append( np.zeros(n_hidden[0]) +0.01)
    W_grad.append(np.random.randn(n_in, n_hidden[0]))
    b_grad.append( np.zeros(n_hidden[0]) +0.01)
    for i in range(1,len(n_hidden)):
            W.append(np.random.randn(n_hidden[i-1], n_hidden[i]))
            b.append( np.zeros(n_hidden[i]) +0.01)
            W_grad.append(np.random.randn(n_hidden[i-1], n_hidden[i]))
            b_grad.append( np.zeros(n_hidden[i]) +0.01)

#### setting output weights
    W.append(np.random.randn(n_hidden[-1], n_out))
    b.append(np.zeros(n_out) +0.01)
    W_grad.append(np.random.randn(n_hidden[-1], n_out))
    b_grad.append( np.zeros(n_out) +0.01)
    return W, b, W_grad, b_grad    
###### The gradients are only sett so that they wil have the right dimensions

#### From lecture notes
def feed_forward_train(X,W, b,activation_function, output_function):
    Z_h = []
    A_h = [] 
#### Hidden attac
    Z_h.append( X@W[0] + b[0])
    A_h.append(activation_function(Z_h[0]))
    for i in range(1,len(W)):
        Z_h.append( A_h[-1]@W[i] + b[i])
        A_h.append(activation_function(Z_h[-1]))
    return Z_h, A_h

def back_prop(X,Target,W, W_grad, b, b_grad,activation_function, output_function,hyper_par):
    Z_h,A_h = feed_forward_train(X,W, b,activation_function, output_function)
    error_out = DerivariveCostFunc(A_h[-1],Target)*elementwise_grad(output_function,0)(Z_h[-1]) #* derivative of output_function
    W_grad[-1] = (A_h[-2]).T @ error_out + 2*hyper_par*W[-1]
    b[-1] = np.sum(error_out, axis=0)
    for i in range(len(W)-1,1,-1):
      #  print(i)
        error_hiden = (error_out @ W[i].T)* elementwise_grad(activation_function,0)(Z_h[i-1])
        W_grad[i-1] = (A_h[i-2]).T @ error_hiden + 2*hyper_par*W[i-1]
        b_grad[i-1] = np.sum(error_hiden, axis=0)
        error_out = np.copy(error_hiden)
    ### hidden error
    error_hiden = (error_out @ W[1].T)* elementwise_grad(activation_function,0)(Z_h[0]) #*  a_h * (1 - a_h) # the last two terms are the derivative of the sigmoid
    W_grad[0] = X.T @error_hiden+ 2*hyper_par*W[0]
    b_grad[0] = np.sum(error_hiden,axis=0)
    return W_grad, b_grad

npoints =20
x = np.sort(np.random.uniform(0, 1, npoints)) 
y = np.sort(np.random.uniform(0, 1, npoints)) 
x, y = np.meshgrid(x,y)
X = create_X(x, y)

Y = FrankeFunction(x, y) 

X_train, X_test, y_train, y_test = train_test_split(X, Y.reshape(-1,1), test_size=0.2)


epochs = 100 #numEpochs[1]
Minibach = 8

W,b,W_grad,b_grad=Set_weights_and_bias(2,[8],1)
eta =.1

for i in range(epochs):
    miniBach = np.random.randint(Minibach)
    MiniBachSize = int(X_train.shape[0]/Minibach)
    miniBachMin, miniBachMax = MiniBachSize * miniBach,(MiniBachSize) * (miniBach+1)

    W_grad,b_grad=back_prop(X_train[miniBachMin:miniBachMax],y_train[miniBachMin:miniBachMax],W, W_grad, b, b_grad,Sigmoid, Identity,0)
  #  print(W_grad)
    for i in range(len(W)):
        W[i] -= eta*W_grad[i]
        b[i] -= eta*b_grad[i]
Z_h, A_h = feed_forward_train(X_test,W, b,Sigmoid, Identity)
print(CostFunction(A_h[-1],y_test))



0.09734544429033434
