In [18]:
import numpy as np
import cupy as cp
import matplotlib
matplotlib.use('nbagg')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [49]:
N = 500
# X = (cp.random.random((N, 2)) * 4) - 2
X = cp.random.randn(N, 2) * 2
Y = X[:, 0] * X[:, 1]  # y = x1 * x2
Y

array([-2.84973874e+00,  2.01125142e+00,  1.98898630e+01, -9.52988242e-02,
        4.14973685e+00, -2.67996825e+00,  7.01512995e-01,  5.82793756e+00,
        3.86095786e+00, -9.54667439e-02, -3.42699033e+00,  6.72534303e-01,
        7.04197132e-01,  3.44265325e-01, -1.58898199e+00,  1.64257757e+00,
        9.81162705e-01,  2.73031732e+00,  4.52766236e+00,  1.34063902e-01,
       -7.73375211e+00, -2.45125171e+00,  5.81531188e-01,  1.20003242e+00,
        4.09273056e+00,  3.68295464e-01, -1.25840349e+01, -2.43769334e-01,
       -4.23461793e+00, -3.94252236e-01,  4.40613149e+00,  1.08804084e+01,
        3.46554750e+00,  5.38140702e-01, -2.92859082e+00, -1.44920106e+00,
        2.87394121e+00, -6.25368548e+00, -2.76092796e+00,  9.28922754e-03,
       -2.41728480e+00,  1.38511597e-01, -4.68412844e+00,  4.51654807e-01,
        6.09749240e+00,  7.27712165e-01, -3.04948424e+00,  4.81819559e-01,
        1.15719094e-01,  5.62524947e-01,  3.71881524e-01, -9.23865582e-02,
        2.12843580e-01, -

In [50]:
def sigmoid(z) :
    return 1 / (1 + cp.exp(-z))

def forward(X, W1, b1, W2, b2) :
    z = cp.tanh(X.dot(W1) + b1) #z = (N X M)
    return z.dot(W2) + b2, z  # y = (N X K)

def derivate_W2(T, Y, Z):
    return Z.T.dot(T - Y)

def derivate_b2(T, Y) :
    return (T - Y).sum(axis = 0)

def derivate_W1(X, T, Y, W2, Z) :
    dz = cp.outer(T - Y, W2) * (1 - Z * Z)
    return X.T.dot(dz)

def derivate_b1(T, Y, W2, Z): #same as W1 except for X(or if X was 1)
    dz = cp.outer(T - Y, W2) * (1 - Z * Z)
    return dz.sum(axis = 0)

def classification_rate(T, Y) :
    return np.mean(T == Y)

def cross_entropy(T, Y) :
    return - ((T - Y)**2).mean()

def t2indicator(Yin, K) :
    N = Yin.shape[0]
    Yout = cp.zeros((N, K))
    
    for i in range(N) :
        Yout[i, Yin[i]] = 1
    return Yout

In [64]:
learning_rate = 1e-5
epochs = 2000

In [65]:
D = X.shape[1]
M = 100  #hidden units

W = cp.random.randn(D, M) / cp.sqrt(D * M)
b = cp.random.randn(M) / cp.sqrt(M)

V = cp.random.randn(M) / cp.sqrt(M)
c = 0

costs = []

for i in range(epochs) :
    Yhat, Z = forward(X, W, b, V, c)
    cost = cross_entropy(Y, Yhat)
    costs.append(float(cost.get()))
    
    if i % 50 == 0 :
        print(cost)
        
    W += learning_rate * derivate_W1(X, Y, Yhat, V, Z)
    b += learning_rate * derivate_b1(Y, Yhat, V, Z)
    V += learning_rate * derivate_W2(Y, Yhat, Z)
    c += learning_rate * derivate_b2(Y, Yhat)
    
plt.figure()
plt.plot(costs)
plt.show()

-16.780569775427416
-15.636709315410931
-15.48055675914281
-15.383725558160391
-15.275074180579153
-15.131143497823828
-14.931448674701521
-14.655910571976065
-14.286786266366894
-13.811275343281418
-13.223419086182338
-12.52504211856127
-11.726247148719711
-10.845760111619054
-9.910741905762777
-8.955226464264552
-8.016527959312857
-7.129935086275924
-6.323226992211605
-5.6129815048420415
-5.003872246254443
-4.4907611987875455
-4.062339024044307
-3.7048456883471097
-3.4048616777530816
-3.1508329727145266
-2.933516230000725
-2.7457697756050776
-2.5821026877886917
-2.4382409459442558
-2.31080384535104
-2.197084341852242
-2.0948983018069374
-2.0024745796707055
-1.9183705482291025
-1.8414055762265742
-1.7706083385450104
-1.7051751359493212
-1.6444369979247555
-1.5878337450811746


<IPython.core.display.Javascript object>

In [67]:
#plotting the data 
fig = plt.figure()
ax = fig.add_subplot(111, projection = '3d')
ax.scatter(X[:, 0].get(), X[:, 1].get(), Y.get(), s = 0.5)

#surface plot
line = cp.linspace(-2, 2, 20)
xx, yy = cp.meshgrid(line, line)
Xgrid = cp.vstack((xx.flatten(), yy.flatten())).T
Yhat, _ = forward(Xgrid, W, b, V, c)
ax.plot_trisurf(Xgrid[:, 0].get(), Xgrid[:, 1].get(), Yhat.get(), linewidth = 1, antialiased = True)
plt.show()

<IPython.core.display.Javascript object>

In [71]:
#plotting the magnitude of the residuals
Ygrid = Xgrid[:, 0] * Xgrid[:, 1]
R = cp.abs(Ygrid - Yhat)

plt.figure()
plt.scatter(Xgrid[:, 0].get(), Xgrid[:, 1].get(), c = R.get())
plt.colorbar()
plt.show()

<IPython.core.display.Javascript object>

In [75]:
#residuals
R

array([0.5461069 , 0.60592124, 0.61620723, 0.58182936, 0.50848124,
       0.40252148, 0.2707901 , 0.12041725, 0.04136535, 0.20740232,
       0.37077605, 0.52494756, 0.6638737 , 0.78209823, 0.87481521,
       0.93790408, 0.96793788, 0.96216833, 0.91849308, 0.83541188,
       0.64015596, 0.66608647, 0.64726486, 0.58888365, 0.49678517,
       0.37728586, 0.23699404, 0.08263057, 0.07914044, 0.24186573,
       0.39943812, 0.54620962, 0.67707941, 0.78755379, 0.87377623,
       0.93252746, 0.96119854, 0.95774279, 0.92061475, 0.84870568,
       0.67833478, 0.67680762, 0.63620651, 0.56183348, 0.45944329,
       0.3350723 , 0.19487155, 0.04495007, 0.10876779, 0.26066823,
       0.40555507, 0.53873488, 0.65607735, 0.75404737, 0.82970708,
       0.8806884 , 0.90513974, 0.90165427, 0.86918928, 0.80698729,
       0.66611095, 0.64359896, 0.58834161, 0.50553338, 0.40062567,
       0.27917248, 0.14668752, 0.00851558, 0.1302808 , 0.26501806,
       0.39147048, 0.50592827, 0.60523175, 0.68677845, 0.74850

In [74]:
#3d plot of residuals
fig = plt.figure()
ax = fig.add_subplot(111, projection = '3d')
ax.plot_trisurf(Xgrid[:, 0].get(), Xgrid[:, 1].get(), R.get(), linewidth = 0.2, antialiased = True)

<IPython.core.display.Javascript object>

<mpl_toolkits.mplot3d.art3d.Poly3DCollection at 0x1fa0fdd0fd0>

### Meshgrid

The purpose of __meshgrid__ is to create a rectangular grid out of an array of x values and an array of y values.

So, for example, if we want to create a grid where we have a point at each integer value between 0 and 4 in both the x and y directions. To create a rectangular grid, we need every combination of the x and y points.

This is going to be 25 points, right? So if we wanted to create an x and y array for all of these points, we could do the following.

```
x[0,0] = 0    y[0,0] = 0
x[0,1] = 1    y[0,1] = 0
x[0,2] = 2    y[0,2] = 0
x[0,3] = 3    y[0,3] = 0
x[0,4] = 4    y[0,4] = 0
x[1,0] = 0    y[1,0] = 1
x[1,1] = 1    y[1,1] = 1
...
x[4,3] = 3    y[4,3] = 4
x[4,4] = 4    y[4,4] = 4
```

This would result in the following ```x``` and ```y``` matrices, such that the pairing of the corresponding element in each matrix gives the x and y coordinates of a point in the grid.

```
x =   0 1 2 3 4        y =   0 0 0 0 0
      0 1 2 3 4              1 1 1 1 1
      0 1 2 3 4              2 2 2 2 2
      0 1 2 3 4              3 3 3 3 3
      0 1 2 3 4              4 4 4 4 4
```
We can then plot these to verify that they are a grid:
```
plt.plot(x,y, marker='.', color='k', linestyle='none')
```
![plot image](https://i.stack.imgur.com/kZNzz.png)

Obviously, this gets very tedious especially for large ranges of x and y. Instead, meshgrid can actually generate this for us: all we have to specify are the unique x and y values.

```
xvalues = np.array([0, 1, 2, 3, 4]);
yvalues = np.array([0, 1, 2, 3, 4]);
```

Now, when we call meshgrid, we get the previous output automatically.

```
xx, yy = np.meshgrid(xvalues, yvalues)

plt.plot(xx, yy, marker='.', color='k', linestyle='none')
```

![plot image](https://i.stack.imgur.com/kZNzz.png)


__OR__
![](https://i.stack.imgur.com/8Mbig.png)

In [7]:
x = np.array([1, 2, 3, 4])
y = np.array([7, 8])
xx, yy = np.meshgrid(x, y)  #no of dimensions = x, no of samples = y

#each value of x is mapped to each value of y
#to get this value pair simply call the array with same index
#eg. xx[0] and yy[0] will give us (1, 7) => pair of 1 with 7
print(xx,'\n\n' ,yy)

[[1 2 3 4]
 [1 2 3 4]] 

 [[7 7 7 7]
 [8 8 8 8]]
