In [None]:
import numpy as np
import matplotlib.pyplot as plt

### Plotting Synthetic Data - Gaussian distribution

**Objective:** Generate synthetic data from two Gaussian distributions, each defined by its own mean and variance (the example below uses different means and the same standard deviation).

In [None]:
# Draw two 2-D point clouds from normal distributions that differ only in their mean.
n = 30  # number of samples per class

# Dedicated, seeded generator so that "Restart Kernel -> Run All" reproduces the same data.
data_rng = np.random.default_rng(seed=0)

# loc is the mean and scale is the standard deviation (NOT the variance) of each coordinate.
X1 = data_rng.normal(loc=10, scale=5, size=(n, 2))
X2 = data_rng.normal(loc=20, scale=5, size=(n, 2))  # same spread as X1, different mean

# Class labels: +1 for every row of X1, -1 for every row of X2.
Y1 = np.ones(n)
Y2 = np.full(n, -1.0)

In [None]:
# Stack both classes into one training set; X1 rows come first, followed by X2 rows,
# and Y_train lists the labels in that same order.
X_train = np.vstack((X1, X2))
Y_train = np.hstack((Y1, Y2))
X_train.shape  # sanity check: (number of samples, dimension)

### Plotting X_train data

**Objective:** Plot X_train with different colors corresponding to the labels (let label 1 be blue and label -1 be red).

Use ```plt.scatter()``` method to plot the sample points

In [None]:
# Baseline scatter with matplotlib's default marker colour:
# column 0 of X_train gives the x-coordinates, column 1 the y-coordinates.
plt.scatter(x=X_train[:, 0], y=X_train[:, 1])
plt.show()

In [None]:
# Same scatter, now styled explicitly:
#   color='b'      -> blue marker fill
#   edgecolor='k'  -> black marker outline
#   s=35           -> marker size (an area in points**2, not a font size)
#   label=...      -> text shown for this series by plt.legend()
plt.scatter(
    x=X_train[:, 0],
    y=X_train[:, 1],
    s=35,
    color='b',
    edgecolor='k',
    label='sample points',
)
plt.grid(True)  # turn the background grid on
plt.legend()    # show the 'sample points' entry
plt.show()

In [None]:
# One scatter call per class: a boolean mask on Y_train selects that class's rows of X_train.
for class_value, fill_color, legend_text in (
    (1, 'b', 'label : 1'),
    (-1, 'r', 'label : -1'),
):
    class_points = X_train[Y_train == class_value]
    plt.scatter(class_points[:, 0], class_points[:, 1], color=fill_color, edgecolor='k', label=legend_text, s=35)
plt.grid(True)
plt.legend()
plt.show()

### Plotting Arbitrary Linear Boundary

**Objective:** Understand the ```np.meshgrid()``` function and use it to draw a boundary.

Assume that the boundary is of the form:
$ax_1+bx_2 + c = 0$ where $a, b$ and $c$ are arbitrary in this case (they are not learned yet).

To plot this boundary we need to know where $ax_1+bx_2 + c = 0$ holds across the plotting region. Therefore, we prepare a grid of points $(x_1, x_2)$ covering the 2D space and compute $ax_1+bx_2 + c$ at every grid point. This is what we will do using the ```np.meshgrid()``` method.

In [None]:
# specify the min max value of x-axis and y-axis for meshgrids
x_min, x_max = -30, 30
y_min, y_max = -30, 30
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 30), np.linspace(y_min, y_max, 30)) # 30 grids for each axis
grids = np.c_[xx.ravel(), yy.ravel()]
grids.shape # total 30 x 30 = 900 grid points

In [None]:
# Pick an arbitrary (not learned) line a*x_1 + b*x_2 + c = 0.
# A seeded generator keeps this "arbitrary" boundary identical across kernel restarts.
boundary_rng = np.random.default_rng(seed=1)
ab = boundary_rng.normal(size=2)  # coefficients (a, b), shape (2,)
c = boundary_rng.normal(size=1)   # offset c, shape (1,), broadcast when added to a vector

In [None]:
# Evaluate a*x_1 + b*x_2 + c once per row of grids.
Z = grids @ ab + c
# Put the values back on the mesh layout and trace only the zero level, i.e. the boundary line.
Z_on_mesh = Z.reshape(xx.shape)
plt.contour(xx, yy, Z_on_mesh, levels=[0], colors='k')
plt.show()

In [None]:
# Boundary line (zero level of Z) drawn first, then filled contours coloured by the value of Z.
boundary_values = Z.reshape(xx.shape)  # reshape once and reuse for both calls
plt.contour(xx, yy, boundary_values, levels=[0], colors='k')
plt.contourf(xx, yy, boundary_values, cmap='RdBu', alpha=0.7)
plt.show()

**Merge all the plots for data points and boundary**

In [None]:
# Hand-picked boundary: -0.4*x_1 - 0.3*x_2 + 11 = 0.
ab = np.array([-0.4, -0.3])
c = np.array([11])

# Scatter each class separately; a boolean mask on Y_train picks that class's rows of X_train.
for class_value, fill_color, legend_text in (
    (1, 'b', 'label : 1'),
    (-1, 'r', 'label : -1'),
):
    class_points = X_train[Y_train == class_value]
    plt.scatter(class_points[:, 0], class_points[:, 1], color=fill_color, edgecolor='k', label=legend_text, s=35)
plt.grid(True)
plt.legend()

# Read back the axis limits of the current plot so the mesh spans the same x/y range.
axes = plt.gca()
x_min, x_max = axes.get_xlim()
y_min, y_max = axes.get_ylim()

# 30 ticks per axis between those limits, then one (x, y) row per grid point.
x_ticks = np.linspace(x_min, x_max, 30)
y_ticks = np.linspace(y_min, y_max, 30)
xx, yy = np.meshgrid(x_ticks, y_ticks)
grids = np.column_stack((xx.ravel(), yy.ravel()))
Z = grids @ ab + c

# Zero level of Z is the boundary line; the filled contours are an optional colour map of Z.
Z_on_mesh = Z.reshape(xx.shape)
plt.contour(xx, yy, Z_on_mesh, levels=[0], colors='k')
plt.contourf(xx, yy, Z_on_mesh, cmap='RdBu', alpha=0.7)

plt.show()