In [1]:
%matplotlib widget

In [2]:
import numpy as np
import matplotlib.pyplot as plt 
import math
import pandas as pd  
import seaborn as sns 
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import random
from progressbar import ProgressBar
from sklearn.metrics import r2_score

from mpl_toolkits.mplot3d import Axes3D
import celluloid
from celluloid import Camera

In [30]:
def gen_data(num_of_training_data_points, bias, variance):
    x = np.zeros(shape=(num_of_training_data_points, 2))
    y = np.zeros(shape=num_of_training_data_points)

    # above will generate a data for a straight line
    for i in range(0, num_of_training_data_points):
        # bias feature
        x[i][0] = 1
        x[i][1] = i

        #our target variable
        y[i] = (i + bias) + random.uniform(0, 1) * variance

    return x, y

In [60]:
x, y = gen_data(10, 0, 0)
fig, ax = plt.subplots(figsize=(10,7))
ax.scatter(x[:,1], y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7fba4c78d7d0>

In [3]:
from sklearn.datasets import load_boston
boston_dataset = load_boston()

In [4]:
boston = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)

In [5]:
boston_dataset.feature_names

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')

In [6]:
boston['MEDV'] = boston_dataset.target

In [7]:
boston.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [8]:
correlation_matrix = boston.corr().round(2)
# annot = True to print the values inside the square
fig, ax = plt.subplots(figsize=(11,9))
sns.heatmap(data=correlation_matrix, annot=True, linewidths=0)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

In [9]:
fig, ax = plt.subplots(figsize=(9,7))
plt.scatter(boston.LSTAT, boston.MEDV)
plt.title('LSTAT VS PRICE')
plt.xlabel('LSAT')
plt.ylabel('PRICE')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'PRICE')

In [10]:
fig, ax = plt.subplots(figsize=(9,7))
ax.scatter(boston.RM, boston.MEDV)
plt.title('RM VS PRICE')
plt.xlabel('RM')
plt.ylabel('PRICE')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'PRICE')

In [11]:
def cost_function_1param(X, y, theta1):
    h_x = theta1*X
    n = len(X)
    mean_sq_err = sum((h_x - y)**2)
    loss = mean_sq_err/(2*n)
    return loss

In [12]:
x = boston.RM
y = boston.MEDV

In [13]:
theta1 = np.arange(-100, 100, 1)
costs = []
fig, ax = plt.subplots(figsize=(9,7))
for i in theta1:
    costs.append(cost_function_1param(x, y, i))
ax.plot(theta1, costs)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7ff8adbab990>]

In [14]:
def create_data(features):
    length = boston.shape[0]
    x = np.ones((length, 1))
    for i in features:
        x1 = np.array(boston[i]).reshape(length, 1)
        x1 /= np.ptp(x1)
        x = np.concatenate((x, x1), axis=1)
    y = np.array(boston.MEDV).reshape(length, 1)
    return x, y

In [22]:
x, y = create_data(['LSTAT'])

In [25]:
y = np.array(boston.MEDV)
x.shape,y.shape

((506, 2), (506,))

In [26]:
def cost(X, y, theta):
    inner = np.power((np.dot(x, theta) - y), 2)
    return np.sum(inner) / (2 * len(X))

In [45]:
theta = np.zeros((x.shape[1], 1))
print(theta)
print(cost(x,y,theta))

[[0.]
 [0.]]
149813.17


# Visualising Cost Function

In [31]:
scale = 1*(10**11)
start = -1*scale
stop = 1*scale
step = 1*(scale/100)
my_thetas_lst = []
for theta_0 in np.arange(start, stop, step):
    for theta_1 in np.arange(start, stop, step):
        my_thetas_lst.append(np.array([theta_0, theta_1]))

In [32]:
costs = []
pbar = ProgressBar()
for my_thetas in pbar(my_thetas_lst):
    if x.shape[1]-2>2:
        shape = x.shape[1]-2
    else:
        shape = 0
    my_thetas = np.append(my_thetas, np.zeros(shape)).reshape(x.shape[1])
    cost_linear = cost(x, y, my_thetas)
    costs.append(cost_linear)

theta_0_lst = [x[0] for x in my_thetas_lst]
theta_1_lst = [x[-1] for x in my_thetas_lst]

np.save('theta0', theta_0_lst)
np.save('theta1', theta_1_lst)
np.save('costs', costs)

100% |########################################################################|


# Gradient Descent

In [33]:
def gradient_descent(x, y, theta, alpha, m, iterations_num):
    pbar = ProgressBar()
    loss_history = []
    t_hist = []
    for i in pbar(range(iterations_num)):
        hypothesis = np.dot(x, theta)
        loss_ = hypothesis - y
        cost = np.sum(loss_ ** 2) / (2*m)
        loss_history.append(cost)
        t_hist.append(theta)
        vectorized_gradient = np.dot(x.T, loss_) / m
        theta = theta - alpha * vectorized_gradient
    return theta, loss_history, t_hist

In [51]:
theta = theta = np.zeros((x.shape[1]))
lr = 0.1
iterations = 1000
t, l, t_hist = gradient_descent(x, y, theta, lr, len(x), iterations)
t_hist = np.array(t_hist)
np.min(l)

100% |########################################################################|


19.275703034689244

In [52]:
fig, ax = plt.subplots(figsize=(10,7))
ax.plot(range(0,iterations), l)
fig.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [53]:
def accuracy(x, y, t):
    pred = np.dot(x, t)
    acc = ((pred - y)/y)*100
    val = np.mean(acc)
    return val

In [54]:
accuracy(x, y, t)

5.855971183221481

In [55]:
pred = np.dot(x, t)
r2_score(y, pred)

0.5433411679764149

In [56]:
fig, ax = plt.subplots(figsize=(10,7))
# ax = plt.axes(xlim=(0, len(x)+50), ylim=(np.min(y)-50, np.max(y)+50))
pred = np.dot(x, t)
ax.plot(x[:,1], pred, c='black')
ax.scatter(x[:,1], y)
fig.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [57]:
def split_iter(iterations):
    pbar = ProgressBar()
    length = len(str(iterations))
    div = np.arange(iterations)
    cnt_a = 2
    if length >= 5:
        cnt_a = length-2
    r = 3
    if length >=5:
        r = length - 3
        
    divisions = np.array_split(div, iterations/length**(r+1))
    cnt = 0
    q = []
    if length >= 5:
        cnt_a = 5
    for i in pbar(divisions):
        cnt += cnt_a
        f = pow(2, cnt)
        q = np.append(q, np.arange(np.min(i), np.max(i), f))
    return q
len(split_iter(10000))

100% |########################################################################|


83

In [58]:
epochs = np.arange(iterations)
theta0 = np.array(t_hist[:,0])
theta1 = np.array(t_hist[:,1])
costs = np.array(l)
xp = np.array(x[:,1])

def predict(t):
    pred = np.dot(x, t)
    return pred

div = split_iter(iterations)

div = np.array(div).astype('int')
print(len(div))

100% |########################################################################|

111





In [59]:
fig, (ax, ax3) = plt.subplots(2, 1, figsize=(13, 30))

camera = Camera(fig)

pbar = ProgressBar()

for i in pbar(div):

    ax.scatter(xp, y, color='grey')
    ax.plot(xp, predict(t_hist[i]), c='black')
    ax.legend([f'Epoch: {i}'])
    ax.set_xlabel('X')
    ax.set_ylabel('Y')

    ax3.plot(epochs, costs, color='grey')
    ax3.scatter(i, costs[i], color='black')
    ax3.legend([f'Epoch: {i}'])
    ax3.set_xlabel('Epochs')
    ax3.set_ylabel('Costs')

    camera.snap()


animation = camera.animate(interval = 100, repeat = False, repeat_delay = 500)

animation.save('visualize_gradient.gif')
plt.close()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

100% |########################################################################|


In [60]:
# Third Animation
fig = plt.figure(figsize=(10,10))  

ax2 = fig.add_subplot(111, projection='3d') # projection='3d'
ax2.set_title("cost function", fontsize=30)
ax2.view_init(elev=20., azim=145)           # set view
camera = Camera(fig)

pbar = ProgressBar()

for i in pbar(div):       

    ax2.plot_trisurf(theta0, theta1, costs, color='b', alpha=0.35) # create surface plot
    ax2.scatter(theta0[i],theta1[i],costs[i],marker='o', s=12**2, color='orange' )
    ax2.set_xlabel("w", fontsize=25, labelpad=10)
    ax2.set_ylabel("b", fontsize=25, labelpad=10)
    ax2.set_zlabel("costs", fontsize=25, labelpad=-35) # negative value for labelpad places z-label left of z-axis.
    ax2.tick_params(axis='both', which='major', labelsize=15) 
    ax2.plot(theta0[0:i],theta1[0:i],costs[0:i], linestyle="dashed",linewidth=2, color="grey") # (dashed) lineplot
    
    plt.tight_layout()
    camera.snap()
    
animation = camera.animate(interval = 5, repeat = False, repeat_delay = 500)
animation.save('SimpleLinReg_3.gif', writer = 'imagemagick')
plt.close()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

100% |########################################################################|


In [None]:
pow(4, 2)