## Optimization using gradient descent

This notebook demonstrates multivariate linear fits using the gradient descent method.


In [1]:
%matplotlib widget

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm

import sys
sys.path.append('/Users/ondrea/MLandstats/OStats/')
from ostats import ML
from ostats import dplot as dp
#from mpltools import color as colors

In [2]:
# Synthetic test data: 50 x-values in [0, 10) and y-values scattered around
# the line y = 3x - 5 with unit-variance Gaussian noise.
data_range = np.random.RandomState(1)  # fixed seed keeps the fake data reproducible
fakex = data_range.rand(50) * 10
fake_noise = data_range.randn(50)
fakey = fakex * 3 - 5 + fake_noise


# Delete any previously downloaded housing files, then fetch a fresh copy of
# housing.csv into ./data (read later with pd.read_csv).
!rm -rf ./data/housing*
!wget -P ./data https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv
    
# Fetch the annual global-temperature series into ./data; it is read later as
# ./data/annual.csv. NOTE(review): unlike the housing file, an existing copy is
# not removed first — confirm how repeated runs should behave.
!wget -P ./data https://datahub.io/core/global-temp/r/annual.csv

--2021-06-16 11:35:46--  https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1423529 (1.4M) [text/plain]
Saving to: ‘./data/housing.csv’


2021-06-16 11:35:46 (7.19 MB/s) - ‘./data/housing.csv’ saved [1423529/1423529]

--2021-06-16 11:35:46--  https://datahub.io/core/global-temp/r/annual.csv
Resolving datahub.io (datahub.io)... 172.67.157.38, 104.21.40.221
Connecting to datahub.io (datahub.io)|172.67.157.38|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://pkgstore.datahub.io/core/global-temp/annual_csv/data/a26b154688b061cdd04f1df36e4408be/annual_csv.csv [following]
--2021-06-16 11:35:48--  https://pkgstore.datahub.io/core/global-temp/an

In [3]:
# Read columns 1 and 2 of the downloaded CSV (header row skipped) as two arrays.
year, tempa = np.loadtxt(
    './data/annual.csv',
    skiprows=1,
    delimiter=",",
    usecols=(1, 2),
    unpack=True,
)
# Express each (integer) year as a fraction of the largest year in the file,
# and shift the temperature values up by one.
largest_year = np.max(year)
year = np.array(list(map(int, year))) / largest_year
tempa = np.array(tempa) + 1

In [4]:
# Re-centre the year axis so its mean sits at 1 and its full range has width 1.
year_span = np.max(year) - np.min(year)
year = 1 + (year - np.mean(year)) / year_span

In [5]:
#year

In [6]:
# Load the housing table that the wget cell saved to ./data/housing.csv.
housing_data = pd.read_csv('./data/housing.csv')

In [7]:
housing_data

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25.0,1665.0,374.0,845.0,330.0,1.5603,78100.0,INLAND
20636,-121.21,39.49,18.0,697.0,150.0,356.0,114.0,2.5568,77100.0,INLAND
20637,-121.22,39.43,17.0,2254.0,485.0,1007.0,433.0,1.7000,92300.0,INLAND
20638,-121.32,39.43,18.0,1860.0,409.0,741.0,349.0,1.8672,84700.0,INLAND


In [8]:
# Prepare a small, rescaled housing sample for the fits.
# NOTE: this cell overwrites `housing_data`; re-running it without reloading
# the CSV first would filter, resample and rescale the already-processed frame.
housing_data = housing_data[housing_data.median_house_value != 500001]  # a bunch of values are set to this
# Seed the subsample so the fitted parameters and figures are reproducible
# from run to run (same seed value as the fake-data generator).
housing_data = housing_data.sample(n=250, random_state=1)  # smaller sample
# Rescale both columns to order one; `assign` returns a new frame instead of
# setting attributes on the sampled one.
housing_data = housing_data.assign(
    median_house_value=housing_data.median_house_value / 500000,
    median_income=housing_data.median_income / 10,
)

In [9]:
housing_data.median_income.max()

0.7569899999999999

In [10]:
housing_data

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
7235,-118.14,34.01,46.0,1746.0,447.0,1296.0,392.0,0.23929,0.3136,<1H OCEAN
14376,-117.25,32.72,33.0,1677.0,228.0,629.0,239.0,0.65970,0.9928,NEAR OCEAN
9575,-120.58,37.36,33.0,3564.0,716.0,2603.0,696.0,0.22179,0.1350,INLAND
12496,-121.45,38.57,52.0,2006.0,412.0,825.0,384.0,0.32963,0.4722,INLAND
18704,-122.38,40.56,23.0,2281.0,408.0,1164.0,420.0,0.35347,0.2024,INLAND
...,...,...,...,...,...,...,...,...,...,...
11390,-117.97,33.74,16.0,1735.0,380.0,784.0,360.0,0.42566,0.2784,<1H OCEAN
16544,-121.23,37.80,11.0,2451.0,665.0,1155.0,533.0,0.22254,0.2616,INLAND
7778,-118.08,33.91,30.0,3259.0,942.0,2744.0,895.0,0.28608,0.3312,<1H OCEAN
20221,-119.27,34.29,32.0,2274.0,406.0,982.0,393.0,0.53254,0.7704,NEAR OCEAN


In [11]:
# Plain NumPy views of the two rescaled columns used in the housing fit:
# x is the median house value, y is the median income.
x_house = housing_data.loc[:, 'median_house_value'].to_numpy()
y_house = housing_data.loc[:, 'median_income'].to_numpy()

In [12]:
print(x_house)

[0.3136 0.9928 0.135  0.4722 0.2024 0.3386 0.2666 0.4574 0.2362 0.4784
 0.8376 0.275  0.3814 0.525  0.3834 0.3136 0.0924 0.5492 0.4778 0.547
 0.1992 0.7666 0.9    0.1308 0.7706 0.1638 0.1426 0.2308 0.689  0.7592
 0.325  0.209  0.4284 0.625  0.492  0.595  0.164  0.2876 0.2482 0.2064
 0.416  0.3008 0.2376 0.4958 0.4366 0.1496 0.225  0.2456 0.173  0.4452
 0.24   0.675  0.3294 0.3242 0.5344 0.1872 0.4536 0.316  0.2704 0.4814
 0.3312 0.5078 0.3586 0.512  0.1536 0.225  0.4468 0.114  0.3816 0.6914
 0.4232 0.4334 0.3592 0.1754 0.3348 0.6666 0.4528 0.5566 0.1724 0.325
 0.3876 0.328  0.4862 0.6666 0.1654 0.3478 0.6388 0.1016 0.4446 0.537
 0.488  0.3202 0.5032 0.332  0.473  0.3698 0.628  0.575  0.2222 0.5198
 0.2838 0.3568 0.2882 0.1336 0.224  0.5954 0.3034 0.4038 0.26   0.5758
 0.4652 0.5598 0.2322 0.12   0.1094 0.9778 0.193  0.5056 0.5968 0.135
 0.2002 0.8834 0.5858 0.28   0.7    0.7214 0.5126 0.2584 0.2722 0.86
 0.3222 0.116  0.585  0.4284 0.2602 0.4318 0.3522 0.4094 0.4296 0.5684
 0.2038 0.16

In [13]:
def plotfit(x, beta):
    """Evaluate the straight line described by the last entry of ``beta``.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which to evaluate the line.
    beta : sequence
        Sequence of parameter pairs; only the final pair is used, with
        element 0 as the intercept and element 1 as the slope.

    Returns
    -------
    ``beta[-1][0] + x * beta[-1][1]``
    """
    final_step = beta[-1]
    intercept = final_step[0]
    slope = final_step[1]
    return intercept + x * slope

def plotfit2(x, beta):
    """Evaluate the fitted line at ten snapshots spread across ``beta``.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which to evaluate each line.
    beta : sequence
        Sequence of parameter pairs (element 0 intercept, element 1 slope),
        one per stored iteration.

    Returns
    -------
    list
        Ten evaluations ``beta[k][0] + x * beta[k][1]`` for indices ``k``
        evenly spaced from the first to the last entry of ``beta`` (both
        included), or an empty list when ``beta`` is empty. Indices repeat
        when ``beta`` holds fewer than ten entries.
    """
    n_steps = len(beta)
    if n_steps == 0:
        return []
    # Space the sample indices over 0 .. n_steps - 1 so the final iterate is
    # always among the snapshots and no index can fall past the end.
    snapshot_idx = [int(p) for p in np.linspace(0, n_steps - 1, 10)]
    return [beta[k][0] + x * beta[k][1] for k in snapshot_idx]

### Fake data

In [14]:
# Figure 1: fake data with intermediate gradient-descent fits overlaid.
ifig = 1
plt.close(ifig)
plt.figure(ifig, figsize=(7, 6), dpi=120)
theta_fake, J_fake = ML.GradDes_Regression(x=fakex, y=fakey, gamma=0.01)
plt.scatter(fakex, fakey, c='darkred', s=15)
nlines = 10
# Evaluate the snapshots and the colour gradient once instead of on every
# loop pass (assumes dp.ColorGradient returns the same colours for the same
# arguments — TODO confirm).
fits_fake = plotfit2(fakex, beta=theta_fake)
palette_fake = dp.ColorGradient(stop=(11, 0, 59), n=nlines)
# Pair snapshot k with colour k so the gradient follows the iteration order;
# indexing the snapshots with i-1 started at the last one and drew it twice.
for i, fit_fake in enumerate(fits_fake[:nlines]):
    plt.plot(fakex, fit_fake, color=palette_fake[i], lw=1)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  stop_condition = (np.abs(theta[1] - store_theta[-1][1])/store_theta[-1][1] < epsilon) and (np.abs(theta[0] -\


### Housing data

In [15]:
# Analytic fit of the housing sample and the line it predicts.
theta_house_normal = ML.Normal_Linear_Regression(x=x_house, y=y_house)
slope_normal = np.array(theta_house_normal[1])
intercept_normal = np.array(theta_house_normal[0])
ynormal = slope_normal * x_house + intercept_normal

# Two gradient-descent runs that differ in step size, tolerance and start point.
theta_house, J_house = ML.GradDes_Regression(
    x=x_house,
    y=y_house,
    gamma=1e-2,
    epsilon=1e-6,
    theta_init=np.array([0.23, 0.37]),
)
theta_house_alt, J_house_alt = ML.GradDes_Regression(
    x=x_house,
    y=y_house,
    gamma=1e-3,
    epsilon=1e-5,
    theta_init=np.array([0.5, 0.5]),
)

# Lines predicted by the last stored parameters of each run.
last_theta = theta_house[-1]
last_theta_alt = theta_house_alt[-1]
y = np.array(last_theta[1]) * x_house + np.array(last_theta[0])
yalt = np.array(last_theta_alt[1]) * x_house + np.array(last_theta_alt[0])

In [16]:
# Figure 2: housing sample with the analytic fit and both gradient-descent fits.
ifig = 2
plt.close(ifig)
plt.figure(ifig, figsize=(7, 6), dpi=120)

plt.scatter(x_house, y=y_house, c='g', s=15, label='data')
plt.plot(x_house, ynormal, c='b', label='Analytic', lw=3)
nlines = 10  # not used by this figure; assigned as in the other plotting cells

for fitted_line, line_colour, line_label in (
    (y, 'k', 'gamma=1e-2, epsilon = 1e-6'),
    (yalt, 'r', 'gamma=1e-3,epsilon = 1e-5'),
):
    plt.plot(x_house, fitted_line, c=line_colour, label=line_label)
plt.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7ff3a6849640>

In [17]:
# Climate data: gradient-descent fit plus the analytic fit for comparison.
theta_climate, J_climate = ML.GradDes_Regression(
    x=year,
    y=tempa,
    gamma=0.01,
    epsilon=0.0001,
    theta_init=np.array([0.5, 0.5]),
)
theta_climate_normal = ML.Normal_Linear_Regression(x=year, y=tempa)
slope_climate = np.array(theta_climate_normal[1])
intercept_climate = np.array(theta_climate_normal[0])
ynormal_climate = slope_climate * year + intercept_climate

In [18]:
# Figure 3: climate data, the analytic fit, and intermediate gradient-descent fits.
ifig = 3
plt.close(ifig)
plt.figure(ifig, figsize=(7, 6), dpi=120)

plt.scatter(year, y=tempa, c='orange', s=15)
plt.plot(year, ynormal_climate, c='b')
nlines = 10
# Evaluate the snapshots and the colour gradient once instead of on every
# loop pass (assumes dp.ColorGradient returns the same colours for the same
# arguments — TODO confirm).
fits_climate = plotfit2(year, beta=theta_climate)
palette_climate = dp.ColorGradient(stop=(31, 80, 40), n=nlines)
# Pair snapshot k with colour k so the gradient follows the iteration order;
# indexing the snapshots with i-1 started at the last one and drew it twice.
for i, fit_climate in enumerate(fits_climate[:nlines]):
    plt.plot(year, fit_climate, color=palette_climate[i], lw=1)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Now take a look at convergence by plotting $J(\theta)$ against $\theta$ and $J(\theta)$ against the number of iterations

In [19]:
# Re-run both housing fits with a common step size; the runs differ only in
# tolerance and starting point. This replaces the earlier theta_house /
# theta_house_alt results.
theta_house, J_house = ML.GradDes_Regression(
    x=x_house,
    y=y_house,
    gamma=0.001,
    epsilon=0.000001,
    theta_init=np.array([0.23, 0.37]),
)
theta_house_alt, J_house_alt = ML.GradDes_Regression(
    x=x_house,
    y=y_house,
    gamma=0.001,
    epsilon=0.00001,
    theta_init=np.array([0.5, 0.5]),
)

In [20]:
# Figure 4: parameter values of the two housing runs plotted against the cost J.
ifig = 4
plt.close(ifig)
plt.figure(ifig, figsize=(7, 6), dpi=120)

# Split each stored parameter pair into separate intercept / slope histories.
theta0 = [step[0] for step in theta_house]
theta0_alt = [step[0] for step in theta_house_alt]
theta1 = [step[1] for step in theta_house]
theta1_alt = [step[1] for step in theta_house_alt]

# Iteration counters for each run (reused by later cells).
niter = range(len(theta0))
niter2 = range(len(theta0_alt))

# First run as solid lines, second run as faint dots; black = theta0, red = theta1.
dots_only = dict(marker='.', ls='')
plt.plot(J_house, theta0, color='k')
plt.plot(J_house_alt, theta0_alt, color='k', alpha=0.2, **dots_only)
plt.plot(J_house, theta1, color='r')
plt.plot(J_house_alt, theta1_alt, color='r', alpha=0.1, **dots_only)
plt.xlabel('J')
plt.ylabel('theta')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'theta')

In [21]:
# Figure 5: cost J as a function of iteration number for both housing runs.
ifig = 5
plt.close(ifig)
plt.figure(ifig, figsize=(7, 6), dpi=120)
for iterations, cost_history, line_style in (
    (niter, J_house, dict(color='b', lw=4)),
    (niter2, J_house_alt, dict(marker='_', color='k')),
):
    plt.plot(iterations, cost_history, **line_style)
plt.xlabel('Iterations')
plt.ylabel('J')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'J')

In [22]:
print(J_house[-1],J_house_alt[-1])


3.3421119371309818 3.371914935407978


In [23]:
# Figure 6: path traced in the (theta0, theta1) plane by each housing run.
ifig = 6
plt.close(ifig)
plt.figure(ifig, figsize=(7, 6), dpi=120)
plt.plot(theta0, theta1, color='b', linewidth=3)
plt.plot(theta0_alt, theta1_alt, color='y')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7ff3a71ea8b0>]

In [24]:
# Figure 7: both parameters of each housing run as a function of iteration.
ifig = 7
plt.close(ifig)
plt.figure(ifig, figsize=(7, 6), dpi=120)
# First run: plain labels. The 'alt' suffix is reserved for the second run so
# every legend entry identifies exactly one curve.
plt.plot(niter, theta0, label='theta0')
plt.plot(niter, theta1, label='theta1')

# Second run.
plt.plot(niter2, theta0_alt, label='theta0 alt')
plt.plot(niter2, theta1_alt, label='theta1 alt')
plt.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7ff3a7211f70>

In [25]:
def cost_func(theta0, theta1,x,y):
    """Half mean-squared error of the line ``theta0 + theta1 * x`` against ``y``.

    Parameters
    ----------
    theta0, theta1 : scalar or array
        Intercept and slope. Each is promoted to at least three dimensions
        before use, so scalars and lower-dimensional arrays are accepted as
        well as pre-shaped grids such as ``th0[None, :, None]`` and
        ``th1[:, None, None]``.
    x, y : 1-D arrays
        Data points; they broadcast along the last axis, which is averaged out.

    Returns
    -------
    ndarray
        ``average((y - h)**2) / 2`` over the last axis, with the leading two
        axes given by broadcasting ``theta0`` against ``theta1``.
    """
    # Promote the parameters *before* building the prediction so that it
    # always has an axis 2 to average over; promoting afterwards had no effect
    # and scalar or 1-D thetas failed on the axis=2 reduction.
    theta0 = np.atleast_3d(np.asarray(theta0))
    theta1 = np.atleast_3d(np.asarray(theta1))
    h = theta0 + theta1*x
    return np.average((y-h)**2, axis=2)/2

In [26]:
# Evaluate the cost on a square grid of (theta0, theta1) values in [0, 1];
# the grid has as many points per axis as there are housing samples.
n_grid = len(x_house)
th0 = np.linspace(0, 1.0, n_grid)
th1 = np.linspace(0, 1.0, n_grid)

# theta0 varies along axis 1 and theta1 along axis 0 of the resulting grid.
theta0_axis = th0[np.newaxis, :, np.newaxis]
theta1_axis = th1[:, np.newaxis, np.newaxis]
J_grid = cost_func(theta0_axis, theta1_axis, x_house, y_house)

In [30]:
# Figure 8: cost contours with both gradient-descent paths and the analytic solution.
ifig = 8
plt.close(ifig)
plt.figure(ifig, figsize=(7, 6), dpi=120)

# One colour value per stored iteration, so each path is shaded by progress.
n_steps = len(theta0)
n_steps_alt = len(theta0_alt)
color_bar_idxs = np.linspace(0, n_steps, n_steps)
color_bar_idxs_alt = np.linspace(0, n_steps_alt, n_steps_alt)

plt.contourf(th0, th1, J_grid)
for path0, path1, shade, colour_map in (
    (theta0, theta1, color_bar_idxs, 'magma'),
    (theta0_alt, theta1_alt, color_bar_idxs_alt, 'cividis'),
):
    plt.scatter(path0, path1, c=shade, cmap=colour_map, s=20)
# Analytic solution marked in red.
plt.scatter(theta_house_normal[0], theta_house_normal[1], c='r', s=30)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7ff3920256d0>