In [None]:
import pandas as pd
import math
import copy
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np

# Simple linear regression

In [None]:
# Toy training set: two houses.
x_train = np.array([1.0, 2.0])      # size, in units of 1000 square feet
y_train = np.array([300.0, 500.0])  # price, in units of 1000 dollars

# Connecting line first, then the two samples marked in red on top of it.
plt.plot(x_train, y_train);
plt.scatter(x_train, y_train, color='red')

## Computing cost function

In [None]:
def compute_cost(x,y,w,b):
    """Squared-error cost of the line f(x) = w*x + b over the training set.

    Args:
        x: array of inputs; x.shape[0] is the number of examples m.
        y: targets, indexed in step with x.
        w, b: slope and intercept of the model.

    Returns:
        The sum of squared residuals scaled by 1/(2*m).
    """
    n_examples = x.shape[0]
    squared_error = 0
    idx = 0
    while idx < n_examples:
        residual = (w * x[idx] + b) - y[idx]
        squared_error = squared_error + residual ** 2
        idx += 1
    return (1 / (2 * n_examples)) * squared_error

In [None]:
# Cost of the (poor) guess w=340, b=0 on the two-house training set.
compute_cost(x_train, y_train, w=340, b=0)

In [None]:
# The two-example training set, declared again before the gradient cells.
x_train = np.array([1.0, 2.0], dtype=float)      # inputs
y_train = np.array([300.0, 500.0], dtype=float)  # targets

In [None]:
def compute_gradient(x,y,w,b):
    """Gradient of the squared-error cost for the line f(x) = w*x + b.

    Args:
        x: array of inputs; x.shape[0] is the number of examples m.
        y: targets, indexed in step with x.
        w, b: current slope and intercept.

    Returns:
        (dj_dw, dj_db): the per-example terms summed and scaled by 1/m.
    """
    n_examples = x.shape[0]
    grad_w = 0
    grad_b = 0

    for idx in range(n_examples):
        residual = (w * x[idx] + b) - y[idx]
        grad_w = grad_w + residual * x[idx]
        grad_b = grad_b + residual

    inv_m = 1 / n_examples
    return inv_m * grad_w, inv_m * grad_b

In [None]:
def compute_gradient(x, y, w, b):
    """Gradient of the squared-error cost for the line f(x) = w*x + b.

    Redefines the function of the same name from the previous cell with the
    same (dj_dw, dj_db) return order.

    Args:
        x: array of inputs; x.shape[0] is the number of examples m.
        y: targets, indexed in step with x.
        w, b: current slope and intercept.

    Returns:
        (dj_dw, dj_db): summed per-example gradients divided by m.
    """
    count = x.shape[0]
    acc_w, acc_b = 0, 0

    i = 0
    while i < count:
        err = w * x[i] + b - y[i]
        acc_b = acc_b + err
        acc_w = acc_w + err * x[i]
        i += 1

    return acc_w / count, acc_b / count

In [None]:
def gradient_descent(x,y,w_in,b_in,alpha,num_iter,compute_gradient):
    """Run batch gradient descent for a fixed number of steps.

    Args:
        x, y: training inputs and targets; passed through untouched to
            compute_gradient.
        w_in, b_in: starting values of the parameters.
        alpha: learning rate.
        num_iter: number of update steps to perform.
        compute_gradient: callable (x, y, w, b) -> (dj_dw, dj_db).

    Returns:
        (w, b) after num_iter updates; (w_in, b_in) when num_iter is 0.
    """
    w, b = w_in, b_in

    for _ in range(num_iter):
        # Both gradients are evaluated at the same (w, b) before either
        # parameter is updated (simultaneous update).
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
    return w, b
        

In [None]:
# Start from the origin and take 10,000 steps with learning rate 0.01.
w_init, b_init = 0, 0
iteration = 10000
tmp_alpha = 1.0e-2
w_final, b_final = gradient_descent(
    x_train, y_train,
    w_init, b_init,
    tmp_alpha, iteration,
    compute_gradient,
)

In [None]:
# The model input is in units of 1000 sqft, so 1200 sqft is passed as 1.2.
print(f"1000 sqft house prediction {(w_final * 1.0) + b_final:0.1f} Thousand dollars")
print(f"1200 sqft house prediction {(w_final * 1.2) + b_final:0.1f} Thousand dollars")
print(f"2000 sqft house prediction {(w_final * 2.0) + b_final:0.1f} Thousand dollars")

# Simple Linear regression

In [None]:
def compute_gradient(x,y,w,b):
    """Gradient of the squared-error cost for the line f(x) = w*x + b.

    Args:
        x: 1-D array of inputs; x.shape[0] is the number of examples m.
        y: targets, indexed in step with x.
        w, b: current slope and intercept.

    Returns:
        (dj_dw, dj_db): summed per-example gradients divided by m.
    """
    m = x.shape[0]
    dj_dw = dj_db = 0

    for i, x_i in enumerate(x):
        delta = (w * x_i + b) - y[i]
        dj_dw = dj_dw + delta * x_i
        dj_db = dj_db + delta

    return dj_dw / m, dj_db / m

In [None]:
def gradient_descent(x,y,w_init,b_init,alpha,num_iter,cost_function,compute_gradient):
    """Batch gradient descent that also records the optimisation path.

    Args:
        x, y: training inputs and targets; passed through to the callables.
        w_init, b_init: starting values of the parameters.
        alpha: learning rate.
        num_iter: number of update steps to perform.
        cost_function: callable (x, y, w, b) -> cost, used for the history.
        compute_gradient: callable (x, y, w, b) -> (dj_dw, dj_db).

    Returns:
        (w, b, J_history, p_history): final parameters, the cost after each
        recorded step, and the [w, b] pair after each recorded step.
    """
    w = w_init
    b = b_init
    J_history = []
    p_history = []
    # Report about ten times over the run (every step when num_iter < 10).
    report_every = math.ceil(num_iter / 10)

    for i in range(num_iter):
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
        # Only the first 100000 steps are recorded, to bound memory use.
        if i < 100000:
            J_history.append(cost_function(x, y, w, b))
            p_history.append([w, b])
        if i % report_every == 0:
            print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
                    f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                    f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, J_history, p_history

In [None]:
# initialize parameters
w_init = 0
b_init = 0
# some gradient descent settings
iterations = 10000
tmp_alpha = 0.001
# training data: hand-entered targets, with inputs 0.0, 1.0, ..., one per target
y_train = np.array([10, 19, 28, 37, 50, 63, 70, 85, 90, 100])
x_train = np.arange(len(y_train), dtype=float)
# run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train, w_init, b_init, tmp_alpha, 
                                                    iterations, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

In [None]:
# Training points, plus the fitted line drawn between its two end points.
plt.scatter(x=x_train, y=y_train)
x_final = np.array([0.0, 10.0])         # the two x values the line is drawn between
y_final = w_final * x_final + b_final   # model output at those x values
plt.plot(x_final, y_final, 'r--');

In [None]:
# Evaluate the fitted line at a single input.
x_value = 3
y_value = (w_final * x_value) + b_final
print(y_value)

In [None]:
# Plotting the result: training data, fitted line, and the single prediction in black.
plt.scatter(x_train, y_train)
plt.plot(x_final, y_final, 'r--');
plt.scatter(x_value, y_value, color='black')

# Vectorization

In [None]:
# Multi-feature training set: every row of X_train holds four features for one house
# (the cells below label them size(sqft), bedrooms, floors, age); y_train is the price.
np.set_printoptions(precision=2)
X_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
y_train = np.array([460, 232, 178])

In [None]:
# The data lives in numpy arrays; show the shape and type of each, then its contents.
print(f"X Shape: {X_train.shape}, X Type:{type(X_train)})", X_train, sep="\n")
print(f"y Shape: {y_train.shape}, y Type:{type(y_train)})", y_train, sep="\n")

#### Cost function

In [None]:
def compute_cost(X,y,w,b):
    """Squared-error cost of the linear model f(x) = w . x + b.

    Args:
        X: array whose rows are examples; X.shape[0] is the number of rows m.
        y: targets, indexed in step with the rows of X.
        w: weight vector, dotted with each row.
        b: bias.

    Returns:
        Sum of squared residuals divided by 2*m.
    """
    n_rows = X.shape[0]
    total = 0.0
    for i in range(n_rows):
        residual = (np.dot(w, X[i]) + b) - y[i]
        total = total + residual ** 2
    return total / (2 * n_rows)

In [None]:
# Sample test case for the cost function: evaluate it at a pre-computed (w, b).
b_init = 785.1811367994083
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
cost = compute_cost(X_train, y_train, w=w_init, b=b_init)
print(f'Cost of optimal w : {cost}')

#### Gradient descent with multiple variables

In [None]:
def compute_gradient(X,y,w,b):
    """Gradient of the squared-error cost for the model f(x) = w . x + b.

    Note the return order: bias gradient first, then the weight gradient.

    Args:
        X: (m, n) array, one example per row.
        y: targets, indexed in step with the rows of X.
        w: weight vector dotted with each row.
        b: bias.

    Returns:
        (dj_db, dj_dw): scalar bias gradient and length-n weight gradient,
        both averaged over the m examples.
    """
    n_rows, n_cols = X.shape
    grad_w = np.zeros((n_cols,))
    grad_b = 0.
    for i in range(n_rows):
        residual = (np.dot(X[i], w) + b) - y[i]
        # Row-wise update: each weight gets residual * its own feature value.
        grad_w = grad_w + residual * X[i]
        grad_b = grad_b + residual

    return grad_b / n_rows, grad_w / n_rows

In [None]:
# Compute and display the gradient at the sample (w, b); the bias term comes first.
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w=w_init, b=b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

In [None]:
def gradient_descent(X,y,w_in, b_in, cost_function, gradient_function, alpha,num_iters):
    """Batch gradient descent for a model with a weight vector and a bias.

    Args:
        X, y: training data; passed through to the two callables.
        w_in: initial weights (deep-copied, so the argument is left untouched).
        b_in: initial bias.
        cost_function: callable (X, y, w, b) -> cost.
        gradient_function: callable (X, y, w, b) -> (dj_db, dj_dw).
        alpha: learning rate.
        num_iters: number of update steps.

    Returns:
        (w, b, J_history): final parameters and the cost after each recorded step.
    """
    weights = copy.deepcopy(w_in)
    bias = b_in
    J_history = []
    # Report about ten times over the run (every step when num_iters < 10).
    stride = math.ceil(num_iters / 10)

    for step in range(num_iters):
        grad_b, grad_w = gradient_function(X, y, weights, bias)
        weights = weights - alpha * grad_w
        bias = bias - alpha * grad_b

        # Cap the recorded history to guard against resource exhaustion.
        if step < 100000:
            J_history.append(cost_function(X, y, weights, bias))

        if step % stride == 0:
            print(f"Iteration {step:4d}: Cost {J_history[-1]:8.2f}   ")

    return weights, bias, J_history
    
        

In [None]:
# Start from all-zero weights (same shape as the sample w_init) and zero bias.
initial_w = np.zeros_like(w_init)
initial_b = 0.
# Gradient descent settings; the features are unscaled, hence the tiny learning rate.
iterations = 1000
alpha = 5.0e-7
# Run gradient descent.
w_final, b_final, J_hist = gradient_descent(
    X_train, y_train,
    initial_w, initial_b,
    compute_cost, compute_gradient,
    alpha, iterations,
)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# Compare the fitted model with every training target.
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

# Feature Scaling

In [None]:
def load_house_data():
    """Read ./houses.txt (comma separated, first row skipped).

    Returns:
        (X, y): the first four columns as the feature matrix and the fifth
        column as the target vector.
    """
    table = np.loadtxt("./houses.txt", delimiter=',', skiprows=1)
    features, target = table[:, :4], table[:, 4]
    return features, target

In [None]:
# Load the housing data set and dump both arrays.
X_train, y_train = load_house_data()
print(X_train, y_train)


In [None]:
def zscore_normalize_features(X):
    """Z-score normalize each column of X.

    Args:
        X: array of examples; statistics are taken along axis 0.

    Returns:
        (X_norm, mu, sigma): the normalized data, the per-column mean, and the
        per-column scale that was divided by. The scale is the standard
        deviation, except that a column with zero deviation (constant column)
        uses 1.0, so it normalizes to 0 instead of NaN/inf and the returned
        sigma stays safe to reuse on new examples.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # Guard against division by zero for constant columns.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X_norm = (X - mu) / sigma
    return (X_norm, mu, sigma)

In [None]:
# Normalize the original features and compare the column ranges before and after.
X_norm, X_mu, X_sigma = zscore_normalize_features(X_train)
print(f"X_mu = {X_mu}, \nX_sigma = {X_sigma}")
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train, axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm, axis=0)}")


In [None]:
# Dump the normalized features next to the (unchanged) targets.
print(X_norm, y_train)

In [None]:
# Fit on the z-score normalized features. Start from zero weights, one per
# feature column, so the cell does not depend on an earlier value of w_init.
w_init = np.zeros(X_norm.shape[1])
b_init = 0
alpha = 1.0e-1
nums_iter = 1000
w_final, b_final,J_hist = gradient_descent(X_norm,y_train,w_init,b_init,compute_cost,compute_gradient,alpha,nums_iter)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# The weights were learned on normalized inputs, so the predictions must be
# computed from X_norm as well (raw X_train would give meaningless values).
m,_ = X_norm.shape
for i in range(m):
    print(f"prediction: {np.dot(X_norm[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

In [None]:
X_features = ['size(sqft)', 'bedrooms', 'floors', 'age']

# Predict every training example from its normalized features.
m = X_norm.shape[0]
yp = np.zeros(m)
for i in range(m):
    yp[i] = b_final + np.dot(X_norm[i], w_final)

# One panel per feature: targets and predictions against the raw feature value.
fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)
for i, panel in enumerate(ax):
    raw_feature = X_train[:, i]
    panel.scatter(raw_feature, y_train, label='target')
    panel.set_xlabel(X_features[i])
    panel.scatter(raw_feature, yp, label='predict')
ax[0].set_ylabel("Price")
ax[0].legend();
fig.suptitle("target versus prediction using z-score normalized model")
plt.show()
    

In [None]:
# First, normalize our example with the training-set mean and scale.
x_house = np.array([1200, 3, 1, 40])
x_house_norm = (x_house - X_mu) / X_sigma
print(x_house_norm)
# Then apply the model that was fitted on normalized features.
x_house_predict = np.dot(x_house_norm, w_final) + b_final
print(f" predicted price of a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old = ${x_house_predict*1000:0.0f}")

## Using scikit-learn for feature scaling and regression

In [None]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the data again and name the four feature columns.
X_features = ['size(sqft)', 'bedrooms', 'floors', 'age']
X_train, y_train = load_house_data()
print(X_train, y_train)


#### Scale/normalize the training data

In [None]:
# Fit the scaler on the training data, then apply it to that same data.
scalar = StandardScaler().fit(X_train)
X_norm = scalar.transform(X_train)
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train, axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm, axis=0)}")

In [None]:
# Stochastic gradient descent is randomized; a fixed random_state makes the
# fitted parameters (and the iteration counts printed below) repeatable
# across re-runs of the notebook.
sgdr = SGDRegressor(max_iter=1000, random_state=0)
sgdr.fit(X_norm, y_train)
print(sgdr)
print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")

In [None]:
# Pull the fitted parameters out of the estimator and show them next to the
# reference values quoted from the previous lab.
w_norm, b_norm = sgdr.coef_, sgdr.intercept_
print(f"model parameters:                   w: {w_norm}, b:{b_norm}")
print( "model parameters from previous lab: w: [110.56 -21.27 -32.71 -37.97], b: 363.16")

In [None]:
# make a prediction using sgdr.predict()
y_pred_sgd = sgdr.predict(X_norm)
# make a prediction using w,b. 
y_pred = np.dot(X_norm, w_norm) + b_norm  
# Compare with a floating-point tolerance: two numerically equivalent
# computations are not guaranteed to agree bit-for-bit.
print(f"prediction using np.dot() and sgdr.predict match: {np.allclose(y_pred, y_pred_sgd)}")

print(f"Prediction on training set:\n{y_pred[:4]}" )
print(f"Target values \n{y_train[:4]}")

In [None]:
# Plot predictions and targets against each original (unscaled) feature.
fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)
for i, panel in enumerate(ax):
    raw_feature = X_train[:, i]
    panel.scatter(raw_feature, y_train, label='target')
    panel.set_xlabel(X_features[i])
    panel.scatter(raw_feature, y_pred, label='predict')
ax[0].set_ylabel("Price")
ax[0].legend();
fig.suptitle("target versus prediction using z-score normalized model")
plt.show()

# Linear regression using scikit-learn

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Sample data: a single feature and its target, two examples.
X_train, y_train = np.array([1.0, 2.0]), np.array([300, 500])

In [None]:
linear_model = LinearRegression()
# fit() expects a 2-D feature matrix, so the 1-D input is turned into a single column.
linear_model.fit(X_train[:, np.newaxis], y_train)

In [None]:
b = linear_model.intercept_
w = linear_model.coef_
print(f"w = {w:}, b = {b:0.2f}")
# NOTE(review): the training feature takes the values 1.0 and 2.0, which the
# first section of this notebook describes as size in 1000 sqft; a 1200 sqft
# house is therefore 1.2, not 1200 - confirm the intended units.
print(f"'manual' prediction: f_wb = wx+b : {1.2*w + b}")

In [None]:
y_pred = linear_model.predict(X_train.reshape(-1, 1))

print("Prediction on training set:", y_pred)

# NOTE(review): assumes the units used in the first section of this notebook
# (feature in 1000 sqft, target in 1000s of dollars) - confirm. Under that
# assumption 1200 sqft is 1.2 and the prediction is scaled by 1000 to get dollars.
X_test = np.array([[1.2]])
print(f"Prediction for 1200 sqft house: ${linear_model.predict(X_test)[0]*1000:0.2f}")

In [None]:
# Using multiple features: name the four columns, then load the data set.
X_features = ['size(sqft)', 'bedrooms', 'floors', 'age']
X_train, y_train = load_house_data()

In [None]:
# Least-squares fit on the raw (unscaled) features.
linear_model = LinearRegression()
linear_model.fit(X=X_train, y=y_train)

In [None]:
# Fitted weights (one per feature) and intercept.
w, b = linear_model.coef_, linear_model.intercept_
print(f"w = {w:}, b = {b:0.2f}")

In [None]:
# First four training predictions: via predict(), via the explicit formula, and the targets.
print(f"Prediction on training set:\n {linear_model.predict(X_train)[:4]}")
print(f"prediction using w,b:\n {(X_train @ w + b)[:4]}")
print(f"Target values \n {y_train[:4]}")

# A single new house, shaped as one row of four features.
x_house = np.array([[1200, 3, 1, 40]])
x_house_predict = linear_model.predict(x_house)[0]
print(f" predicted price of a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old = ${x_house_predict*1000:0.2f}")

# Using new housing csv with more features

In [None]:
# Load the housing-price table and preview the first five rows.
hp = pd.read_csv('./housing.csv')
hp.head(5)

In [None]:
# Columns used as model inputs.
X_features = [
    'longitude', 'latitude', 'housing_median_age', 'total_rooms',
    'population', 'households', 'median_income',
]
X_train = hp.loc[:, X_features]
X_train.head()

In [None]:
# Target column.
y_train = hp.loc[:, 'median_house_value']
y_train

In [None]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

In [None]:
# Fit the scaler on the selected columns, then transform those same columns.
scalar = StandardScaler().fit(X_train)
X_norm = scalar.transform(X_train)
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train, axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm, axis=0)}")

In [None]:
# Stochastic gradient descent is randomized; a fixed random_state makes the
# fitted parameters repeatable across re-runs of the notebook.
sgdr = SGDRegressor(max_iter=1000, random_state=0)
sgdr = sgdr.fit(X_norm,y_train)
print(sgdr)
print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")

In [None]:
# Fitted weights and intercept (for the normalized features).
w_norm, b_norm = sgdr.coef_, sgdr.intercept_
print(f"model parameters:                   w: {w_norm}, b:{b_norm}")

In [None]:
# Predict with the estimator and with the explicit formula w . x + b.
y_pred_sgd = sgdr.predict(X_norm)
y_pred = np.dot(X_norm, w_norm) + b_norm  

# Compare with a floating-point tolerance: two numerically equivalent
# computations are not guaranteed to agree bit-for-bit.
print(f"prediction using np.dot() and sgdr.predict match: {np.allclose(y_pred, y_pred_sgd)}")
print(f"Prediction on training set:\n{y_pred[:4]}" )
print(f"Target values \n{y_train[:4]}")

### Finding the R² score (goodness of fit)

In [None]:
from sklearn.linear_model import LinearRegression
# Fit ordinary least squares on the raw features and report score() as a percentage.
regressor = LinearRegression().fit(X_train, y_train)
r2_score = regressor.score(X_train, y_train)
print(r2_score * 100, '%')

### Plotting target and prediction

In [None]:
# Plot predictions and targets against each original feature, one panel per feature.
fig,ax=plt.subplots(1,len(X_features),figsize=(12,3),sharey=True)
for i in range(len(ax)):
    feature_values = X_train[X_features[i]]   # the column for this panel
    ax[i].scatter(feature_values,y_pred,label = 'predict')
    ax[i].set_xlabel(X_features[i])
    ax[i].scatter(feature_values,y_train,label = 'target')
ax[0].set_ylabel("Price"); ax[0].legend();
fig.suptitle("target versus prediction using z-score normalized model")
plt.show()

# Logistic Regression

## Classification

In [None]:
# One-feature classification set.
x_train = np.array([0, 1, 2, 3, 4, 5], dtype=float)
y_train = np.array([0, 0, 0, 1, 1, 1])

# Two-feature classification set (one example per row).
X_train2 = np.array([
    [0.5, 1.5],
    [1, 1],
    [1.5, 0.5],
    [3, 0.5],
    [2, 2],
    [1, 2.5],
])
y_train2 = np.array([0, 0, 0, 1, 1, 1])

In [None]:
# Boolean masks for the one-variable data set.
pos = y_train == 1
neg = y_train == 0

fig,ax = plt.subplots(1,2,figsize=(8,3))
#plot 1, single variable
ax[0].scatter(x_train[pos], y_train[pos], marker='x', s=100, c = 'red', label="y=1")
ax[0].scatter(x_train[neg], y_train[neg], marker='o', s=100, label="y=0", c='blue', 
              lw=3)

ax[0].set_ylim(-0.08,1.1)
ax[0].set_ylabel('y', fontsize=12)
ax[0].set_xlabel('x', fontsize=12)
ax[0].set_title('one variable plot')
ax[0].legend()

#plot 2, two variables
# The two features go on the two axes (x_0 horizontal, x_1 vertical); the
# class only selects marker and colour. Masks come from this data set's own labels.
pos2 = y_train2 == 1
neg2 = y_train2 == 0
ax[1].scatter(X_train2[pos2, 0], X_train2[pos2, 1], marker='x', s=80, c = 'red', label="y=1")
ax[1].scatter(X_train2[neg2, 0], X_train2[neg2, 1], marker='o', s=100, label="y=0", facecolors='none', 
              edgecolors='blue', lw=3)
ax[1].axis([0, 4, 0, 4])
ax[1].set_ylabel('$x_1$', fontsize=12)
ax[1].set_xlabel('$x_0$', fontsize=12)
ax[1].set_title('two variable plot')
ax[1].legend()
plt.tight_layout()
plt.show()


## Using linear regression

In [None]:
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()
# fit() expects a 2-D feature matrix, so the 1-D input is turned into a single column.
linear_model.fit(x_train[:, np.newaxis], y_train)

w, b = linear_model.coef_, linear_model.intercept_
print(f"w = {w:}, b = {b:0.2f}")
# NOTE(review): 1200 lies far outside the 0..5 range of x_train; presumably
# carried over from the housing example - confirm the intended input.
print(f"'manual' prediction: f_wb = wx+b : {1200*w + b}")

In [None]:
#plotting
fig,ax = plt.subplots(1,1,figsize=(8,3))
#plot 1, single variable
ax.scatter(x_train[pos], y_train[pos], marker='x', s=100, c = 'red', label="y=1")
ax.scatter(x_train[neg], y_train[neg], marker='o', s=100, label="y=0", c='blue', 
              lw=3)

# Fitted line: the model output w*x + b on the vertical axis against x on the
# horizontal axis, so it shares the axes of the scattered data.
y_line = w * x_train + b
ax.plot(x_train, y_line)

ax.set_ylabel('y', fontsize=12)
ax.set_xlabel('x', fontsize=12)
ax.set_title('One variable plot')
ax.legend()


## Using sigmoid function

In [None]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise by numpy.

    Args:
        z: scalar or numpy array.

    Returns:
        The sigmoid of z (same shape as z for array input).
    """
    return 1 / (np.exp(-z) + 1)

In [None]:
# Integer inputs from -10 to 10 inclusive.
z_tmp = np.arange(-10, 11)

# Sigmoid of each input, using the function implemented above.
y = sigmoid(z_tmp)

# Print inputs and outputs side by side as two columns.
np.set_printoptions(precision=3)
print("Input (z), Output (sigmoid(z))")
print(np.column_stack((z_tmp, y)))

In [None]:
# Plot z against sigmoid(z) as a blue line.
fig, ax = plt.subplots(1, 1, figsize=(5, 3))
ax.plot(z_tmp, y, color="b")

ax.set_title("Sigmoid function")
ax.set_ylabel('sigmoid(z)')
ax.set_xlabel('z')

In [None]:
# One-feature classification set.
x_train = np.array([0, 1, 2, 3, 4, 5], dtype=float)
y_train = np.array([0, 0, 0, 1, 1, 1])

# Initial parameters: a single zero weight and a zero bias.
w_in = np.zeros(1)
b_in = 0

## Plotting functions 

In [None]:
#plotting
def plot_logistic_1d(X,y):
    """Scatter a one-feature binary data set, class 1 as red crosses and
    class 0 as hollow blue circles.

    Plots the data passed in as arguments (not notebook globals), so it can
    be reused with any data set.

    Args:
        X: 1-D numpy array of feature values.
        y: 1-D numpy array of 0/1 labels, same length as X.

    The figure is left open (plt.show() is not called) so the caller can
    draw further artists on the current axes before showing it.
    """
    pos = y == 1
    neg = y == 0

    fig,ax = plt.subplots(1,1,figsize=(4,3))
    #plot 1, single variable
    ax.scatter(X[pos], y[pos], marker='x', s=80, c = 'red', label="y=1")
    ax.scatter(X[neg], y[neg], marker='o', s=100, label="y=0", facecolors='none', 
                edgecolors="blue",lw=3)

    ax.set_ylim(-0.08,1.1)
    ax.set_ylabel('y', fontsize=12)
    ax.set_xlabel('x', fontsize=12)
    ax.set_title('one variable plot')
    ax.legend()
    plt.tight_layout()

In [None]:
def plot_logistic_2d(X,y):
    """Scatter a two-feature binary data set on a new figure and show it.

    Args:
        X: array indexed as X[mask, 0] / X[mask, 1] for the two features.
        y: 0/1 labels; flattened with reshape(-1,), so 1-D or column-shaped
            labels both work.
    """
    fig, ax = plt.subplots(1, 1, figsize=(4, 4))

    # Flat boolean masks for the positive and negative examples.
    is_pos = (y == 1).reshape(-1,)
    is_neg = (y == 0).reshape(-1,)

    ax.scatter(X[is_pos, 0], X[is_pos, 1], marker='x', s=80, c='red', label="y=1")
    ax.scatter(X[is_neg, 0], X[is_neg, 1], marker='o', s=80, label="y=0",
               facecolors='none', edgecolors='blue', lw=3)
    ax.legend(loc='best')

    ax.axis([0, 4, 0, 3.5])
    ax.set_ylabel('$x_1$')
    ax.set_xlabel('$x_0$')
    plt.show()

In [None]:
def plot_sigmoid(z):
    """Plot sigmoid(z) against z as a blue line on a new 5x3 figure.

    Args:
        z: array of inputs, passed to sigmoid() and used as the x values.
    """
    fig, ax = plt.subplots(1, 1, figsize=(5, 3))
    curve = sigmoid(z)
    ax.plot(z, curve, c="b")

    ax.set_title("Sigmoid function")
    ax.set_ylabel('sigmoid(z)')
    ax.set_xlabel('z')

In [None]:
# Show the one-feature classification data.
plot_logistic_1d(X=x_train, y=y_train)

## Decision boundary

In [None]:
# Two-feature data set; the labels are stored as a column vector of shape (6, 1).
X = np.array([
    [0.5, 1.5],
    [1, 1],
    [1.5, 0.5],
    [3, 0.5],
    [2, 2],
    [1, 2.5],
])
y = np.array([[0], [0], [0], [1], [1], [1]])
print(X)
print(y)

### Plotting functions

In [None]:
# Show the labelled data (this call displays its own figure).
plot_logistic_2d(X, y)

def f(x,w,b):
    """Straight line w*x + b."""
    return w * x + b

# On a separate figure, draw the line with slope -3 and intercept 3 over [-10, 10].
fig, ax = plt.subplots(1, 1, figsize=(5, 4))
x = np.linspace(-10, 10, num=100)
ax.plot(x, f(x, -3, 3), color='red')


In [None]:

# Sigmoid curve over the integers -10..10.
plot_sigmoid(np.arange(-10, 11, 1))

## Computing cost function

In [None]:
def compute_cost_logistic(X,y,w,b):
    """Average logistic (cross-entropy) loss of the model sigmoid(x . w + b).

    Args:
        X: array of examples; X.shape[0] is the number of examples m.
        y: 0/1 labels, indexed in step with X.
        w: weights dotted with each example.
        b: bias.

    Returns:
        The per-example losses summed and divided by m.
    """
    n_examples = X.shape[0]
    total = 0.0
    for i in range(n_examples):
        prob_i = sigmoid(np.dot(X[i], w) + b)
        loss_i = -y[i]*np.log(prob_i) - (1-y[i])*np.log(1-prob_i)
        total = total + loss_i
    return total / n_examples

In [None]:
# Two-feature training set, shape (m, n) = (6, 2), with its 0/1 labels.
X_train = np.array([
    [0.5, 1.5],
    [1, 1],
    [1.5, 0.5],
    [3, 0.5],
    [2, 2],
    [1, 2.5],
])
y_train = np.array([0, 0, 0, 1, 1, 1])
# Logistic cost at the trial parameters w = [1, 1], b = -3.
w_tmp, b_tmp = np.array([1, 1]), -3
print(compute_cost_logistic(X_train, y_train, w_tmp, b_tmp))

In [None]:
import matplotlib.pyplot as plt

def plot_data(X, y, ax, pos_label="y=1", neg_label="y=0", s=80, loc='best' ):
    """Scatter two-feature logistic data on an existing axes object.

    Args:
        X: array indexed as X[mask, 0] / X[mask, 1] for the two features.
        y: 0/1 labels; flattened with reshape(-1,), so 1-D or column-shaped
            labels both work.
        ax: axes to draw on.
        pos_label, neg_label: legend labels for the two classes.
        s: marker size.
        loc: legend location.
    """
    # Flat boolean masks for the positive and negative examples.
    is_pos = (y == 1).reshape(-1,)
    is_neg = (y == 0).reshape(-1,)

    # Positives as red crosses, negatives as hollow blue circles.
    ax.scatter(X[is_pos, 0], X[is_pos, 1], marker='x', s=s, c='red', label=pos_label)
    ax.scatter(X[is_neg, 0], X[is_neg, 1], marker='o', s=s, label=neg_label,
               facecolors='none', edgecolors='blue', lw=3)
    ax.legend(loc=loc)

    # Switch off the three visibility flags on the figure canvas.
    canvas = ax.figure.canvas
    for flag in ("toolbar_visible", "header_visible", "footer_visible"):
        setattr(canvas, flag, False)

# Integer x0 values 0..5 along the horizontal axis.
x0 = np.arange(0, 6)

# Two candidate boundaries: x0 + x1 = 3 and x0 + x1 = 4.
x1 = 3 - x0
x1_other = 4 - x0

fig, ax = plt.subplots(1, 1, figsize=(4, 4))
# Draw both boundaries.
ax.plot(x0, x1, c='blue', label="$b$=-3")
ax.plot(x0, x1_other, c='red', label="$b$=-4")
ax.axis([0, 4, 0, 4])

# Overlay the training data, then re-apply the axis limits.
plot_data(X_train, y_train, ax)
ax.axis([0, 4, 0, 4])
ax.set_ylabel('$x_1$', fontsize=12)
ax.set_xlabel('$x_0$', fontsize=12)
plt.legend(loc="upper right")
plt.title("Decision Boundary")
plt.show()

In [None]:
# Same weights for both candidates; only the bias differs.
w_array1, b_1 = np.array([1, 1]), -3
w_array2, b_2 = np.array([1, 1]), -4

print("Cost for b = -3 : ", compute_cost_logistic(X_train, y_train, w_array1, b_1))
print("Cost for b = -4 : ", compute_cost_logistic(X_train, y_train, w_array2, b_2))

## Gradient descent

In [None]:
def compute_gradient_logistic(X,y,w,b):
    """Gradient of the logistic cost for the model sigmoid(x . w + b).

    Note the return order: bias gradient first, then the weight gradient.

    Args:
        X: array of examples; X.shape[0] is the number of examples m.
        y: 0/1 labels, indexed in step with X.
        w: weights dotted with each example.
        b: bias.

    Returns:
        (dj_db, dj_dw): per-example terms summed and divided by m.
    """
    n_examples = X.shape[0]
    grad_w = 0
    grad_b = 0
    for i in range(n_examples):
        err_i = sigmoid(np.dot(X[i], w) + b) - y[i]
        grad_w = grad_w + err_i * X[i]
        grad_b = grad_b + err_i
    return grad_b / n_examples, grad_w / n_examples

In [None]:
# Evaluate the logistic gradient at a trial point (w = [2, 3], b = 1).
X_tmp = np.array([
    [0.5, 1.5],
    [1, 1],
    [1.5, 0.5],
    [3, 0.5],
    [2, 2],
    [1, 2.5],
])
y_tmp = np.array([0, 0, 0, 1, 1, 1])
w_tmp, b_tmp = np.array([2., 3.]), 1.
dj_db_tmp, dj_dw_tmp = compute_gradient_logistic(X_tmp, y_tmp, w_tmp, b_tmp)
print(f"dj_db: {dj_db_tmp}")
print(f"dj_dw: {dj_dw_tmp.tolist()}")

In [None]:
def gradient_descent(X,y,w_in,b_in,alpha,num_iters):
    """Batch gradient descent for logistic regression.

    Uses compute_gradient_logistic for the update direction and
    compute_cost_logistic for the recorded cost history.

    Args:
        X, y: training data; passed through to the two helpers.
        w_in, b_in: starting values of the parameters (not modified; each
            update builds a new value).
        alpha: learning rate.
        num_iters: number of update steps.

    Returns:
        (w, b, J_history): final parameters and the cost after each recorded step.
    """
    w = w_in
    b = b_in
    J_history = []
    # Report about ten times over the run (every step when num_iters < 10).
    report_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        dj_db , dj_dw = compute_gradient_logistic(X,y,w,b)
        w = w-alpha*dj_dw
        b = b-alpha*dj_db
        # Only the first 100000 steps are recorded, to bound memory use.
        if i<100000:
            J_history.append( compute_cost_logistic(X, y, w, b) )

        if i % report_every == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")
        
    return w, b, J_history         #return final w,b and J history for graphing

In [None]:
# Zero weights shaped like one training example, zero bias.
w_tmp = np.zeros_like(X_train[0])
b_tmp = 0.
alph, iters = 0.1, 10000

w_out, b_out, _ = gradient_descent(
    X_train, y_train,
    w_tmp, b_tmp,
    alph, iters,
)
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")

In [None]:
import matplotlib.pyplot as plt

def plot_data(X, y, ax, pos_label="y=1", neg_label="y=0", s=80, loc='best' ):
    """Scatter two-feature logistic data on an existing axes object.

    Args:
        X: array indexed as X[mask, 0] / X[mask, 1] for the two features.
        y: 0/1 labels; flattened with reshape(-1,), so 1-D or column-shaped
            labels both work.
        ax: axes to draw on.
        pos_label, neg_label: legend labels for the two classes.
        s: marker size.
        loc: legend location.
    """
    positives = (y == 1).reshape(-1,)
    negatives = (y == 0).reshape(-1,)

    # Class 1: red crosses.
    pos_x0, pos_x1 = X[positives, 0], X[positives, 1]
    ax.scatter(pos_x0, pos_x1, marker='x', s=s, c='red', label=pos_label)

    # Class 0: hollow blue circles.
    neg_x0, neg_x1 = X[negatives, 0], X[negatives, 1]
    ax.scatter(neg_x0, neg_x1, marker='o', s=s, label=neg_label,
               facecolors='none', edgecolors='blue', lw=3)
    ax.legend(loc=loc)

    # Switch off the three visibility flags on the figure canvas.
    canvas = ax.figure.canvas
    canvas.toolbar_visible = False
    canvas.header_visible = False
    canvas.footer_visible = False

fig, ax = plt.subplots(1, 1, figsize=(5, 4))

# Axes set-up, then the original data.
ax.set_ylabel(r'$x_1$')
ax.set_xlabel(r'$x_0$')
ax.axis([0, 4, 0, 3.5])
plot_data(X_train, y_train, ax)

# Decision boundary w0*x0 + w1*x1 + b = 0, drawn between its two axis intercepts.
x0 = -b_out / w_out[0]   # intercept on the x0 axis (where x1 = 0)
x1 = -b_out / w_out[1]   # intercept on the x1 axis (where x0 = 0)
ax.plot([0, x0], [x1, 0], c='blue', lw=1)
plt.show()

## 1 feature visualization

In [None]:
# 1 feature: fit logistic regression on the one-variable data set.
x_train = np.array([0, 1, 2, 3, 4, 5], dtype=float)
y_train = np.array([0, 0, 0, 1, 1, 1])

# Zero weight shaped like a single example (a scalar here), zero bias.
w_tmp = np.zeros_like(x_train[0])
b_tmp = 0.
alph, iters = 0.1, 10000

w_out, b_out, _ = gradient_descent(
    x_train, y_train,
    w_tmp, b_tmp,
    alph, iters,
)
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")

# Data first, then the line w*x + b between x = 0 and x = 3 on the same axes.
plot_logistic_1d(x_train, y_train)
plt.plot([0, 3], [b_out, 3 * w_out + b_out])
plt.show()