In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from functions import *
import math

%matplotlib inline

## Preprocessing
Convert the data file into two NumPy arrays (features `X` and labels `y`) and apply min-max normalization to the features.

In [4]:
# Load the dataset, encode its non-numeric columns, and split it into features and labels
def preprocess(file):
    """
    Load a shopping-session CSV and return normalized features and labels.

    Non-numeric columns are encoded as integers before the frame is turned
    into a NumPy array: 'Month' and 'VisitorType' through fixed lookup
    tables, and the boolean 'Weekend' and 'Revenue' columns as 1/0.

    Args:
      file : path (or buffer) accepted by pandas.read_csv

    Returns:
      The (X, y) pair produced by split_data on the encoded array.

    Raises:
      ValueError : if any cell is still non-numeric after encoding
                   (for example a month spelling missing from the mapping)
    """
    # put data into a pandas dataframe
    df = pd.read_csv(file)

    # define replacement mappings
    bool_mapping = {
        "TRUE": 1,
        "FALSE": 0,
    }

    month_mapping = {
        "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "June": 6,
        "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
    }

    visit_mapping = {
        "New_Visitor": 1, "Returning_Visitor": 2, "Other": 3,
    }

    # The flag columns may be parsed as real booleans or as text; going
    # through upper-case strings lets one mapping cover both cases.
    for column in ('Weekend', 'Revenue'):
        df[column] = df[column].astype(str).str.upper().replace(bool_mapping)

    # Replace the remaining non-integer values
    df['Month'] = df['Month'].replace(month_mapping)
    df['VisitorType'] = df['VisitorType'].replace(visit_mapping)

    # Request a float array explicitly: this does not depend on pandas
    # downcasting the replaced columns, and a leftover string raises here
    # instead of silently producing an object-dtype array.
    data_array = df.to_numpy(dtype=float)

    return split_data(data_array)

def split_data(data, n_features=17):
    """
    Split an encoded data array into normalized features and labels.

    Args:
      data : (ndarray Shape (m, k)) one row per example, with k > n_features
      n_features : (int) number of leading columns treated as features; the
                   column at index n_features is the label. Defaults to 17,
                   the layout used throughout this notebook.

    Returns:
      X : the first n_features columns after being passed through normalize
      y : (ndarray Shape (m,)) the label column, returned unscaled
    """
    X = data[:, :n_features]
    y = data[:, n_features]
    return normalize(X), y

def normalize(x_unn):
    """
    Rescale an un-normalized feature array by delegating to min_max.

    Args:
      x_unn : array of raw feature values

    Returns:
      Whatever min_max returns for x_unn.
    """
    # min_max is not defined in this notebook; presumably it comes from the
    # star-imported `functions` module.
    # NOTE(review): the X_train values printed further down are tiny in every
    # column (mostly ~1e-4, none above ~1e-1), which suggests min_max scales by
    # one global range rather than per feature -- confirm in functions.py.
    scaled = min_max(x_unn)
    return scaled
    


In [5]:
# Load the training CSV; preprocess returns the normalized features and the labels.
X_train, y_train = preprocess("Project1_data/shopping.csv")

In [6]:
# Sanity-check the feature matrix: first rows, container type, and dimensions.
print("First five elements in X_train are:\n", X_train[:5])
print("Type of X_train:",type(X_train))
print("Shape of X_train:", X_train.shape)

First five elements in X_train are:
 [[4.68944009e-05 2.22748404e-03 0.00000000e+00 0.00000000e+00
  7.50310415e-04 1.64483042e-02 6.79628985e-08 2.03888696e-07
  0.00000000e+00 0.00000000e+00 1.71946137e-04 1.56314670e-05
  1.25051736e-04 9.37888018e-05 1.71946137e-04 3.12629339e-05
  0.00000000e+00]
 [9.37888018e-05 6.83706773e-03 3.12629339e-05 3.68199205e-03
  1.29741176e-03 3.91393454e-02 3.43548694e-08 7.68464175e-08
  3.26106555e-05 0.00000000e+00 4.68944009e-05 3.12629339e-05
  3.12629339e-05 4.68944009e-05 3.12629339e-05 3.12629339e-05
  0.00000000e+00]
 [1.56314670e-05 6.42844079e-04 0.00000000e+00 0.00000000e+00
  1.96956484e-03 6.73716956e-02 1.07518076e-08 2.00439847e-07
  5.39453198e-05 0.00000000e+00 1.71946137e-04 3.12629339e-05
  3.12629339e-05 3.12629339e-05 3.12629339e-05 3.12629339e-05
  0.00000000e+00]
 [3.12629339e-05 2.20403684e-03 0.00000000e+00 0.00000000e+00
  1.56314670e-04 9.48308997e-03 1.30262220e-07 4.12497047e-07
  5.73241753e-04 0.00000000e+00 1.2505173

In [7]:
# Sanity-check the label vector: first entries, container type, and dimensions.
print("First five elements in y_train are:\n", y_train[:5])
print("Type of y_train:",type(y_train))
print("Shape of y_train:", y_train.shape)

First five elements in y_train are:
 [0. 1. 0. 0. 0.]
Type of y_train: <class 'numpy.ndarray'>
Shape of y_train: (5000,)


In [8]:
# Report the dataset dimensions; m is taken from the number of labels.
print ('The shape of X_train is: ' + str(X_train.shape))
print ('The shape of y_train is: ' + str(y_train.shape))
print ('We have m = %d training examples' % (len(y_train)))

The shape of X_train is: (5000, 17)
The shape of y_train is: (5000,)
We have m = 5000 training examples


Visualizing Data

In [9]:
def sigmoid(z):
    """
    Logistic function 1 / (1 + e^(-z)).

    Args:
      z : scalar or ndarray of any shape

    Returns:
      The logistic function applied to z (element-wise for arrays).
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [23]:
# cost function for the regression model
def compute_cost(X, y, w, b, *argv):
    """
    Computes the logistic-regression (cross-entropy) cost over all examples
    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (ndarray Shape (m,))  target value 
      w : (ndarray Shape (n,))  values of parameters of the model      
      b : (scalar)              value of bias parameter of the model
      *argv : unused, for compatibility with regularized version below
    Returns:
      total_cost : (scalar) cost 
    """

    m, _ = X.shape

    z = np.dot(X, w) + b

    # With s = sigmoid(z), the per-example loss
    #     -[y*log(s) + (1-y)*log(1-s)]
    # simplifies algebraically to log(1 + e^z) - y*z. Evaluating it in this
    # form through logaddexp never takes log(0), so a saturated prediction
    # no longer turns the cost into inf or nan (the direct form computes
    # 0 * log(0) for a confidently correct example).
    per_example_loss = np.logaddexp(0, z) - y * z
    total_cost = (1 / m) * np.sum(per_example_loss)

    return total_cost

Test the compute cost function with w and b values

In [20]:
def compute_gradient(X, y, w, b, *argv):
    """
    Computes the gradient of the unregularized logistic cost.

    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (ndarray Shape (m,))  target value
      w : (ndarray Shape (n,))  values of parameters of the model
      b : (scalar)              value of bias parameter of the model
      *argv : unused; lets callers pass extra arguments such as lambda_

    Returns:
      dj_db : (scalar)             gradient of the cost w.r.t. b
      dj_dw : (ndarray Shape (n,)) gradient of the cost w.r.t. w
    """
    num_examples = X.shape[0]

    # Model output for every example, then its signed error against the label
    activations = 1 / (1 + np.exp(-(np.dot(X, w) + b)))
    residual = activations - y

    dj_dw = np.dot(X.T, residual) / num_examples
    dj_db = np.mean(residual)

    return dj_db, dj_dw

In [21]:
# Descent algorithm, written by Dr. Kristofferson Culmer
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, lambda_): 
    """
    Performs batch gradient descent to learn w and b. Updates the parameters
    by taking num_iters gradient steps with learning rate alpha
    
    Args:
      X :    (ndarray Shape (m, n) data, m examples by n features
      y :    (ndarray Shape (m,))  target value 
      w_in : (ndarray Shape (n,))  Initial values of parameters of the model
      b_in : (scalar)              Initial value of parameter of the model
      cost_function :              function to compute cost, called as
                                   cost_function(X, y, w, b, lambda_)
      gradient_function :          function to compute gradient, called as
                                   gradient_function(X, y, w, b, lambda_)
                                   and returning (dj_db, dj_dw)
      alpha : (float)              Learning rate
      num_iters : (int)            number of iterations to run gradient descent
      lambda_ : (scalar, float)    regularization constant
      
    Returns:
      w : (ndarray Shape (n,)) Updated values of parameters of the model after
          running gradient descent
      b : (scalar)                Updated value of parameter of the model after
          running gradient descent
      J_history : (list) cost after each of the first 100000 updates
      w_history : (list) value of w at every iteration that printed progress
    """
    
    # Lists to store cost J and w's, primarily for graphing later
    J_history = []
    w_history = []

    # Progress is printed about ten times per run (every iteration when
    # num_iters < 10); the interval does not change inside the loop.
    report_every = math.ceil(num_iters / 10)
    
    for i in range(num_iters):

        # Calculate the gradient at the current parameters
        dj_db, dj_dw = gradient_function(X, y, w_in, b_in, lambda_)   

        # Update parameters using w, b, alpha and gradient
        w_in = w_in - alpha * dj_dw               
        b_in = b_in - alpha * dj_db              
       
        # Save cost J for this iteration
        if i < 100000:      # prevent resource exhaustion 
            J_history.append(cost_function(X, y, w_in, b_in, lambda_))

        # Print the most recently stored cost at the reporting interval and on the final step
        if i % report_every == 0 or i == (num_iters - 1):
            w_history.append(w_in)
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.2f}   ")
        
    return w_in, b_in, J_history, w_history #return w and J,w history for graphing

In [25]:
# Runs the algorithm above to learn parameters
# Seed NumPy's global RNG so the random initial weights are the same on every run
np.random.seed(1)
# 17 small random weights in [-0.005, 0.005)
initial_w = 0.01 * (np.random.rand(17) - 0.5)
initial_b = -8

# Some gradient descent settings
iterations = 100000
alpha = 0.01

# The trailing 0 is lambda_; compute_cost and compute_gradient take it through *argv and do not use it.
# The fourth return value (w_history) is discarded.
w,b, J_history,_ = gradient_descent(X_train ,y_train, initial_w, initial_b, compute_cost, compute_gradient, alpha, iterations, 0)

Iteration    0: Cost     1.23   
Iteration 10000: Cost     0.43   
Iteration 20000: Cost     0.43   
Iteration 30000: Cost     0.43   
Iteration 40000: Cost     0.43   
Iteration 50000: Cost     0.43   
Iteration 60000: Cost     0.43   
Iteration 70000: Cost     0.43   
Iteration 80000: Cost     0.43   
Iteration 90000: Cost     0.43   
Iteration 99999: Cost     0.43   


In [26]:
def predict(X, w, b, threshold=0.5): 
    """
    Predict whether the label is 0 or 1 using learned logistic
    regression parameters w and b
    
    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      w : (ndarray Shape (n,))  values of parameters of the model      
      b : (scalar)              value of bias parameter of the model
      threshold : (float)       minimum predicted probability that is
                                labelled 1; defaults to 0.5

    Returns:
      p : (ndarray (m,)) The predictions for X (1.0 where the predicted
          probability is >= threshold, 0.0 elsewhere)
    """
    # Score every example in one matrix-vector product instead of looping
    # over rows in Python.
    probabilities = sigmoid(np.dot(X, w) + b)

    # Boolean mask -> float array of 0./1., the same dtype np.zeros(m) gave.
    p = (probabilities >= threshold).astype(float)
    return p

In [27]:
#Compute accuracy on our training set
# p == y_train is an element-wise match; its mean is the fraction of correct labels, scaled to a percentage.
p = predict(X_train, w,b)
print('Train Accuracy: %f'%(np.mean(p == y_train) * 100))

Train Accuracy: 84.580000


In [28]:
# Score the learned parameters on the separate unseen.csv file.
# NOTE(review): preprocess normalizes this file on its own, so its features
# may not be scaled with the same range as the training data -- confirm
# against min_max.
X_test, y_test = preprocess("Project1_data/unseen.csv")
p = predict(X_test, w,b)
# This is accuracy on the unseen set, so it is reported as test accuracy.
print('Test Accuracy: %f'%(np.mean(p == y_test) * 100))

Train Accuracy: 84.800000


## Regularization

In [29]:
# Reload the training data so this section starts from a fresh X_train / y_train.
X_train, y_train = preprocess("Project1_data/shopping.csv")
# map_feature is not defined in this notebook (presumably from the star-imported
# `functions` module); it is given columns 0 and 16 of X_train.
X_mapped = map_feature(X_train[:, 0], X_train[:, 16])

In [30]:
# print X_train (first five rows only) and its container type
print("X_train:", X_train[:5])
print("Type of X_train:",type(X_train))

# print y_train (first five labels only) and its container type
print("y_train:", y_train[:5])
print("Type of y_train:",type(y_train))

X_train: [[4.68944009e-05 2.22748404e-03 0.00000000e+00 0.00000000e+00
  7.50310415e-04 1.64483042e-02 6.79628985e-08 2.03888696e-07
  0.00000000e+00 0.00000000e+00 1.71946137e-04 1.56314670e-05
  1.25051736e-04 9.37888018e-05 1.71946137e-04 3.12629339e-05
  0.00000000e+00]
 [9.37888018e-05 6.83706773e-03 3.12629339e-05 3.68199205e-03
  1.29741176e-03 3.91393454e-02 3.43548694e-08 7.68464175e-08
  3.26106555e-05 0.00000000e+00 4.68944009e-05 3.12629339e-05
  3.12629339e-05 4.68944009e-05 3.12629339e-05 3.12629339e-05
  0.00000000e+00]
 [1.56314670e-05 6.42844079e-04 0.00000000e+00 0.00000000e+00
  1.96956484e-03 6.73716956e-02 1.07518076e-08 2.00439847e-07
  5.39453198e-05 0.00000000e+00 1.71946137e-04 3.12629339e-05
  3.12629339e-05 3.12629339e-05 3.12629339e-05 3.12629339e-05
  0.00000000e+00]
 [3.12629339e-05 2.20403684e-03 0.00000000e+00 0.00000000e+00
  1.56314670e-04 9.48308997e-03 1.30262220e-07 4.12497047e-07
  5.73241753e-04 0.00000000e+00 1.25051736e-04 3.12629339e-05
  7.815

In [31]:
#Feature mapping
# Compare the column count before and after map_feature.
print("Original shape of data:", X_train.shape)
print("Shape after feature mapping:", X_mapped.shape)

Original shape of data: (5000, 17)
Shape after feature mapping: (5000, 27)


In [32]:
# Show the first example before and after feature mapping, for comparison.
print("X_train[0]:", X_train[0])
print("mapped X_train[0]:", X_mapped[0])

X_train[0]: [4.68944009e-05 2.22748404e-03 0.00000000e+00 0.00000000e+00
 7.50310415e-04 1.64483042e-02 6.79628985e-08 2.03888696e-07
 0.00000000e+00 0.00000000e+00 1.71946137e-04 1.56314670e-05
 1.25051736e-04 9.37888018e-05 1.71946137e-04 3.12629339e-05
 0.00000000e+00]
mapped X_train[0]: [4.68944009e-05 0.00000000e+00 2.19908484e-09 0.00000000e+00
 0.00000000e+00 1.03124766e-13 0.00000000e+00 0.00000000e+00
 0.00000000e+00 4.83597412e-18 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 2.26780109e-22 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.06347174e-26 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00]


In [33]:
# Repeat the mapping using columns 0 and 1 instead of 0 and 16.
# NOTE(review): the result is stored in mapped_X, while the training and
# prediction cells visible in this notebook use X_mapped -- confirm which
# pair of columns is intended.
print("Original shape of data:", X_train.shape)

mapped_X =  map_feature(X_train[:, 0], X_train[:, 1])
print("Shape after feature mapping:", mapped_X.shape)

Original shape of data: (5000, 17)
Shape after feature mapping: (5000, 27)


In [34]:
def compute_cost_reg(X, y, w, b, lambda_ = 1):
    """
    Computes the regularized cost over all examples
    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (ndarray Shape (m,))  target value 
      w : (ndarray Shape (n,))  values of parameters of the model      
      b : (scalar)              value of bias parameter of the model
      lambda_ : (scalar, float) Controls amount of regularization
    Returns:
      total_cost : (scalar)     cost 
    """

    m = X.shape[0]

    # Data-fit term from the unregularized cost function
    cost_without_reg = compute_cost(X, y, w, b)

    # L2 penalty (lambda_ / 2m) * sum(w_j^2), summed in one vectorized call.
    # Only w is penalized; the bias b is left out.
    reg_cost = (lambda_ / (2 * m)) * np.sum(np.square(w))

    # Add the regularization cost to get the total cost
    total_cost = cost_without_reg + reg_cost

    return total_cost

In [35]:
def compute_gradient_reg(X, y, w, b, lambda_): 
    """
    Gradient of the regularized logistic-regression cost.

    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (ndarray Shape (m,))  target value 
      w : (ndarray Shape (n,))  values of parameters of the model      
      b : (scalar)              value of bias parameter of the model
      lambda_ : (scalar,float)  regularization constant
    Returns
      dj_db : (scalar)             The gradient of the cost w.r.t. the parameter b. 
      dj_dw : (ndarray Shape (n,)) The gradient of the cost w.r.t. the parameters w. 

    """
    num_examples, num_features = X.shape

    # Start from the unregularized gradient
    dj_db, dj_dw = compute_gradient(X, y, w, b)

    # Add (lambda_ / m) * w_j to each weight gradient in place; the bias
    # gradient is returned unchanged.
    penalty_scale = lambda_ / num_examples
    dj_dw[:num_features] += penalty_scale * np.asarray(w)[:num_features]

    return dj_db, dj_dw

In [36]:
# Initialize fitting parameters
# Seed NumPy's global RNG so the random initial weights are the same on every run
np.random.seed(1)
# One weight per mapped feature column, drawn from [-0.5, 0.5)
initial_w = np.random.rand(X_mapped.shape[1])-0.5
initial_b = 0.8

# Set regularization parameter lambda_ (you can try varying this)
lambda_ = .0004  

# Some gradient descent settings
iterations = 100000
alpha = 0.01

# Train on the mapped features with the regularized cost and gradient;
# the fourth return value (w_history) is discarded.
w,b, J_history,_ = gradient_descent(X_mapped, y_train, initial_w, initial_b, 
                                    compute_cost_reg, compute_gradient_reg, 
                                    alpha, iterations, lambda_)

Iteration    0: Cost     1.04   
Iteration 10000: Cost     0.43   
Iteration 20000: Cost     0.43   
Iteration 30000: Cost     0.43   
Iteration 40000: Cost     0.43   
Iteration 50000: Cost     0.43   
Iteration 60000: Cost     0.43   
Iteration 70000: Cost     0.43   
Iteration 80000: Cost     0.43   
Iteration 90000: Cost     0.43   
Iteration 99999: Cost     0.43   


In [37]:
#Compute accuracy on the training set
# Uses the mapped features, matching what the regularized model was trained on.
p = predict(X_mapped, w, b)

print('Train Accuracy: %f'%(np.mean(p == y_train) * 100))

Train Accuracy: 84.580000


In [38]:
# Load the unseen.csv file and map the same two columns (0 and 16) that were used for X_mapped.
X_test, y_test = preprocess("Project1_data/unseen.csv")
test_mapped = map_feature(X_test[:, 0], X_test[:, 16])

In [39]:
# Report the dimensions of the test features, labels, and mapped test features.
print('Shape of X_test: ' + str(X_test.shape))
print('Shape of y_test: ' + str(y_test.shape))
print('Shape of test_mapped: ' + str(test_mapped.shape))

Shape of X_test: (500, 17)
Shape of y_test: (500,)
Shape of test_mapped: (500, 27)


In [40]:
# Accuracy of the regularized model on the mapped test features, as a percentage.
p = predict(test_mapped, w, b)
print('Test Accuracy: %f'%(np.mean(p == y_test) * 100))

Test Accuracy: 84.800000
