In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd

In [3]:
def compute_cost_logistic_regression(X, y, w, b, lambda_l = 1):
    '''
    Description: computes the cost for regularized logistic regression.
    The cost is the mean binary cross-entropy over all examples plus an L2
    penalty lambda_l/(2*m) * sum(w_j^2) that keeps the parameters (w1, w2, ...)
    small. The free term b is not regularized.
    Preconditions: X - m x n matrix which contains our data
                    y - m size array - target values (0 or 1)
                    w - n size array - model parameters
                    b - real number - free term parameter
                    lambda_l - real number - quantifier for regularization
    Postconditons: total_cost - real number - the regularized cost of the model
    described by (w, b) on the data (X, y)
    '''
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)

    m = X.shape[0]  # number of input data examples

    # linear part of the model for every example at once
    z = X @ w + b

    # With f = 1/(1 + exp(-z)), the per-example loss
    #     -y*log(f) - (1-y)*log(1-f)
    # simplifies algebraically to log(1 + exp(z)) - y*z.
    # np.logaddexp(0, z) evaluates log(1 + exp(z)) without overflow, so the
    # cost stays finite even when the sigmoid saturates to exactly 0 or 1.
    cost = np.sum(np.logaddexp(0.0, z) - y * z) / m

    # the regularization part (b is intentionally left out)
    reg_cost = (lambda_l / (2 * m)) * np.sum(w ** 2)

    # adding the 2 costs
    total_cost = cost + reg_cost
    return total_cost

In [8]:
def compute_gradient(X, y, w, b, lambda_l = 1):
    '''
    Description: computes the partial derivatives of the regularized logistic
    regression cost with respect to the w vector and the b scalar, as used in
    the gradient descent algorithm. Only w is regularized, b is not.
    Preconditions: X - m x n matrix which contains our data
                    y - m size array - target values (0 or 1)
                    w - n size array - model parameters
                    b - real number - free term parameter
                    lambda_l - real number - quantifier for regularization
                    (defaults to 1, same as the cost function)
    Postconditons: dj_db - scalar and dj_dw - n array (size of w), returned
    in this order
    '''
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)

    m = X.shape[0]  # number of input data examples

    z = X @ w + b
    # sigmoid(z) = 1/(1 + exp(-z)), written as exp(-log(1 + exp(-z))) so that
    # a very negative z does not overflow inside exp
    f_wb = np.exp(-np.logaddexp(0.0, -z))

    # prediction error for every example
    err = f_wb - y

    # derivative with respect to b: mean error
    dj_db = np.sum(err) / m

    # gradient with respect to w: unregularized part ...
    dj_dw = (X.T @ err) / m
    # ... plus the regularization part for the w feature vector
    dj_dw = dj_dw + (lambda_l / m) * w

    return dj_db, dj_dw

In [9]:
# apply z-score normalization to bring our input data in the same range, so our model will perform faster and better

def zscore_normalize_features(X):
    '''
    Description: computes X, z-score normalized by column, i.e. every feature
    is shifted to mean 0 and scaled to standard deviation 1.
    Preconditions: X - m x n array (m examples, n features)
    Postconditons: X_norm - m x n array - normalized data. A column whose
    values are all equal (standard deviation 0) is mapped to zeros instead of
    producing NaN/inf from a division by zero.
    '''
    X = np.asarray(X, dtype=float)

    # find the mean of each feature, by columns
    mu = np.mean(X, axis=0)
    # find the standard deviation of each column
    sigma = np.std(X, axis=0)
    # a constant column has sigma == 0; dividing by 1 instead leaves its
    # centered values (all zeros) untouched and avoids 0/0
    safe_sigma = np.where(sigma == 0, 1.0, sigma)
    # apply the formula of the z-score normalization
    X_norm = (X - mu) / safe_sigma

    return X_norm

In [12]:
# Food input data: the feature matrix X and the target vector y are defined here.
# NOTE(review): the description string below mentions 25 examples, but the X
# array defined in this cell contains only 3 rows (5 values each).

'''
We have 25 examples of healthy and unhealthy foods.
As target values, we have the classes healthy/unhealthy, marked with 1(healthy)/0(unhealthy)       - OUTPUT
Each food has 5 features (measured in 100g or ml):  
 - Total Fat (g)
 - Saturated Fat (g)
 - Carbohydrate (g)                                                            - INPUT
 - Total Sugars (g) 
 - Protein (g)
 '''

# feature matrix: one row per food, one column per feature
X = np.array([[0.5,0.1,4.9,4.9,0.5],
              [3.1,0.43,47.5,0.53,8.7],
              [11,3,14,1,1]])
# The string literal below is not assigned to anything and is not part of X.
# Its rows have 3 values each, unlike the 5-value rows of X above.
# NOTE(review): presumably leftover data from a different dataset - confirm
# before reusing or removing it.
'''
  [8.7,2300,30],
  [8.7,2300,36],
  [8.6,2300,20],
  [7.1,1400,16],  
 [8.8,2100,32],
  [8.7,2000,25],
  [7.7,1500,20],   
  [9,3300,15],
  [8.1,1800,22],
  [8.4,2200,25],
  [8.9,2300,30],   
  [8.5,2000,22],
  [7.9,1900,18],
  [8,2300,16],
  [8.1,2200,16],
  [8.6,2500,18],
  [8.3,2400,15],
   [8.5,2200,15],
    [8.9,1000,18],
    [8.1,1000,18],
    [7.7,1500,20],
    [7.6,1700,18]])
'''
# column-wise z-score normalization of X (see zscore_normalize_features)
X_norm = zscore_normalize_features(X)
# target vector: 3 labels, one per row of X; the values after the '#' on the
# next line are commented out and are not part of y
y = np.array([0,1,0])   #,140,185,171,150,200 ,220 ,250,130,200,185,195,210,250,230,220,160,150,180,250,220,230,210]) 
# NOTE(review): the original note here read "it does matter whether the target
# data is sorted or not", which is ambiguous. What the cost and gradient
# functions rely on is that y[i] is the label of row X[i].
