In [1]:
import pandas as pd
import numpy as np
import csv

data=pd.read_csv("nhanes.csv", delimiter= ',')
data.set_index("ID", inplace=True)
data.drop("BMI", axis=1, inplace=True)
data.fillna(data.median(), inplace=True)

In [2]:
def normalizeFeatures(X):
    X_norm=X
    mu=np.mean(X)
    sigma=np.std(X)
    X_norm=(X_norm-mu)/sigma
    return X_norm

def sigmoid(x, theta):
    d = 1.0 + np.e ** (sum([-theta[i] * x[i] for i in range (len(theta))]))
    sigmoid_func = 1.0 / d
    return sigmoid_func

def compute_cost(theta,X,y): 
    numb_X = X.shape[0]
    J=0
    for i in range(numb_X):
        J+=y[i]*np.log(sigmoid(X[i], theta))+(1-y[i])*np.log(1-sigmoid(X[i],theta))
    return J/numb_X

def compute_grad(theta, X, y):
    numb_X=X.shape[0]
    grad=np.zeros(len(theta))
    for i in range(numb_X):
        grad+=(y[i]-sigmoid(X[i],theta))*X[i]
    return  grad/numb_X

def gradient_descent(X,y,theta,alpha,max_iter,eps):
    iter=0
    converged=False
    print "\n#----------------------------------------------------#"
    while not converged:
        old_error=compute_cost(theta, X, y)
        theta=theta+alpha*(compute_grad(theta,X,y))
        new_error=compute_cost(theta,X,y)
        print "  Iteration n:", iter, " with an error of:", new_error
        if abs(old_error-new_error)<=eps:
            converged=True
            print "#----------------------------------------------------#"
            print "\n           Converged.\n"
            print "         Theta:\n", theta
            print ""
        else:
            old_error=new_error
        if iter==max_iter:
            converged=True
        iter+=1
    return theta

def obese_results(theta, X):
    m, n = X.shape
    r = np.zeros(shape=(m, 1))
    h = sigmoid(X, theta)
    for i in range(0, h.shape[0]):
        if h[i] > 0.5:
            r[i, 0] = 1
        else:
            r[i, 0] = 0
    return r

In [3]:
data_new=normalizeFeatures(data[data.columns[:-1]])
X=data_new.values
m=np.ones((len(data_new),1 ), dtype=int)
X=np.concatenate((m, X), axis=1)
y=data['OBESE'].tolist()
theta= np.ones(19, dtype = float)

In [4]:
# 1)
m, n = X.shape
it = np.ones(shape=(m, 19))
theta_fin=gradient_descent(X,y,theta,20,50,0.002)


#----------------------------------------------------#
  Iteration n: 0  with an error of: -1.06569808974
  Iteration n: 1  with an error of: -0.198314317442
  Iteration n: 2  with an error of: -0.110867006716
  Iteration n: 3  with an error of: -0.0860062789751
  Iteration n: 4  with an error of: -0.0730637625672
  Iteration n: 5  with an error of: -0.0648782131335
  Iteration n: 6  with an error of: -0.0593955105438
  Iteration n: 7  with an error of: -0.0555853917685
  Iteration n: 8  with an error of: -0.0528473036772
  Iteration n: 9  with an error of: -0.0508176225628
  Iteration n: 10  with an error of: -0.0492711401876
#----------------------------------------------------#

           Converged.

         Theta:
[ -6.86991923e+00   8.18350843e-01  -1.34330292e-02   2.05691209e-03
  -2.18323074e-01  -2.73768140e-02  -1.36088381e-01   6.72268917e-02
  -2.68452084e-01   1.32694682e-01  -3.17074375e-02   7.30211801e+00
  -3.58723082e+00  -8.19154779e-03   3.99867591e-02   5.179842

In [5]:
# 2)

r = obese_results(theta, X)
print "		   Predicted results of obese:\n", r

		   Predicted results of obese:
[[ 1.]
 [ 0.]
 [ 0.]
 ..., 
 [ 0.]
 [ 0.]
 [ 0.]]
