# Optimization w vector with LogReg Cost function

Для начала работы загруим необходимые библиотеки и инструменты 

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression



import matplotlib.pyplot as plt

from scipy.optimize import fmin


В качестве рабочего набора данных будет использован встроенный набор данных breast_cancer, содержащий в себе 569 записей и информацию по 9 признакам. Для каждого из этих признаков были рассчитаны среднее значение, стандартная ошибка и «наихудшее» или наибольшее (среднее из трех самых больших значений) значение, что дало 30 признаков. Каждая из записей данных имеет метку Malignant (злокачественная опухоль) (212 записей) или Benign(доброкачественная опухоль) (357).

In [2]:
cancer = load_breast_cancer()

In [3]:
cancer_data = pd.DataFrame.from_dict(cancer['data'])
cancer_target = pd.DataFrame.from_dict(cancer['target'])

# add intercept
cancer_data.insert(0, 'x_0', 1)

print("cancer_data", cancer_data.shape)
print("cancer_target",cancer_target.shape)
print()

# split to train and test sets
X_train = cancer_data[0:457]
y_train = cancer_target [0:457]
X_test = cancer_data[457:]
y_test = cancer_target [457:]

print("Shape X_train", X_train.shape)
print("Shape y_train", y_train.shape)
print()
print("Shape X_test", X_test.shape)
print("Shape y_test", y_test.shape)

cancer_data (569, 31)
cancer_target (569, 1)

Shape X_train (457, 31)
Shape y_train (457, 1)

Shape X_test (112, 31)
Shape y_test (112, 1)


In [4]:
# create arrays
X_train1 = np.array(X_train.values)
y_train1 = np.array(y_train.values)
X_test1 = np.array(X_test.values)
y_test1 = np.array(y_test.values)

# initialise w - vector of parameters
w = np.ones(31)


w = np.matrix(w)
X = np.matrix(X_train1)
y = np.matrix(y_train1)

In [5]:
print('X', X.shape)
print('y', y.shape)
print('w', w.shape)

X (457, 31)
y (457, 1)
w (1, 31)


In [6]:
# hypothesis function for logistic regression

$$\large h_{w}{(x)} = \frac{1}{1+ e^{-w^Tx}}
$$

In [7]:
# the linear regression's cost function cannot be used in logistic regression problems

$$ \large \ J{(w)} = \frac{1}{m}\sum_{i=1}^m \frac{1}{2} ((h_{w}{(x^i)}) - y^i)^2
$$

$$ \large \ Cost(h_{w}{(x^{(i)})},{y^{(i)}}) = \frac{1}{2}(h_{w}{(x^{(i)})} - y^{(i)})^2
$$

$$ \large \ J{(w)} = \frac{1}{m}\sum_{i=1}^m Cost(h_{w}{(x^{(i)})},{y^{(i)}})
$$

In [8]:
# Logistic regression cost function


$$\large \
\begin{equation}
  Cost(h_{w}{(x)},{y})=\begin{cases}
    -log(h_{w}{(x)}), & \text{if $y = 1$}.\\
    -log(1 - h_{w}{(x)}), & \text{if $y = 0$}.
  \end{cases}
\end{equation}
$$




In [9]:
# formula without if/else statements 


$$\large \
\begin{equation}
  Cost(h_{w}{(x)},{y})= -ylog(h_{w}{(x)})-(1-y)(log(1 - h_{w}{(x)})
\end{equation}
$$

In [10]:
# define Cost Function for logistic regression with L2 regularization wich we should minimize

$$  J{(w)} = \frac{1}{m}\sum_{i=1}^m Cost(h_{w}{(x^{(i)})},{y^{(i)}}) = -[\frac{1}{m}\sum_{i=1}^m y^{(i)} log(\frac{1}{1+ e^{-w^Tx}})+(1-y^{(i)})log(1 - \frac{1}{1+ e^{-w^Tx}})] + \frac{\lambda}{2m}\sum_{j=1}^n w^2_j 
$$



 $ m  - \text{number of training examples}$  $n - \text {number of features }$  $x - \text{input variable/features }$ $y - \text{output variable/ target variable}$ $w - \text{parameters of the model }$ $ \lambda - \text{parameter of regularization}$ 

In [11]:
def cost_function_L2_penalty(w,X,y,learningRate):
    
    w = np.matrix(w)
    X = np.matrix(X)
    y = np.matrix(y)
    
    lg = np.log(1/(1 + np.exp(-X*w.T)))
    
    first = np.multiply(-y, lg)
    second = np.multiply((1-y), lg)
    
    penalty = learningRate/(2*len(X))*np.sum(w*w.T)
    
    return np.sum(first - second)/(len(X)) + penalty

In [104]:
learningRate = 0.1 # LearningRate is alpha = 1 / (n_samples * C)
cost_function_L2_penalty(w,X_train1,y_train1,learningRate)

0.003391684901531729

In [105]:
w_new = fmin(func = cost_function_L2_penalty, x0 = w, args=(X_train1,y_train1,learningRate), maxiter=100)
print(w_new)

[0.99740871 0.98382111 1.03154298 1.0352815  1.03425103 1.00777605
 0.98870541 1.0371062  1.00616981 0.9779672  0.99311712 0.9312597
 0.95998774 1.03402172 0.95327126 0.96552775 0.9855948  0.97447227
 1.01147925 0.98105309 0.99526063 1.01357562 0.99094533 1.01824129
 1.0234979  1.00954558 0.98634959 1.0158313  1.01906068 1.02079928
 0.94900147]


In [106]:
cost_function_L2_penalty(w_new,X_train1,y_train1,learningRate)

0.0033793181478710722

$$\large accuracy(y,\hat{y}) = {\frac{1}{n_{samples}} \sum_{i=0}^{n_{samples}-1}} 1(\hat{y}_i = y_i)$$ 

In [107]:
def accuracy(w,X_test,y_test):
    n = len(X_test)
    z = X_test.dot(w.T)
    prediction = 1/(1 + np.exp(-(z)))>= 0.5
    
    prediction = prediction.reshape(n,1)
    correct =  prediction == y_test
    accuracy = (np.sum(correct))/n
    #print('accuracy {:.3f}'.format(accuracy))
    return accuracy

In [108]:
print(accuracy(w_new, X_train1,y_train1))
print(accuracy(w_new, X_test1,y_test1))

0.5929978118161926
0.7678571428571429
