In [3]:
# Прочитать про методы оптимизации для нейронных сетей https://habr.com/post/318970/
# Реализовать самостоятельно логистическую регрессию
#     Обучить ее методом градиентного спуска
#     Методом nesterov momentum
#     Методом rmsprop

In [7]:
import numpy as np
from sklearn.datasets import load_iris

In [21]:
x = load_iris()

In [102]:
X,y = x.data, x.target

In [103]:
X.shape

(150, 4)

In [104]:
y.shape

(150,)

In [105]:
X_01 = X[np.any([y == 0,y == 1], axis = 0)]
y_01 = y[np.any([y == 0,y == 1], axis = 0)]

In [106]:
print(X_01.shape)
print(y_01.shape)

(100, 4)
(100,)


In [128]:
shuffle_index = np.random.permutation(X_01.shape[0])
X_01_shuffled, y_01_shuffled = X_01[shuffle_index], y_01[shuffle_index]

train_proportion = 0.8
train_test_cut = int(len(X_01)*train_proportion)

X_train, X_test, y_train, y_test = \
    X_01_shuffled[:train_test_cut], \
    X_01_shuffled[train_test_cut:], \
    y_01_shuffled[:train_test_cut], \
    y_01_shuffled[train_test_cut:]

In [110]:
X_train_normalised = X_train/255.0
X_test_normalised = X_test/255.0

In [130]:
X_train_tr = X_train_normalised.transpose()
y_train_tr = y_train.reshape(1,y_train.shape[0])
X_test_tr = X_test_normalised.transpose()
y_test_tr = y_test.reshape(1,y_test.shape[0])

dim_train = X_train_tr.shape[1]
dim_test = X_test_tr.shape[1]

In [131]:
y_train_shifted = y_train_tr - 1
y_test_shifted = y_test_tr - 1

In [132]:
Xtrain = X_train_tr
ytrain = y_train_shifted
Xtest = X_test_tr
ytest = y_test_shifted

In [133]:
def sigmoid(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s

In [135]:
def initialize(dim):
    w = np.zeros((dim,1))
    b = 0
    assert (w.shape == (dim,1))
    assert (isinstance(b, float) or isinstance(b,int))
    return w,b

In [136]:
def propagate(w, b, X, Y):
    m = X.shape[1]
    z = np.dot(w.T,X)+b
    A = sigmoid(z)
    cost = -1.0/m*np.sum(Y*np.log(A)+(1.0-Y)*np.log(1.0-A))
    dw = 1.0/m*np.dot(X, (A-Y).T)
    db = 1.0/m*np.sum(A-Y)
    assert (dw.shape == w.shape)
    assert (db.dtype == float)
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    grads = {"dw": dw, 
             "db":db}
    return grads, cost

In [137]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    costs = []
    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]
        w = w - learning_rate*dw
        b = b - learning_rate*db
        if i % 100 == 0:
            costs.append(cost)
            
        #if print_cost and i % 100 == 0:
        #    print ("Cost (iteration %i) = %f" %(i, cost))
            
    grads = {"dw": dw, "db": db}
    params = {"w": w, "b": b}
        
    return params, grads, costs

In [138]:
def predict (w, b, X):    
    m = X.shape[1]
    Y_prediction = np.zeros((1,m))
    w = w.reshape(X.shape[0],1)
    A = sigmoid (np.dot(w.T, X)+b)
    for i in range(A.shape[1]):
        if (A[:,i] > 0.5): 
            Y_prediction[:, i] = 1
        elif (A[:,i] <= 0.5):
            Y_prediction[:, i] = 0
            
    assert (Y_prediction.shape == (1,m))
    
    return Y_prediction

In [139]:
def model (X_train, Y_train, X_test, Y_test, num_iterations = 1000, learning_rate = 0.5, print_cost = False):
    
    w, b = initialize(X_train.shape[0])
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
    
    w = parameters["w"]
    b = parameters["b"]
    
    Y_prediction_test = predict (w, b, X_test)
    Y_prediction_train = predict (w, b, X_train)
    
    train_accuracy = 100.0 - np.mean(np.abs(Y_prediction_train-Y_train)*100.0)
    test_accuracy = 100.0 - np.mean(np.abs(Y_prediction_test-Y_test)*100.0)
    
    d = {"costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}
    
    print ("Accuarcy Test: ",  test_accuracy)
    print ("Accuracy Train: ", train_accuracy)
    
    return d

In [140]:
d = model (Xtrain, 
           ytrain, 
           Xtest, 
           ytest, 
           num_iterations = 4000, 
           learning_rate = 0.05, 
           print_cost = True)

Accuarcy Test:  65.0
Accuracy Train:  46.25


In [141]:
ypred = d["Y_prediction_test"]
ypred_ravel = ypred.ravel()
ytest_ravel = ytest.ravel()

from sklearn.metrics import confusion_matrix

confusion_matrix (ytest_ravel, ypred_ravel)

array([[ 0,  7],
       [ 0, 13]])

In [142]:
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression()

In [143]:
XX = Xtrain.T
YY = ytrain.T.ravel()

logistic.fit(XX,YY)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [144]:
logistic.score(XX,YY)

0.5375

In [145]:
sum(logistic.predict(XX) == YY) / len(XX)

0.5375