In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


In [2]:
iris = datasets.load_iris()

In [3]:
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [16]:
df_iris = DataFrame(iris.data)
df_iris.columns = iris.feature_names
df_iris['target'] = iris.target
df_iris['name'] = df_iris.target.apply(lambda x : iris.target_names[x])

In [17]:
df_iris.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


Оставляем два класса:

In [18]:
df_iris_1 = df_iris[df_iris.target != 0] 

df_iris_1.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,name
50,7.0,3.2,4.7,1.4,1,versicolor
51,6.4,3.2,4.5,1.5,1,versicolor
52,6.9,3.1,4.9,1.5,1,versicolor
53,5.5,2.3,4.0,1.3,1,versicolor
54,6.5,2.8,4.6,1.5,1,versicolor


# Градиентный спуск

In [19]:
X = []
y = []
for idx, item in enumerate(iris.target):
    if item != 0:
        X.append(iris.data[idx])
        y.append(item-1)

X = np.asarray(X)
y = np.asarray(y)
print(y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [20]:
def predict(coefs, X):
    predict = coefs[0] + coefs[1] * X[:, 0] + coefs[2] * X[:, 1] + coefs[3] * X[:, 2] + coefs[4] * X[:, 3]
    sigm = 1. / (1 + np.exp(-predict))
    return sigm

In [21]:
lr = 0.05
coefs = []
coefs = np.random.normal(size=(5,))
print(coefs)
grad_loss_hist = []
for i in range(100):
    sigm = predict(coefs, X)
    coefs[0] -= lr * np.sum(sigm - y)/len(sigm)
    coefs[1] -= lr * np.sum((sigm - y) * X[:, 0])/len(sigm)
    coefs[2] -= lr * np.sum((sigm - y) * X[:, 1])/len(sigm)
    coefs[3] -= lr * np.sum((sigm - y) * X[:, 2])/len(sigm)
    coefs[4] -= lr * np.sum((sigm - y) * X[:, 3])/len(sigm)
    loss = - np.mean(np.log(sigm) * y + np.log(1 - sigm) * (1 - y))
    #print(coefs)
    grad_loss_hist.append(loss)

[ 0.66963899 -0.53203818  0.39126919  1.22274523 -1.04595505]


In [22]:
err1 = 0
for idx, item in enumerate(np.around(predict(coefs, X))):
    if item != y[idx]:
        err1 += 1
err1

16

# Метод RMSProp

In [23]:
# обучение модели методом RMSProp
e_sq_grad = np.zeros(5)
grad = np.zeros(5)
print(coefs)
rmsp_loss_hist = []
gamma = 0.975
eps = 0.00000001
for i in range(100):
    sigm = predict(coefs, X)
    loss = - np.mean(np.log(sigm) * y + np.log(1 - sigm) * (1 - y))
    rmsp_loss_hist.append(loss)
    grad[0] = np.sum(sigm - y)/len(sigm)
    grad[1] = np.sum((sigm - y) * X[:, 0])/len(sigm)
    grad[2] = np.sum((sigm - y) * X[:, 1])/len(sigm)
    grad[3] = np.sum((sigm - y) * X[:, 2])/len(sigm)
    grad[4] = np.sum((sigm - y) * X[:, 3])/len(sigm)
    
    e_sq_grad = gamma * e_sq_grad + (1 - gamma)  * grad ** 2
    
    coefs -= lr * grad / np.sqrt(e_sq_grad + eps)
    #print(coefs)

[ 0.46119414 -1.09329761  0.02076599  1.52778    -0.6160565 ]


In [24]:
# количество ошибок при классификации
err2 = 0
for idx, item in enumerate(np.around(predict(coefs, X))):
    if item != y[idx]:
        err2 += 1
err2

5

# Метод nesterov momentum

In [25]:
vel_pred = np.zeros(5)
vel = np.zeros(5)
print(coefs)
nest_loss_hist = []
gamma = 0.975
for i in range(100):
    sigm = predict(coefs, X)
    loss = - np.mean(np.log(sigm) * y + np.log(1 - sigm) * (1 - y))
    nest_loss_hist.append(loss)
    
    sigm = predict(coefs - gamma * vel_pred, X)
    
    vel[0] = (gamma * vel_pred[0] + lr * np.sum(sigm - y))/len(sigm)
    vel[1] = (gamma * vel_pred[1] + lr * np.sum((sigm - y) * X[:, 0]))/len(sigm)
    vel[2] = (gamma * vel_pred[2] + lr * np.sum((sigm - y) * X[:, 1]))/len(sigm)
    vel[3] = (gamma * vel_pred[3] + lr * np.sum((sigm - y) * X[:, 2]))/len(sigm)
    vel[4] = (gamma * vel_pred[4] + lr * np.sum((sigm - y) * X[:, 3]))/len(sigm)
    coefs -= vel
    #print(coefs)

    vel_pred = vel

[-1.07529465 -1.71590111 -0.92952653  2.48729444  1.62380897]


In [26]:
err3 = 0
for idx, item in enumerate(np.around(predict(coefs, X))):
    if item != y[idx]:
        err3 += 1
err3

4

In [27]:
m = ['Градиентный спуск', 'RMSProp', 'nesterov momentum']

error = [err1, err2, err3] 

data = pd.DataFrame({'model#': m, 
                     'error': error
                    
                    }                                      
                   )
data.head(3)

Unnamed: 0,model#,error
0,Градиентный спуск,16
1,RMSProp,5
2,nesterov momentum,4


Исходя из количества ошибок Нестеров моментум показал наилучший результат.