In [51]:
from sklearn import datasets
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

In [52]:
iris = datasets.load_iris()

In [53]:
X = iris.data
y = iris.target

In [54]:
df = pd.DataFrame(X, columns=iris.feature_names)

In [55]:
df['target'] = iris.target
# df['species'] = df['species'].replace(to_replace= [0, 1, 2], value = ['setosa', 'versicolor', 'virginica'])

In [56]:
# 0 is setosa, we leave only
df = df[df.target != 0]

In [57]:
# df['target'] = df['target'].apply(lambda x : 1 if x == 2 else 0)

In [58]:
df['target'].replace({1: 0, 2: 1}, inplace=True)

In [59]:
df.reset_index(drop=True, inplace=True)

In [60]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,7.0,3.2,4.7,1.4,0
1,6.4,3.2,4.5,1.5,0
2,6.9,3.1,4.9,1.5,0
3,5.5,2.3,4.0,1.3,0
4,6.5,2.8,4.6,1.5,0


### Градиентный спуск (функция потерь)

In [121]:
y = df['target']
X = df.drop('target', axis=1)

In [62]:
# результат регрессии и сигмоиды
def sigmoid(X, weight):
    z = np.dot(X, weight)
    return 1 / (1 + np.exp(-z))

In [13]:
# def loss(h, y):
#     return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

In [14]:
# считаем обычный градиент
def gradient_descent(X, h, y):
    return np.dot(X.T, (h - y)) / y.shape[0] # 100

In [15]:
# обновление тета методом градиента
def update_weight_loss(weight, learning_rate, gradient):
    return weight - learning_rate * gradient

In [16]:
start_time = time.time()

num_iter = 100000

# добавляем интерсепт к Х
intercept = np.ones((X.shape[0], 1))
X = np.concatenate((intercept, X), axis=1)

# задаем нулевые параметры
theta = np.zeros(X.shape[1])

for i in range(num_iter):
    # вычисляем значение/вероятность при заданных тета
    h = sigmoid(X, theta)
    
    # вычисляем градиент
    gradient = gradient_descent(X, h, y)
    
    # обновляем тета в направлении антиградиента
    theta = update_weight_loss(theta, 0.1, gradient)
    
print("Training time (using Gradient descent):" + str(time.time() - start_time) + " seconds")
print("Learning rate: {}\nIteration: {}".format(0.1, num_iter))

Training time (using Gradient descent):39.31449580192566 seconds
Learning rate: 0.1
Iteration: 100000


In [17]:
gradient

array([ 8.32356361e-04, -7.11931778e-05,  6.78869820e-06, -2.43973847e-05,
       -1.68496812e-04])

In [131]:
result = sigmoid(X, theta)

In [132]:
result_df = pd.DataFrame(np.around(result, decimals=6)).join(y)

In [133]:
result_df['pred'] = result_df[0].apply(lambda x : 0 if x < 0.5 else 1)

In [134]:
result_df.head()

Unnamed: 0,0,target,pred
0,0.0001,0,0
1,0.000628,0,0
2,0.003191,0,0
3,0.004074,0,0
4,0.007259,0,0


In [135]:
print("Accuracy (loss minimization):")
result_df.loc[result_df['pred']==result_df['target']].shape[0] / result_df.shape[0] * 100

Accuracy (loss minimization):


97.0

### Градиентный подъем (максимизация правдоподобия)

In [168]:
y = df['target']
X = df.drop('target', axis=1)

In [169]:
# def log_likelihood(x, y, weights):
#     z = np.dot(x, weights)
#     ll = np.sum( y*z - np.log(1 + np.exp(z)) )
#     return ll

In [170]:
# максимизируем функцию правдоподобия
def gradient_ascent(X, h, y):
    return np.dot(X.T, y - h)
def update_weight_mle(weight, learning_rate, gradient):
    return weight + learning_rate * gradient

In [171]:
start_time = time.time()
num_iter = 100000

intercept = np.ones((X.shape[0], 1))
X = np.concatenate((intercept, X), axis=1)
theta = np.zeros(X.shape[1])

for i in range(num_iter):
    h = sigmoid(X, theta)
    gradient = gradient_ascent(X, h, y) #np.dot(X.T, (h - y)) / y.size
    theta = update_weight_mle(theta, 0.1, gradient)
    
print("Training time (Log Reg using MLE):" + str(time.time() - start_time) + "seconds")
print("Learning rate: {}\nIteration: {}".format(0.1, num_iter))

Training time (Log Reg using MLE):11.094940662384033seconds
Learning rate: 0.1
Iteration: 100000


In [172]:
result = sigmoid(X, theta)

In [173]:
print("Accuracy (Maximum Likelihood Estimation):")
result_df = pd.DataFrame(result).join(y)
result_df.loc[result_df[0]==result_df['target']].shape[0] / result_df.shape[0] * 100

Accuracy (Maximum Likelihood Estimation):


32.0

### Nesterov Accelerated Gradient

In [143]:
y = df['target']
X = df.drop('target', axis=1)

In [144]:
# Nesterov Accelerated Gradient

In [145]:
# результат регрессии и сигмоиды
def sigmoid(X, weight):
    z = np.dot(X, weight)
    return 1 / (1 + np.exp(-z))

In [146]:
gamma = 0.9

In [147]:
def nesterov_gradient(X, h, y):
    return np.dot(X.T, (h - y)) / y.shape[0] # 100 

In [148]:
# обновление тета nesterov momentum
def nesterov_momentum(weight, learning_rate, gradient, gamma, prev_gradient):
    return weight - (prev_gradient * gamma + (learning_rate * (gradient - prev_gradient * gamma)))

In [149]:
# задаем переменную для предышущего значения градиента
prev_gradient = np.zeros(5)
prev_gradient

array([0., 0., 0., 0., 0.])

In [150]:
start_time = time.time()

num_iter = 100000

# добавляем интерсепт к Х
intercept = np.ones((X.shape[0], 1))
X = np.concatenate((intercept, X), axis=1)

# задаем нулевые параметры
theta = np.zeros(X.shape[1])

for i in range(num_iter):
    # вычисляем значение/вероятность при заданных тета
    h = sigmoid(X, theta)
    
    # вычисляем градиент с учетом предыдущего значения и гамма
    gradient = nesterov_gradient(X, h, y)
    
    # обновляем тета в направлении антиградиента
    theta = nesterov_momentum(theta, 0.1, gradient, gamma, prev_gradient)
    
    # записываем предыдущее значение градиента
    prev_gradient = gradient
    
print("Training time (using Gradient descent):" + str(time.time() - start_time) + " seconds")
print("Learning rate: {}\nIteration: {}".format(0.1, num_iter))

Training time (using Gradient descent):13.780502319335938 seconds
Learning rate: 0.1
Iteration: 100000


In [151]:
result = sigmoid(X, theta)

In [152]:
result_df = pd.DataFrame(np.around(result, decimals=6)).join(y)

In [153]:
result_df['pred'] = result_df[0].apply(lambda x : 0 if x < 0.5 else 1)

In [154]:
print("Accuracy (Nesterov Momentum):")
result_df.loc[result_df['pred']==result_df['target']].shape[0] / result_df.shape[0] * 100

Accuracy (Nesterov Momentum):


98.0

### RMSProp

In [187]:
y = df['target']
X = df.drop('target', axis=1)

In [188]:
# результат регрессии и сигмоиды
def sigmoid(X, weight):
    z = np.dot(X, weight)
    return 1 / (1 + np.exp(-z))

In [189]:
# задаем гамму
gamma = 0.9
# задаем сглаживающий параметр
e = 0.01

In [190]:
# переменная для градиентов
prev_gradient = np.zeros(5)
prev_gradient

array([0., 0., 0., 0., 0.])

In [191]:
# создаем список, в который записываем предыдущие градиенты
list_gradients = []
list_gradients.append(prev_gradient)
list_gradients

[array([0., 0., 0., 0., 0.])]

In [192]:
def rmsprop_gradient(X, h, y):
    return np.dot(X.T, (h - y)) / y.shape[0] # 100

In [193]:
# задаем усреднённый по истории квадрат градиента
def running_average(list_gradients):
    return (np.mean(list_gradients[-3:], axis=0)) ** 2

In [194]:
# обновление тета nesterov momentum
def rmsprop_change(weight, learning_rate, gradient, gamma, ra):
    ra_inter = gamma * ra + (1 - gamma) * (gradient ** 2)
    return weight - (learning_rate / np.sqrt(ra_inter + e)) * gradient

In [195]:
start_time = time.time()

num_iter = 100000

# добавляем интерсепт к Х
intercept = np.ones((X.shape[0], 1))
X = np.concatenate((intercept, X), axis=1)

# задаем нулевые параметры
theta = np.zeros(X.shape[1])

for i in range(num_iter):
    # вычисляем значение/вероятность при заданных тета
    h = sigmoid(X, theta)
    
    # вычисляем градиент с учетом предыдущего значения и гамма
    gradient = rmsprop_gradient(X, h, y)
    
    # вычисляем бегущее среднее
    ra = running_average(list_gradients)
    
    # обновляем тета
    theta = rmsprop_change(theta, 0.1, gradient, gamma, ra)
    
    # записываем предыдущее значение градиента в список
    list_gradients.append(gradient)
    
print("Training time (using Gradient descent):" + str(time.time() - start_time) + " seconds")
print("Learning rate: {}\nIteration: {}".format(0.1, num_iter))

Training time (using Gradient descent):15.863286018371582 seconds
Learning rate: 0.1
Iteration: 100000


In [196]:
result = sigmoid(X, theta)

In [197]:
result_df = pd.DataFrame(np.around(result, decimals=6)).join(y)

In [198]:
result_df['pred'] = result_df[0].apply(lambda x : 0 if x < 0.5 else 1)

In [199]:
print("Accuracy (RMSProp):")
result_df.loc[result_df['pred']==result_df['target']].shape[0] / result_df.shape[0] * 100

Accuracy (RMSProp):


98.0