# Домашнее задание "Функции потерь и оптимизация"

In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris

## Загрузка данных в датафрейм

In [2]:
data = load_iris()

In [3]:
data.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [4]:
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

Добавим колонку с названиями ирисов

In [5]:
target_names = {
    0: 'setosa',
    1: 'versicolor',
    2: 'virginica',
}

df['target_names'] = df['target'].map(target_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,target_names
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


Удалим данные по ирисам класса **Setosa**

In [6]:
df = df.loc[df.target_names != 'setosa']

Изменим значения в столбце "target" на 0 и 1

In [7]:
df.loc[df['target'] == 1, 'target'] = 0
df.loc[df['target'] == 2, 'target'] = 1
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,target_names
50,7.0,3.2,4.7,1.4,0,versicolor
51,6.4,3.2,4.5,1.5,0,versicolor
52,6.9,3.1,4.9,1.5,0,versicolor
53,5.5,2.3,4.0,1.3,0,versicolor
54,6.5,2.8,4.6,1.5,0,versicolor
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,1,virginica
146,6.3,2.5,5.0,1.9,1,virginica
147,6.5,3.0,5.2,2.0,1,virginica
148,6.2,3.4,5.4,2.3,1,virginica


## Реализация логистической регрессии

In [8]:
# сигмоидная функция
def sigmoid(x):
    return np.exp(x) / (1 + np.exp(x))

# функция оценки
def score(weights, bias, features):
    return np.dot(weights, features) + bias

# функция прогноза
def prediction(weights, bias, features):
    return sigmoid(score(weights, bias, features))

In [9]:
# логарифмическая потеря для одной записи
def log_loss(weights, bias, features, label):
    pred = prediction(weights, bias, features)
    return -label * np.log(pred) - (1 - label) * np.log(1 - pred)

# логарифмическая потеря по всему набору данных
def total_log_loss(weights, bias, features, labels):
    total_error = 0
    for i in range(len(features)):
        total_error += log_loss(weights, bias, features[i], labels[i])
    return total_error

In [10]:
# функция изменения весов и смещения
def logistic_trick(weights, bias, features, label, learning_rate=0.01):
    pred = prediction(weights, bias, features)
    for i in range(len(weights)):
        weights[i] += (label - pred) * features[i] * learning_rate
        bias += (label - pred) * learning_rate
    return weights, bias

In [11]:
# Функция алгоритма логистической регрессии
def logistic_regression_algorithm(features, labels, learning_rate=0.01, epochs=1000):
    weights = [1.0 for i in range(len(features[0]))]
    bias = 0.0
    errors = []
    for i in range(epochs):
        errors.append(total_log_loss(weights, bias, features, labels))
        j = random.randint(0, len(features) - 1)
        weights, bias = logistic_trick(weights, bias, features[j], labels[j])
    return weights, bias

In [12]:
df.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target', 'target_names'],
      dtype='object')

In [13]:
features = np.array(df[['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)']])

In [14]:
features[:5]

array([[7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5]])

In [15]:
labels = np.array(df['target'])
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [16]:
logistic_regression_algorithm(features, labels)

([-0.707461023369215,
  -0.14832150600446886,
  1.0203563425887252,
  1.3861760725711278],
 -1.8273101700601924)

## Реализация метода градиентного спуска