In [1]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
from math import exp
%matplotlib inline


In [2]:
from sklearn.datasets import load_iris


Загрузим датасет ирисов и оставим только цветы классов 0 и 1, чтобы получить задачу бинарной классификации.

In [4]:
# Assemble one table: the measurement columns followed by the class label
# as the last column.
iris = load_iris()
column_names = [*iris['feature_names'], 'target']
iris_table = np.column_stack((iris['data'], iris['target']))
iris_df = pd.DataFrame(iris_table, columns=column_names)

# Keep only the rows labelled 0 or 1 so the problem is binary.
is_binary_class = iris_df['target'] < 2
iris_df = iris_df.loc[is_binary_class]
iris_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0
5,5.4,3.9,1.7,0.4,0.0
6,4.6,3.4,1.4,0.3,0.0
7,5.0,3.4,1.5,0.2,0.0
8,4.4,2.9,1.4,0.2,0.0
9,4.9,3.1,1.5,0.1,0.0


In [5]:
def coefficients_sgd(train, l_rate, n_epoch, verbose=True):
    """Estimate logistic-regression coefficients with stochastic gradient descent.

    Coefficients start at zero and are updated after every single row, using
    the prediction error scaled by the sigmoid derivative ``yhat * (1 - yhat)``.

    Args:
        train: Indexable collection of rows (list of lists or a 2-D array).
            Each row holds the feature values followed by the expected
            output in its last position. Must contain at least one row,
            because the first row determines the number of coefficients.
        l_rate: Learning rate; scales every coefficient update.
        n_epoch: Number of full passes over ``train``.
        verbose: When true (the default, which keeps the previous behaviour),
            print the summed squared error after every epoch. Pass ``False``
            to train silently.

    Returns:
        list: ``[intercept, w_1, ..., w_k]`` with one weight per feature.
    """
    coef = [0.0] * len(train[0])
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            yhat = predict(row, coef)
            error = row[-1] - yhat
            sum_error += error**2
            # Every coefficient shares this factor, so compute it once per row.
            step = l_rate * error * yhat * (1.0 - yhat)
            # The intercept has no associated feature value.
            coef[0] = coef[0] + step
            # The last element of the row is the label, not a feature.
            for i in range(len(row) - 1):
                coef[i + 1] = coef[i + 1] + step * row[i]
        if verbose:
            print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))
    return coef

In [6]:
def predict(row, coefficients):
    """Return the logistic-regression output for a single row.

    Args:
        row: Feature values followed by one trailing element (the class label
            in this notebook). The trailing element is never read, so a
            placeholder must still be present when scoring unlabeled data.
        coefficients: ``[intercept, w_1, ..., w_k]``; ``w_i`` multiplies the
            i-th feature of ``row``.

    Returns:
        float: Sigmoid of the linear score, a value in ``[0.0, 1.0]``.
    """
    score = coefficients[0]
    # Skip the last element of the row: it is the label, not a feature.
    for i, feature in enumerate(row[:-1], start=1):
        score += coefficients[i] * feature
    try:
        return 1.0 / (1.0 + exp(-score))
    except OverflowError:
        # math.exp overflows for very negative scores; the sigmoid there is
        # smaller than any normal float, so report 0.0 instead of crashing.
        return 0.0

Обучение. Возьмем для обучения строки с индексами с 10 по 89 (срез `[10:90]`), чтобы в выборку попали оба класса.

In [7]:
# Settings for the gradient-descent run.
l_rate = 0.3
n_epoch = 100

# Rows at positions 10..89 of the filtered table; each row keeps the label as
# its last element, which is the layout coefficients_sgd reads.
dataset_train = iris_df.iloc[10:90].values
coef = coefficients_sgd(dataset_train, l_rate=l_rate, n_epoch=n_epoch)
print(coef)

>epoch=0, lrate=0.300, error=5.373
>epoch=1, lrate=0.300, error=4.000
>epoch=2, lrate=0.300, error=2.370
>epoch=3, lrate=0.300, error=1.527
>epoch=4, lrate=0.300, error=1.039
>epoch=5, lrate=0.300, error=0.732
>epoch=6, lrate=0.300, error=0.548
>epoch=7, lrate=0.300, error=0.435
>epoch=8, lrate=0.300, error=0.365
>epoch=9, lrate=0.300, error=0.317
>epoch=10, lrate=0.300, error=0.281
>epoch=11, lrate=0.300, error=0.251
>epoch=12, lrate=0.300, error=0.226
>epoch=13, lrate=0.300, error=0.204
>epoch=14, lrate=0.300, error=0.186
>epoch=15, lrate=0.300, error=0.170
>epoch=16, lrate=0.300, error=0.156
>epoch=17, lrate=0.300, error=0.144
>epoch=18, lrate=0.300, error=0.134
>epoch=19, lrate=0.300, error=0.124
>epoch=20, lrate=0.300, error=0.116
>epoch=21, lrate=0.300, error=0.109
>epoch=22, lrate=0.300, error=0.103
>epoch=23, lrate=0.300, error=0.097
>epoch=24, lrate=0.300, error=0.092
>epoch=25, lrate=0.300, error=0.087
>epoch=26, lrate=0.300, error=0.083
>epoch=27, lrate=0.300, error=0.079
>e

In [8]:
from sklearn.metrics import classification_report


В тестовую выборку возьмем 10 первых строк датасета (класс 0) и 10 последних (класс 1) — они не участвовали в обучении.

In [9]:
# Held-out rows: the first ten and the last ten rows of the filtered table.
held_out = [iris_df.head(10), iris_df.tail(10)]

# Full rows (features + label) for predict(), and the labels on their own.
dataset_test = [row for part in held_out for row in part.values]
y_test = [label for part in held_out for label in part['target'].values]

# Round each predicted probability to the nearest class (0.0 or 1.0).
y_pred = [round(predict(row, coef), 0) for row in dataset_test]
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        10
         1.0       1.00      1.00      1.00        10

   micro avg       1.00      1.00      1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [10]:
# Show the raw predicted probability next to the true label, which is the
# last element of each test row.
for row in dataset_test:
    print(predict(row, coef), row[-1])

0.006195550608282335 0.0
0.0166473061659142 0.0
0.009793424661238758 0.0
0.021166601025279567 0.0
0.0054414486155726725 0.0
0.00761850382735097 0.0
0.010701185927219196 0.0
0.010233145710562752 0.0
0.025298779227556172 0.0
0.016205608215220206 0.0
0.9971950605008825 1.0
0.9965190122593338 1.0
0.990239366586712 1.0
0.96685494660175 1.0
0.9945950561080902 1.0
0.9890325836117132 1.0
0.9918820973520033 1.0
0.9921127782505973 1.0
0.9053561202782355 1.0
0.9910680629160098 1.0
