## Введение

Работа по освоению линейной классификатор. Для этого считаю необходимым написать базовый алгоритм самостоятельно, а потом сравнить его с готовым решением из пакета sklearn. Для работы буду использовать модель линейной классификации с L2 регуляризацией и стохастический градиентный спуск как оптимизатор.

In [1]:
from torch.nn import Linear
import torch
import numpy as np
from sklearn.datasets import load_breast_cancer
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

In [2]:
iris = load_breast_cancer()

In [3]:
train_X,test_X,train_Y,test_Y = train_test_split(iris.data,
                                                iris.target,
                                                test_size = 0.20,
                                                random_state = 42)
train_X_torch = torch.tensor(train_X, dtype = torch.float32)
train_Y_torch = torch.tensor(train_Y, dtype = torch.float32)
test_X_torch = torch.tensor(test_X, dtype = torch.float32)
test_Y_torch = torch.tensor(test_Y, dtype = torch.float32)

In [4]:
class LinearClassifier():
    def __init__(self,n_features,max_iter = 5000):
        self.n_features = n_features
        self.max_iter = max_iter
        self.model = torch.nn.Sequential()
        self.model.add_module('first', torch.nn.Linear(self.n_features,1))
        self.model.add_module('second', torch.nn.Sigmoid())
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr = 0.001)
    
    def fit(self,design_matrix,target):
        for i in range(self.max_iter):
            sampling  = np.random.randint(0,design_matrix.shape[1],70)
            part_design_matrix = design_matrix[sampling]
            part_target_vector = target[sampling]
                
            pred = self.model(part_design_matrix)[:,0]
            loss = torch.mean(F.binary_cross_entropy(pred,
                                                    part_target_vector,
                                                    reduce = 'none'),dim = 0,keepdim = True)

            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
                
        return self.model
    

            

In [5]:
torch_classifier = LinearClassifier(30)

In [6]:
fited = torch_classifier.fit(train_X_torch,train_Y_torch)



In [7]:
final_pred = fited(test_X_torch)[:,0]
final_pred = np.array(final_pred > 0.5)

In [8]:
final_pred

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [9]:
test_Y_torch

tensor([1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 1., 0., 1., 0., 1., 0., 1., 1.,
        1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1.,
        0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1.,
        1., 1., 1., 0., 0., 1., 1., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0.,
        1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1.,
        1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0.,
        1., 1., 0., 1., 1., 0.])

In [10]:
np.mean(final_pred == test_Y_torch)

0.0