In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class MyLogReg():

    def __init__(self, n_iter=5000, learning_rate=0.01, weights=None, metric=None, reg=None,
                l1_coef=0, l2_coef=0):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.metric = metric
        self.metric_value = None
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef 
        
    def __str__(self):
        return f'MyLogReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}'
    
    def fit(self, X, y, verbose=False):
        n = len(X)
        y = np.array(y)
        m = len(X.columns)
        X.insert(0, 'x0', [1] * n)
        self.weights = np.zeros(m + 1)
        eps = 1e-15
        error = []
        for i in range(self.n_iter):
            y_pred = 1 / (1 + np.exp(-np.dot(X, self.weights)))
            LogLoss = -(1 / n) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))  + self.regularization()[0] / n
            grad = (1 / n) * np.dot((y_pred - y), X) + self.regularization()[1]  / n
            self.weights = self.weights - self.learning_rate * grad
            error.append(LogLoss)
        del X['x0']
        return (self.weights, error)
        
    def get_coef(self):
        return np.sum(self.weights[1:])
    
    def predict_proba(self, X):
        if not(len(self.weights) == len(X.columns)):
            n = len(X)
            X.insert(0, 'x0', [1] * n)
        prb = 1 / (1 + np.exp(-np.dot(X, self.weights)))
        return prb
    
    def predict(self, X):
        if not(len(self.weights) == len(X.columns)):
            n = len(X)
            X.insert(0, 'x0', [1] * n)
        prb = 1 / (1 + np.exp(-np.dot(X, self.weights)))
        y_pred = self.classification(prb, 0.5)
        return int(sum(y_pred))
    
    def metrics(self, y_pred, y):
            TP = np.count_nonzero((y_pred == 1) & (y_pred == y))
            TN = np.count_nonzero((y_pred == 0) & (y_pred == y))
            FN = np.count_nonzero((y_pred == 0) & (y_pred != y))
            FP = np.count_nonzero((y_pred == 1) & (y_pred != y))
            accuracy = (TP + TN) / (TP + TN + FN + FP)
            metrics = {'accuracy': accuracy}
            return metrics[self.metric]
    
    def classification(self, pred):
        classes = np.zeros_like(pred)
        classes[pred >= 0] = 1
        classes[pred < 0] = 0
        return classes
    
    def get_best_score(self):
        return self.metric_value
    
    def regularization(self):
        l1 = self.l1_coef * sum(np.abs(self.weights[1:]))
        l2 = self.l2_coef * sum((self.weights[1:])**2)
        gr1 = np.zeros_like(self.weights)
        gr1[1:] = self.l1_coef * np.sign(self.weights[1:])
        gr2 = np.zeros_like(self.weights)
        gr2[1:] = 2 * self.l2_coef * self.weights[1:]
        regul = {'l1': l1,
                 'l2': l2,
                 'elasticnet': l1 + l2,
                 None: 0}
        regul_grad = {'l1': gr1,
                      'l2': gr2,
                      'elasticnet': gr1 + gr2,
                      None: 0}
        return (regul[self.reg], regul_grad[self.reg])

In [2]:
def features(x1, x2):
    lst = []
    for i in range(31):
        for j in range(31):
            if (i + j) <= 30:
                lst.append((x1 ** i) * (x2 ** j))
    data = np.array(lst)
    data1 = data.transpose()
    df = pd.DataFrame({i + i: data[:, i] for i in range(data1.shape[0])})
    new = df.T
    return new

In [3]:
def normalize(df):
    df = df.to_numpy()
    arr = df.transpose()
    new = np.zeros_like(arr)
    for i in range(arr.shape[0]):
        new[i,:] = (arr[i,:] - np.mean(arr[i,:])) / np.std(arr[i,:])
    new_transpose = new.transpose()
    res = pd.DataFrame(data=new_transpose)
    return res

In [4]:
from sklearn.model_selection import train_test_split
df = pd.read_csv('ex2data2.csv', names=['x1', 'x2', 'y'])
X = features(np.array(df['x1']), np.array(df['x2']))
X_train, X_test, y_train, y_test = train_test_split(X, df['y'], test_size=0.3, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.3, random_state=42)
X_train = normalize(X_train)
del X_train[0]
X_test = normalize(X_test)
del X_test[0]
X_test.insert(0, 0, [1] * len(X_test))
X_val = normalize(X_val)
del X_val[0]
X_val.insert(0, 0, [1] * len(X_val))

  new[i,:] = (arr[i,:] - np.mean(arr[i,:])) / np.std(arr[i,:])
  X_test.insert(0, 0, [1] * len(X_test))
  new[i,:] = (arr[i,:] - np.mean(arr[i,:])) / np.std(arr[i,:])
  X_val.insert(0, 0, [1] * len(X_val))


In [5]:
h = 0.3
a = 0
b = 5
values_of_par = np.linspace(a, b, int((b - a) / h))
values_of_loss = np.zeros_like(values_of_par, dtype=np.float64)
for i in range(len(values_of_par)):
    model = MyLogReg(n_iter=10000, learning_rate=0.5, l2_coef=values_of_par[i], reg='l2')
    weights = model.fit(X_train, y_train)[0]
    y_pred = 1 / (1 + np.exp(-np.dot(X_val, weights)))
    LogLoss = -(1 / len(X_val)) * np.sum(y_val * np.log(y_pred) + (1 - y_val) * np.log(1 - y_pred))
    values_of_loss[i] =  LogLoss

par = values_of_par[values_of_loss.argmin()]
print(f'значение параметра регуляризации: {par}')

  X.insert(0, 'x0', [1] * n)


значение параметра регуляризации: 3.6666666666666665


In [6]:
model = MyLogReg(n_iter=50000, learning_rate=0.5, l2_coef=par, metric='accuracy', reg='l2')
weights_reg = model.fit(X_train, y_train)[0]
X_train.insert(0, 0, [1] * len(X_train))
y_pred_train = np.dot(X_train, weights_reg)
y_pred_test = np.dot(X_test, weights_reg)
y_pred_val = np.dot(X_val, weights_reg)
y_classes_train = model.classification(y_pred_train)
y_classes_test = model.classification(y_pred_test)
y_classes_val = model.classification(y_pred_val)
print(f'точность для тренировочной: {model.metrics(y_classes_train, y_train)}')
print(f'точность для тестовой: {model.metrics(y_classes_test, y_test)}')
print(f'точность для валидационной: {model.metrics(y_classes_val, y_val)}')
del X_train[0]

точность для тренировочной: 0.8536585365853658
точность для тестовой: 0.84
точность для валидационной: 0.9090909090909091
