### Linear Regression

In [1]:
import numpy as np

class MyLinearRegression():
    """Multi-output linear regression fitted with batch gradient descent.

    The model is ``y_hat = X @ weights.T + bias``. Training minimises the
    mean squared error plus an L2 penalty ``regulation_term * ||weights||^2``
    (the bias is not penalised).

    Attributes set by ``fit``:
        weights: array of shape ``(n_targets, n_features)``.
        bias: array of shape ``(n_targets,)``.
    """

    def __init__(self, learning_rate: float = 0.01, regulation_term: float = 0.01, iterations: int = 1000):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.regulation_term = regulation_term
        # Initialised to None so that predict() can detect an unfitted model
        # instead of failing with an AttributeError.
        self.weights = None
        self.bias = None
        # True when fit() received a 1-D target; predict() then returns 1-D.
        self._single_target = False

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """Fit the model to ``X`` and ``y``.

        Args:
            X: samples, shape ``(n_samples, n_features)``.
            y: targets, shape ``(n_samples, n_targets)`` or ``(n_samples,)``.

        Raises:
            ValueError: if the inputs have the wrong rank, are empty, or
                disagree on the number of samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.ndim not in (1, 2):
            raise ValueError("y must be a 1-D or 2-D array")

        self._single_target = y.ndim == 1
        if self._single_target:
            # Treat a flat target vector as a single output column.
            y = y.reshape(-1, 1)

        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        weights = np.zeros((y.shape[1], X.shape[1]))
        bias = np.zeros(y.shape[1])

        for _ in range(self.iterations):
            # Residuals for the whole batch at once: (n_samples, n_targets).
            residual = X @ weights.T + bias - y

            # Gradient of the mean squared error, plus the L2 penalty gradient.
            dW = 2 * (residual.T @ X) / n_samples
            dW += 2 * self.regulation_term * weights
            db = 2 * residual.mean(axis=0)

            weights -= self.learning_rate * dW
            bias -= self.learning_rate * db

        self.weights = weights
        self.bias = bias

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return predictions for ``X``.

        Args:
            X: samples, shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples, n_targets)``, or ``(n_samples,)``
            when the model was fitted on a 1-D target.

        Raises:
            RuntimeError: if the model has not been fitted yet.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("Model is not trained yet!")
        X = np.asarray(X, dtype=float)
        predictions = X @ self.weights.T + self.bias
        return predictions.ravel() if self._single_target else predictions

In [21]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
import numpy as np


def generate_data(n: int) -> tuple[np.ndarray, np.ndarray]:
    """Generate a noisy two-feature, two-target linear dataset.

    Features are drawn uniformly from ``[0, 1)``. The targets are
    ``5 * x0 + 1`` and ``4 * x1 - 2``, each with independent uniform
    ``[0, 1)`` noise added.

    Args:
        n: number of samples to generate.

    Returns:
        A tuple ``(X, y)``, both of shape ``(n, 2)``.
    """
    X = np.random.rand(n, 2)
    # One noise draw per target value, taken from the global NumPy RNG.
    noise = np.random.rand(n, 2)
    y = X * np.array([5.0, 4.0]) + np.array([1.0, -2.0]) + noise
    return X, y

# Benchmark: fit the hand-written regressor and scikit-learn's on the same
# freshly generated data, ten times over, and report the mean R2 score of
# each on the held-out 20% split.
custom_scores = []
sklearn_scores = []

for _ in range(10):
    X, y = generate_data(1000)
    split = train_test_split(X, y, test_size=0.2)
    X_train, X_test, y_train, y_test = split

    # Custom gradient-descent implementation.
    lr = MyLinearRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    custom_scores.append(r2_score(y_test, y_pred))

    # scikit-learn reference implementation.
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    sklearn_scores.append(r2_score(y_test, y_pred))

r2_score_average_custom = sum(custom_scores)
r2_score_average_sklearn = sum(sklearn_scores)

print(f'R2 score for custom implementation: {r2_score_average_custom / 10}')
print(f'R2 score for sklearn implementation: {r2_score_average_sklearn / 10}')

R2 score for custom implementation: 0.8851974823989215
R2 score for sklearn implementation: 0.9508460370201635


### Logistic Regression

In [6]:
class MyLogisticRegression():
    """Binary logistic regression fitted with batch gradient descent.

    The predicted probability is ``sigmoid(x @ weights + bias)``. The weight
    gradient includes an L2 term ``regulation_constant * weights``; the bias
    is not regularised.

    Attributes set by ``fit``:
        weights: array of shape ``(n_features,)``.
        bias: float intercept.
    """

    def __init__(self, learning_rate: float = 0.01, regulation_constant: float = 0.0, iterations: int = 1000) -> None:
        self.epochs_no = iterations
        self.learning_rate = learning_rate
        self.regulation_constant = regulation_constant
        # Initialised to None so that predict() can detect an unfitted model
        # instead of failing with an AttributeError.
        self.weights = None
        self.bias = None

    def __sigmoid(self, x):
        """Return the logistic function of ``x`` element-wise."""
        # Clip so that np.exp never receives an argument large enough to
        # overflow a float64.
        x = np.clip(x, -709, 709)
        return 1 / (1 + np.exp(-x))

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        """Fit the model to samples ``x`` and binary labels ``y``.

        Args:
            x: samples, shape ``(n_samples, n_features)``.
            y: labels (0 or 1), one per sample; a column vector is flattened.

        Raises:
            ValueError: if ``x`` is not 2-D, is empty, or disagrees with
                ``y`` on the number of samples.
        """
        x = np.asarray(x, dtype=float)
        # Flatten so that an (n, 1) label column does not broadcast into an
        # (n, n) residual matrix.
        y = np.asarray(y, dtype=float).ravel()
        if x.ndim != 2:
            raise ValueError("x must be a 2-D array of shape (n_samples, n_features)")

        samples_count, features_count = x.shape
        if samples_count == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if y.shape[0] != samples_count:
            raise ValueError("x and y must contain the same number of samples")

        weights = np.zeros(features_count)
        bias = 0.0

        for _ in range(self.epochs_no):
            residual = self.__sigmoid(np.dot(x, weights) + bias) - y

            dw = np.dot(x.T, residual) / samples_count + self.regulation_constant * weights
            db = np.sum(residual) / samples_count

            weights -= self.learning_rate * dw
            bias -= self.learning_rate * db

        self.weights = weights
        self.bias = bias

    def predict(self, x) -> np.ndarray:
        """Return the predicted class (0.0 or 1.0) for each sample in ``x``.

        Raises:
            RuntimeError: if the model has not been fitted yet.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError('Model is not trained')
        probabilities = self.__sigmoid(np.dot(np.asarray(x, dtype=float), self.weights) + self.bias)
        return np.round(probabilities)

In [7]:
from sklearn.calibration import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from sklearn.linear_model import LogisticRegression
import pandas as pd

# Benchmark: compare the hand-written logistic regression with
# scikit-learn's over ten random train/test splits of the chess dataset,
# accumulating F1, accuracy, precision and recall for each.
f1_score_average_custom = 0
f1_score_average_sklearn = 0

accuracy_score_average_custom = 0
accuracy_score_average_sklearn = 0

precision_score_average_custom = 0
precision_score_average_sklearn = 0

recall_score_average_custom = 0
recall_score_average_sklearn = 0

# Loading and label-encoding the data does not depend on the random split,
# so it is done once here rather than on every repetition.
dataset = pd.read_csv('./datasets/chess')
X = dataset.drop('class', axis=1)
y = dataset['class']

# Every feature column and the target are converted to integer codes.
for col in X.columns:
    labelEncoder = LabelEncoder()
    X[col] = labelEncoder.fit_transform(X[col])
y = LabelEncoder().fit_transform(y)

for _ in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Custom gradient-descent implementation.
    lr = MyLogisticRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    f1_score_average_custom += f1_score(y_test, y_pred)
    accuracy_score_average_custom += accuracy_score(y_test, y_pred)
    precision_score_average_custom += precision_score(y_test, y_pred)
    recall_score_average_custom += recall_score(y_test, y_pred)

    # scikit-learn reference implementation.
    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    f1_score_average_sklearn += f1_score(y_test, y_pred)
    accuracy_score_average_sklearn += accuracy_score(y_test, y_pred)
    precision_score_average_sklearn += precision_score(y_test, y_pred)
    recall_score_average_sklearn += recall_score(y_test, y_pred)

print(f'F1 score for custom implementation: {f1_score_average_custom / 10}')
print(f'F1 score for sklearn implementation: {f1_score_average_sklearn / 10}')
print()
print(f'Accuracy score for custom implementation: {accuracy_score_average_custom / 10}')
print(f'Accuracy score for sklearn implementation: {accuracy_score_average_sklearn / 10}')
print()
print(f'Precision score for custom implementation: {precision_score_average_custom / 10}')
print(f'Precision score for sklearn implementation: {precision_score_average_sklearn / 10}')
print()
print(f'Recall score for custom implementation: {recall_score_average_custom / 10}')
print(f'Recall score for sklearn implementation: {recall_score_average_sklearn / 10}')

F1 score for custom implementation: 0.8775170197823954
F1 score for sklearn implementation: 0.9641852937369295

Accuracy score for custom implementation: 0.8670312499999998
Accuracy score for sklearn implementation: 0.96265625

Precision score for custom implementation: 0.8460295572799493
Precision score for sklearn implementation: 0.9675390649331044

Recall score for custom implementation: 0.9117407632749366
Recall score for sklearn implementation: 0.9609018656923689
