In [8]:
import numpy as np

# Linear Regression

In [16]:
class LinearRegression:
    """Linear regression fitted with batch gradient descent.

    The model is ``y_hat = X @ weights + bias``. ``fit`` runs ``n_iters``
    full-batch gradient steps on the (halved) mean squared error.

    Attributes created by ``fit``:
        weights: 1-D array holding one coefficient per feature.
        bias: scalar intercept.
    """

    def __init__(self, n_iters=1000):
        """Remember how many gradient-descent steps ``fit`` should run."""
        self.n_iters = n_iters

    def fit(self, X, y, lr=0.001):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like holding n_samples targets. Both shape (n_samples,)
                and column vectors of shape (n_samples, 1) are accepted.
            lr: learning rate, i.e. the size of each gradient step.

        Raises:
            ValueError: if ``X`` is not 2-D or ``y`` does not contain exactly
                one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the targets: a (n, 1) column would otherwise broadcast
        # against the (n,) predictions into an (n, n) matrix and silently
        # produce a weight *matrix* instead of a weight vector.
        y = np.asarray(y, dtype=float).ravel()

        d_size, n_features = X.shape
        if y.shape[0] != d_size:
            raise ValueError(
                f"X has {d_size} samples but y has {y.shape[0]} targets"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            y_pred = np.dot(X, self.weights) + self.bias
            error = y_pred - y
            # Gradient of (1 / 2n) * sum(error ** 2) w.r.t. weights and bias.
            dw = 1 / d_size * np.dot(X.T, error)
            db = 1 / d_size * np.sum(error)
            self.weights = self.weights - lr * dw
            self.bias = self.bias - lr * db

    def predict(self, X):
        """Return the model output ``X @ weights + bias`` for array-like ``X``.

        Must be called after ``fit``, which creates ``weights`` and ``bias``.
        """
        return np.dot(np.asarray(X, dtype=float), self.weights) + self.bias

In [30]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Seeded synthetic regression problem (1000 rows, 5 features, noise=50).
X, y = datasets.make_regression(
    n_samples=1000,
    n_features=5,
    noise=50,
    random_state=31,
)

# Keep 20% of the rows aside for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=31
)

# NOTE: `lr` below is the model object, whereas the `lr=` keyword passed to
# fit() is the learning rate.
lr = LinearRegression()
lr.fit(X_train, y_train, lr=0.1)
y_pred = lr.predict(X_test)

In [31]:
def mse(y_pred, y):
    """Return the mean squared error between predictions and targets.

    Both arguments are expected to be NumPy arrays (or scalars) that can be
    subtracted element-wise; the squared differences are averaged over all
    elements.
    """
    residual = y_pred - y
    return np.mean(np.square(residual))

In [32]:
y_pred = lr.predict(X_test)
# mse() is an error measure (lower is better), so label it as MSE rather
# than as an accuracy.
print("MSE =", mse(y_pred, y_test))


Accuracy =  2645.1056380306977


# Logistic Regression

In [33]:
def sigmoid(X):
    """Element-wise logistic function ``1 / (1 + exp(-X))``.

    The input is first clipped to [-500, 500] so that ``np.exp`` cannot
    overflow for very large negative or positive values.
    """
    bounded = np.clip(X, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Class-1 probability is ``sigmoid(X @ weights + bias)``. ``fit`` runs
    ``n_iters`` full-batch gradient steps using the gradient
    ``X.T @ (p - y) / n``.

    Attributes created by ``fit``:
        weights: 1-D array holding one coefficient per feature.
        bias: scalar intercept.
    """

    def __init__(self, n_iters=1000):
        """Remember how many gradient-descent steps ``fit`` should run."""
        self.n_iters = n_iters

    def fit(self, X, y, lr=0.001):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like holding n_samples labels (expected to be 0/1).
                Both shape (n_samples,) and (n_samples, 1) are accepted.
            lr: learning rate, i.e. the size of each gradient step.

        Raises:
            ValueError: if ``X`` is not 2-D or ``y`` does not contain exactly
                one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the labels: a (n, 1) column would otherwise broadcast
        # against the (n,) probabilities into an (n, n) matrix and silently
        # produce a weight *matrix* instead of a weight vector.
        y = np.asarray(y, dtype=float).ravel()

        d_size, n_features = X.shape
        if y.shape[0] != d_size:
            raise ValueError(
                f"X has {d_size} samples but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            probs = sigmoid(np.dot(X, self.weights) + self.bias)
            error = probs - y
            dw = 1 / d_size * np.dot(X.T, error)
            db = 1 / d_size * np.sum(error)
            self.weights = self.weights - lr * dw
            self.bias = self.bias - lr * db

    def predict(self, X):
        """Return a list of 0/1 labels, one per row of array-like ``X``.

        A row is labelled 1 when its predicted probability is >= 0.5.
        Must be called after ``fit``, which creates ``weights`` and ``bias``.
        """
        probs = sigmoid(np.dot(np.asarray(X, dtype=float), self.weights) + self.bias)
        # atleast_1d lets a single 1-D sample yield a one-element list.
        return (np.atleast_1d(probs) >= 0.5).astype(int).tolist()

In [46]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Seeded synthetic classification problem (500 rows, 5 features).
X, y = datasets.make_classification(
    n_samples=500,
    n_features=5,
    random_state=31,
)

# Keep 20% of the rows aside for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=31
)

# NOTE: `lr` below is the model object, whereas the `lr=` keyword passed to
# fit() is the learning rate.
lr = LogisticRegression()
lr.fit(X_train, y_train, lr=0.001)
y_pred = lr.predict(X_test)


In [47]:
def accuracy(y_pred, y):
    """Return an element-wise boolean mask of where ``y_pred`` equals ``y``.

    Despite the name, this does not return a single score: callers sum (or
    average) the mask themselves to obtain the fraction of correct labels.
    """
    matches = np.equal(y_pred, y)
    return matches

In [48]:
# Percentage of test labels predicted correctly (mean of the match mask).
np.mean(accuracy(y_pred, y_test)) * 100

np.float64(88.0)

# KNN

In [59]:
from collections import Counter
def euclidean_dist(x2, x1):
    """Return the Euclidean distance between two points.

    The squared differences are summed over *all* elements, so the inputs
    are meant to be single points (1-D arrays), not batches of points.
    """
    delta = x2 - x1
    return np.sqrt(np.square(delta).sum())

class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    ``fit`` only stores the training data; all work happens at prediction
    time, where each query is compared against every stored training row.
    """

    def __init__(self, k_neigbors=3):
        """Store the number of neighbours that vote on each prediction.

        The parameter and attribute keep their existing spelling
        (``k_neigbors``) so that current callers continue to work.
        """
        self.k_neigbors = k_neigbors

    def fit(self, X_train, y_train):
        """Store the training rows and their labels.

        Args:
            X_train: array-like of shape (n_samples, n_features).
            y_train: array-like holding n_samples labels.
        """
        # asarray is a no-op for ndarrays and lets plain lists be used too.
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)

    def predict(self, X_test):
        """Return a list with one predicted label per row of ``X_test``."""
        return [self._predict(x) for x in X_test]

    def _predict(self, x):
        """Return the majority label among the nearest training rows to ``x``."""
        # One vectorized pass over every training row instead of a Python
        # loop. Squared distances rank the neighbours exactly like true
        # distances do, so the square root is unnecessary.
        offsets = self.X_train - np.asarray(x)
        sq_dists = np.sum(offsets ** 2, axis=1)

        nearest = np.argsort(sq_dists)[:self.k_neigbors]
        votes = Counter(self.y_train[nearest])
        # On a tied vote the label met first (closest neighbour) wins.
        return votes.most_common(1)[0][0]

In [61]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Seeded synthetic classification problem (500 rows, 4 features).
X, y = datasets.make_classification(
    n_samples=500,
    n_features=4,
    random_state=31,
)

# Keep 20% of the rows aside for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=31
)

knn = KNN()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [62]:
# Percentage of test labels predicted correctly (mean of the match mask).
np.mean(accuracy(y_pred, y_test)) * 100

np.float64(95.0)

In [None]:
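# That's cool: KNN scores higher here (95%) than logistic regression did (88%).
# Caveat: the two models were run on different synthetic datasets
# (4 vs. 5 features), so this is not a like-for-like comparison.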

In [None]:
# Scratch check: argsort gives the indices that would sort the list in
# ascending order, which is how the nearest neighbours are picked.
dist = [5, 4, 3, 2, 5, 6, 2, 1]
np.argsort(dist)

array([7, 3, 6, 2, 1, 0, 4, 5])

In [55]:
from collections import Counter

# Scratch check of the majority-vote idiom: count the labels, then take the
# label of the most frequent entry.
labels = [1, 0, 0, 1, 1, 1, 1, 0, 0, 2]
print(Counter(labels))
print(Counter(labels).most_common(1)[0][0])

Counter({1: 5, 0: 4, 2: 1})
1


In [14]:
from sklearn.datasets import make_classification

# Tiny seeded dataset (4 rows, 4 features) that is small enough to inspect
# by hand. NOTE: this rebinds the notebook-level names X and y.
X, y = make_classification(
    n_samples=4,
    n_features=4,
    random_state=31,
)

In [15]:
# Display the small feature matrix created in the previous cell.
X

array([[ 0.08071906, -0.69070464, -0.26079469,  0.08401115],
       [-0.68922676,  1.43947115,  0.66445245, -0.37354426],
       [-0.58614106, -0.45475709, -0.02330901, -0.18820341],
       [ 1.03033594, -0.79065147, -0.5162551 ,  0.45344531]])

In [16]:
# Display the labels that go with the rows of X.
y

array([1, 1, 0, 0])

In [17]:
from sklearn.model_selection import train_test_split

# Split the rows of X into two halves.
# NOTE(review): no random_state is passed, so the rows landing in X1 / X2
# presumably change between runs; the cells below hard-code values copied
# from one particular split.
X1, X2 = train_test_split(
    X,
    test_size=0.5,
)

In [18]:
# Display the first half of the split.
X1

array([[ 1.03033594, -0.79065147, -0.5162551 ,  0.45344531],
       [ 0.08071906, -0.69070464, -0.26079469,  0.08401115]])

In [19]:
# Display the second half of the split.
X2

array([[-0.68922676,  1.43947115,  0.66445245, -0.37354426],
       [-0.58614106, -0.45475709, -0.02330901, -0.18820341]])

In [21]:
import numpy as np
# With no axis argument np.sum adds up every element of the 2-D difference,
# giving a single scalar rather than one value per row.
np.sum(X2-X1)

np.float64(0.398636446147026)

In [22]:
# Row 0 of X2 minus row 0 of X1, typed in by hand from the displayed arrays.
np.array([-0.68922676,  1.43947115,  0.66445245, -0.37354426]) - np.array([ 1.03033594, -0.79065147, -0.5162551 ,  0.45344531])

array([-1.7195627 ,  2.23012262,  1.18070755, -0.82698957])

In [23]:
# Sum of the row-0 differences computed in the previous cell.
sum([-1.7195627 ,  2.23012262,  1.18070755, -0.82698957])

0.8642778999999999

In [24]:
# Row 1 of X2 minus row 1 of X1, typed in by hand from the displayed arrays.
np.array([-0.58614106, -0.45475709, -0.02330901, -0.18820341]) - np.array([ 0.08071906, -0.69070464, -0.26079469,  0.08401115])

array([-0.66686012,  0.23594755,  0.23748568, -0.27221456])

In [25]:
# Sum of the row-1 differences computed in the previous cell.
sum([-0.66686012,  0.23594755,  0.23748568, -0.27221456])

-0.4656414499999999

In [27]:
# Combine the two per-row sums (the row-1 sum is negative, hence the minus);
# the result matches np.sum(X2-X1) up to the rounding of the typed-in values.
0.8642778999999999- 0.4656414499999999

0.39863645000000003

In [29]:
# Display X1 again for reference next to the dot product below.
X1

array([[ 1.03033594, -0.79065147, -0.5162551 ,  0.45344531],
       [ 0.08071906, -0.69070464, -0.26079469,  0.08401115]])

In [30]:
# Display X2 again for reference next to the dot product below.
X2

array([[-0.68922676,  1.43947115,  0.66445245, -0.37354426],
       [-0.58614106, -0.45475709, -0.02330901, -0.18820341]])

In [31]:
# Pairwise dot products: entry [i, j] is the dot product of row i of X1
# with row j of X2.
np.dot(X1, X2.T)

array([[-2.36066394, -0.31767441],
       [-1.2545507 ,  0.25705776]])