In [95]:
import numpy as np
from models.linear import LinearRegression
from models.logistic import LogisticRegression
from metrics.classification_metrics import *

In [96]:
# Draw a reproducible categorical label for every sample and encode it as an
# integer index (position of the label inside `classes`).
n_samples = 500
np.random.seed(42)  # fixes the global NumPy RNG stream for the draws below

classes = ["A", "B", "C"]
random_classes = np.random.choice(classes, size=n_samples)

# label -> index lookup, e.g. {"A": 0, "B": 1, "C": 2}
class_to_int = dict(zip(classes, range(len(classes))))
random_classes_int = np.array([class_to_int.get(label) for label in random_classes])

# Bare last expression so the notebook displays the encoded labels.
random_classes_int

array([2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 0, 2, 0, 2, 2, 0,
       0, 2, 1, 0, 1, 1, 1, 0, 1, 0, 1, 2, 2, 0, 2, 2, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 2, 0,
       1, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 2, 0, 1,
       2, 1, 0, 2, 0, 1, 0, 2, 2, 1, 0, 2, 1, 2, 2, 0, 2, 0, 2, 1, 2, 0,
       0, 1, 2, 2, 1, 2, 2, 0, 2, 2, 1, 1, 0, 2, 2, 2, 0, 0, 1, 0, 2, 2,
       0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 2, 1, 2, 0,
       2, 1, 0, 0, 0, 2, 1, 0, 0, 0, 2, 2, 1, 2, 0, 1, 0, 0, 2, 1, 2, 2,
       1, 0, 0, 1, 0, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 0, 1, 1, 1, 2, 0, 0,
       0, 2, 1, 1, 0, 1, 1, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 0,
       2, 1, 0, 2, 2, 2, 1, 2, 2, 2, 2, 0, 0, 2, 1, 0, 2, 0, 0, 1, 2, 2,
       1, 1, 2, 2, 1, 0, 0, 1, 0, 1, 0, 0, 2, 2, 0, 0, 2, 2, 2, 1, 0, 2,
       0, 1, 1, 2, 1, 0, 2, 0, 0, 1, 1, 1, 1, 1, 2,

In [97]:
# Synthetic regression problem.
# Draw order matters for reproducibility: raw predictor, Z, P, then the noise.
x_raw = np.random.normal(2, 2, n_samples)
Z = np.random.normal(9, 1, n_samples)
P = np.random.beta(1, 2, n_samples)
noise = np.random.normal(0, 0.5, n_samples)

# Target: linear in x_raw, P and Z, plus a class-dependent slope on x_raw.
Y = 2 * x_raw + noise + P + Z + random_classes_int * x_raw

# Design matrix columns: x, x^2, Z + x, x * P, P, class index.
# Note: Z only enters through the (Z + x) column, and the class * x
# interaction used to build Y is not one of the columns.
X = np.column_stack(
    (
        x_raw,
        x_raw ** 2,
        Z + x_raw,
        x_raw * P,
        P,
        random_classes_int,
    )
)

In [98]:
# Fit one LinearRegression per regularization setting on the same (X, Y).
# `reg` and `ridge` rely on the constructor defaults; the Lasso and ElasticNet
# fits pass an explicit iteration count and learning rate.
iterative_settings = {"n_iter": 10000, "lr": 0.001}

reg = LinearRegression(regularization="None").fit(X, Y)
ridge = LinearRegression(regularization="Ridge").fit(X, Y)
lasso = LinearRegression(
    regularization="Lasso",
    **iterative_settings,
).fit(X, Y)
elastic = LinearRegression(
    regularization="ElasticNet",
    alpha=0.4,
    **iterative_settings,
).fit(X, Y)

In [99]:
# In-sample evaluation (predictions are made on the same X used for fitting).
# Error tables are printed in the order: ElasticNet, Lasso, Ridge.
elastic_pred = elastic.predict(X)
elastic.print_errors(Y, elastic_pred)
lasso.print_errors(Y, lasso.predict(X))
ridge.print_errors(Y, ridge.predict(X))

# Scores in the same model order; shown as the cell's output value.
(
    elastic.score(X, Y),
    lasso.score(X, Y),
    ridge.score(X, Y),
)

╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.5889 │
├──────────┼─────────┤
│ RMSE     │  2.1834 │
├──────────┼─────────┤
│ MSE      │  4.7671 │
╘══════════╧═════════╛
╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.3203 │
├──────────┼─────────┤
│ RMSE     │  1.8239 │
├──────────┼─────────┤
│ MSE      │  3.3266 │
╘══════════╧═════════╛
╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.2972 │
├──────────┼─────────┤
│ RMSE     │  1.7891 │
├──────────┼─────────┤
│ MSE      │  3.201  │
╘══════════╧═════════╛


(Array(0.9052454, dtype=float32),
 Array(0.9338775, dtype=float32),
 np.float64(0.9363737064688338))

In [100]:
#elastic.do_all(X, Y)

In [101]:
#reg.Cooks_distance(X,Y)

In [102]:
# Synthetic 4-class classification problem.
# The labels are drawn independently of the features, and the draws continue
# the global NumPy RNG stream (no re-seeding in this cell).
classes = ["A", "B", "C", "D"]
n_samples = 150

random_classes = np.random.choice(classes, size=n_samples)

# label -> index lookup, then encode every sampled label as its index
class_to_int = dict(zip(classes, range(len(classes))))
random_classes_int = np.array([class_to_int.get(label) for label in random_classes])

# Raw predictors, drawn in this order: x, Z, P.
x_raw = np.random.normal(2, 2, n_samples)
Z = np.random.normal(9, 1, n_samples)
P = np.random.beta(1, 2, n_samples)

# Feature matrix columns: x, Z, P * Z, Z + x, x^2.
X = np.column_stack(
    (
        x_raw,
        Z,
        P * Z,
        Z + x_raw,
        x_raw ** 2,
    )
)

# Targets are the integer class indices.
Y = random_classes_int
# One-hot alternative (disabled):
#Y = np.eye(len(np.unique(Y)))[random_classes_int]


In [103]:
# Build the classifier from the number of feature columns and the number of
# class labels, then fit it on the full data set.
n_features = X.shape[1]
n_classes = len(classes)

log = LogisticRegression(n_features, n_classes)
log.fit(X, Y)

<models.logistic.LogisticRegression at 0x1b93f233ed0>

In [104]:
# In-sample predictions: the classifier is applied to the same X it was
# fitted on. The bare `pred` displays the predicted values as the cell output.
pred = log.predict(X)
pred

array([2, 3, 1, 2, 2, 3, 3, 3, 2, 3, 2, 2, 3, 3, 3, 1, 2, 2, 2, 3, 1, 2,
       2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 1, 2, 2, 1, 1, 1, 3, 3,
       1, 2, 1, 2, 3, 3, 2, 3, 2, 1, 1, 3, 3, 3, 3, 3, 2, 3, 2, 1, 3, 3,
       2, 1, 3, 0, 1, 3, 1, 2, 3, 3, 1, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 2,
       3, 0, 2, 3, 1, 1, 2, 1, 3, 3, 2, 3, 1, 3, 3, 1, 2, 1, 2, 3, 2, 3,
       3, 2, 3, 3, 2, 3, 1, 3, 1, 2, 1, 2, 2, 1, 2, 1, 1, 0, 2, 1, 2, 3,
       2, 1, 2, 1, 3, 1, 3, 2, 2, 1, 3, 3, 3, 3, 2, 0, 1, 2])

In [105]:
# Classification metrics for the in-sample predictions, displayed as one tuple
# in this order: confusion matrix, precision, recall, F1, accuracy,
# balanced accuracy, NPV, FOR.
(
    confusion_matrix(Y, pred),
    precision(Y, pred),
    recall(Y, pred),
    f1_stat(Y, pred),
    accuracy(Y, pred),
    balanced_accuracy(Y, pred),
    NPV(Y, pred),
    FOR(Y, pred),
)

(array([[ 2.,  7.,  9., 16.],
        [ 2., 14., 13., 12.],
        [ 0.,  9., 13., 16.],
        [ 0.,  7., 11., 19.]]),
 array([0.5       , 0.37837838, 0.2826087 , 0.3015873 ]),
 array([0.05882353, 0.34146341, 0.34210526, 0.51351351]),
 array([0.10526316, 0.35897436, 0.30952381, 0.38      ]),
 array(0.32),
 array(0.31397643),
 array([0.78082192, 0.76106195, 0.75961538, 0.79310345]),
 array([0.21917808, 0.23893805, 0.24038462, 0.20689655]))

In [106]:
# Fowlkes-Mallows metric for the same true labels / predictions; the recorded
# output holds one value per class.
Fowlkes_Mallows(Y, pred)

array([0.17149859, 0.35944732, 0.31093717, 0.39353419])

In [107]:
# Inspect the fitted parameter array. In the recorded output it is 5 x 4,
# which matches the 5 feature columns of X and the 4 class labels
# (presumably one row per feature and one column per class - TODO confirm).
log.theta

array([[-0.02719566,  0.00089618,  0.03213054, -0.00583106],
       [-0.00076542, -0.00853331, -0.00309271,  0.01239144],
       [ 0.03845553,  0.01194308, -0.07974542,  0.02934682],
       [-0.02796108, -0.00763712,  0.02903783,  0.00656038],
       [ 0.02448265,  0.02745062, -0.01333104, -0.03860223]])