In [1]:
import numpy as np
import pandas as pd

from models.linear import LinearRegression
from models.logistic import LogisticRegression
from metrics.classification_metrics import *

In [2]:
# Synthetic categorical feature: draw one of three labels per sample and
# encode every label as its position in `classes`.
np.random.seed(42)  # seed the legacy global RNG before the first draw
n_samples = 500
classes = ["A", "B", "C"]

random_classes = np.random.choice(classes, size=n_samples)

# Label -> integer code ("A" -> 0, "B" -> 1, "C" -> 2).
class_to_int = dict(zip(classes, range(len(classes))))
random_classes_int = np.array([class_to_int[label] for label in random_classes])
random_classes_int

array([2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 0, 2, 0, 2, 2, 0,
       0, 2, 1, 0, 1, 1, 1, 0, 1, 0, 1, 2, 2, 0, 2, 2, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 2, 0,
       1, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 2, 0, 1,
       2, 1, 0, 2, 0, 1, 0, 2, 2, 1, 0, 2, 1, 2, 2, 0, 2, 0, 2, 1, 2, 0,
       0, 1, 2, 2, 1, 2, 2, 0, 2, 2, 1, 1, 0, 2, 2, 2, 0, 0, 1, 0, 2, 2,
       0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 2, 1, 2, 0,
       2, 1, 0, 0, 0, 2, 1, 0, 0, 0, 2, 2, 1, 2, 0, 1, 0, 0, 2, 1, 2, 2,
       1, 0, 0, 1, 0, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 0, 1, 1, 1, 2, 0, 0,
       0, 2, 1, 1, 0, 1, 1, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 0,
       2, 1, 0, 2, 2, 2, 1, 2, 2, 2, 2, 0, 0, 2, 1, 0, 2, 0, 0, 1, 2, 2,
       1, 1, 2, 2, 1, 0, 0, 1, 0, 1, 0, 0, 2, 2, 0, 0, 2, 2, 2, 1, 0, 2,
       0, 1, 1, 2, 1, 0, 2, 0, 0, 1, 1, 1, 1, 1, 2,

In [3]:
# Raw random draws. The order of the four draws is kept as-is so the values
# produced under the global seed do not change.
x_raw = np.random.normal(2, 2, n_samples)
Z = np.random.normal(9, 1, n_samples)
P = np.random.beta(1, 2, n_samples)
noise = np.random.normal(0, 0.5, n_samples)

# Regression target: linear in x, p and z, plus noise, plus an
# interaction between the integer class code and x.
Y = 2 * x_raw + noise + P + Z + random_classes_int * x_raw

# Design matrix columns: x, x^2, z + x, x * p, p, class code.
X = np.column_stack(
    (x_raw, x_raw**2, Z + x_raw, x_raw * P, P, random_classes_int)
)

In [4]:
# Fit one LinearRegression per regularization mode on the same (X, Y).
# The Lasso and ElasticNet fits share the same explicit n_iter / lr values.
iter_settings = {"n_iter": 10000, "lr": 0.001}

reg = LinearRegression(regularization="None").fit(X, Y)
ridge = LinearRegression(regularization="Ridge").fit(X, Y)
lasso = LinearRegression(regularization="Lasso", **iter_settings).fit(X, Y)
elastic = LinearRegression(
    regularization="ElasticNet", **iter_settings, alpha=0.4
).fit(X, Y)

In [5]:
# Print the error table of each regularized model (ElasticNet, Lasso, Ridge,
# in that order) on the training data.
elastic_pred = elastic.predict(X)
elastic.print_errors(Y, elastic_pred)
lasso.print_errors(Y, lasso.predict(X))
ridge.print_errors(Y, ridge.predict(X))

# Cell output: the three scores, in the same model order as the tables above.
(
    elastic.score(X, Y),
    lasso.score(X, Y),
    ridge.score(X, Y),
)

╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.5889 │
├──────────┼─────────┤
│ RMSE     │  2.1834 │
├──────────┼─────────┤
│ MSE      │  4.7671 │
╘══════════╧═════════╛
╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.3203 │
├──────────┼─────────┤
│ RMSE     │  1.8239 │
├──────────┼─────────┤
│ MSE      │  3.3266 │
╘══════════╧═════════╛
╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.2972 │
├──────────┼─────────┤
│ RMSE     │  1.7891 │
├──────────┼─────────┤
│ MSE      │  3.201  │
╘══════════╧═════════╛


(Array(0.9052454, dtype=float32),
 Array(0.9338775, dtype=float32),
 np.float64(0.9363737064688338))

In [6]:
#elastic.do_all(X, Y)

In [7]:
#reg.Cooks_distance(X,Y)

In [8]:
# Second synthetic dataset, for classification: 1500 samples, five numeric
# feature columns and a three-class integer target.
n_samples = 1500
classes = ["A", "B", "C"]

# Labels are drawn before the features, so the RNG draw order is unchanged.
random_classes = np.random.choice(classes, size=n_samples)
class_to_int = dict(zip(classes, range(len(classes))))
random_classes_int = np.array([class_to_int[label] for label in random_classes])

x_raw = np.random.normal(2, 2, n_samples)
Z = np.random.normal(9, 1, n_samples)
P = np.random.beta(1, 2, n_samples)

# Feature columns: x, z, p * z, z + x, x^2.
X = np.column_stack((x_raw, Z, P * Z, Z + x_raw, x_raw**2))

# The integer class codes are the target. Note that they are sampled
# independently of the feature draws above.
Y = random_classes_int


In [9]:
# Wrap the feature matrix in a DataFrame whose columns are labelled with the
# strings "1", "2", ..., one per feature column. The labels are derived from
# the column count instead of being hard-coded, so this cell keeps working if
# the number of features built in the data cell changes (five columns still
# yield "1".."5").
feature_names = [str(position) for position in range(1, X.shape[1] + 1)]
X = pd.DataFrame(X, columns=feature_names)

In [10]:
# Build the LogisticRegression from the data's dimensions: the number of
# feature columns and the number of class labels are passed positionally.
n_features = X.shape[1]
n_classes = len(classes)
log = LogisticRegression(n_features, n_classes, regularization="None")

# Left as the last expression so the cell displays what fit() returns.
log.fit(X, Y)

<models.logistic.LogisticRegression at 0x24f62242660>

In [11]:
# Predict on the same X the model was fitted on (in-sample predictions),
# then display the predicted class codes.
pred = log.predict(X)
pred

array([2, 2, 0, ..., 2, 2, 0], shape=(1500,))

In [12]:
# Classification metrics for `pred` against the true class codes `Y`.
#
# The confusion matrix is printed explicitly: a notebook only displays the
# value of a cell's last expression, so a bare `confusion_matrix(Y, pred)`
# statement above the tuple computes the matrix and then discards it.
print(confusion_matrix(Y, pred))

# Cell output, in order: precision, recall, F1, accuracy, balanced accuracy,
# NPV and FOR.
(
    precision(Y, pred),
    recall(Y, pred),
    f1_stat(Y, pred),
    accuracy(Y, pred),
    balanced_accuracy(Y, pred),
    NPV(Y, pred),
    FOR(Y, pred),
)

(array([0.36746988, 0.3816568 , 0.3624498 ]),
 array([0.12815126, 0.25697211, 0.69157088]),
 array([0.19003115, 0.30714286, 0.47562582]),
 array(0.36733333),
 array(0.35889808),
 array([0.68890555, 0.67900172, 0.68055556]),
 array([0.31109445, 0.32099828, 0.31944444]))

In [13]:
# Fowlkes_Mallows metric for the same predictions; the recorded output is an
# array with one value per class.
Fowlkes_Mallows(Y, pred)

array([0.21700629, 0.31316953, 0.50065929])

In [14]:
# Inspect the fitted model's `theta` attribute. The recorded output is a
# 6 x 3 array — presumably 5 feature rows plus an intercept row, one column
# per class; confirm against models.logistic.
log.theta

array([[-0.00212145,  0.00226784, -0.00014639],
       [-0.0099058 ,  0.01309753, -0.00319173],
       [ 0.00121627, -0.00424751,  0.00303123],
       [-0.00419983, -0.00519411,  0.00939394],
       [-0.00868953,  0.00885003, -0.0001605 ],
       [ 0.00964295, -0.00942614, -0.00021681]])

In [15]:
# One 0/1 indicator array per distinct value of Y (1 where Y equals that
# value, 0 elsewhere), ordered as returned by np.unique.
data = [(Y == class_code).astype(int) for class_code in np.unique(Y)]

In [16]:
# Display the list of per-class 0/1 indicator arrays.
data

[array([0, 1, 0, ..., 0, 0, 0], shape=(1500,)),
 array([1, 0, 1, ..., 0, 0, 0], shape=(1500,)),
 array([0, 0, 0, ..., 1, 1, 1], shape=(1500,))]

In [17]:
# Create an OVA instance with its default constructor arguments.
# NOTE(review): this import lives mid-notebook; consider moving it into the
# import cell at the top so all dependencies are visible in one place.
from models.onevsall import OVA
ova = OVA()


In [18]:
# Fit the OVA model on the same feature frame and integer class targets.
ova.fit(X, Y)

<models.onevsall.OVA at 0x24f62242900>

In [19]:
# Rebinds `pred`: from here on it holds the OVA predictions, replacing the
# LogisticRegression predictions assigned earlier in the notebook.
pred = ova.predict(X)

In [23]:
# Confusion matrix and accuracy for whatever `pred` currently holds — the OVA
# predictions when the cells are run in order.
confusion_matrix(Y, pred=pred), accuracy(Y, pred)

(array([[ 63., 113., 300.],
        [ 50., 140., 312.],
        [ 60., 119., 343.]]),
 array(0.364))