In [1]:
import numpy as np
from models.linear import LinearRegression


In [2]:
# Regression demo setup: fix the global NumPy seed first so every later
# np.random draw in the notebook is reproducible on a fresh kernel.
np.random.seed(42)
n_samples = 500
classes = ["A", "B", "C"]

# One label per sample, drawn from `classes` with np.random.choice defaults.
random_classes = np.random.choice(classes, size=n_samples)

# Encode each label as its position in `classes`: "A" -> 0, "B" -> 1, "C" -> 2.
class_to_int = dict(zip(classes, range(len(classes))))
random_classes_int = np.array([class_to_int[name] for name in random_classes])
random_classes_int

array([2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 0, 2, 0, 2, 2, 0,
       0, 2, 1, 0, 1, 1, 1, 0, 1, 0, 1, 2, 2, 0, 2, 2, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 2, 0,
       1, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 2, 0, 1,
       2, 1, 0, 2, 0, 1, 0, 2, 2, 1, 0, 2, 1, 2, 2, 0, 2, 0, 2, 1, 2, 0,
       0, 1, 2, 2, 1, 2, 2, 0, 2, 2, 1, 1, 0, 2, 2, 2, 0, 0, 1, 0, 2, 2,
       0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 2, 1, 2, 0,
       2, 1, 0, 0, 0, 2, 1, 0, 0, 0, 2, 2, 1, 2, 0, 1, 0, 0, 2, 1, 2, 2,
       1, 0, 0, 1, 0, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 0, 1, 1, 1, 2, 0, 0,
       0, 2, 1, 1, 0, 1, 1, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 0,
       2, 1, 0, 2, 2, 2, 1, 2, 2, 2, 2, 0, 0, 2, 1, 0, 2, 0, 0, 1, 2, 2,
       1, 1, 2, 2, 1, 0, 0, 1, 0, 1, 0, 0, 2, 2, 0, 0, 2, 2, 2, 1, 0, 2,
       0, 1, 1, 2, 1, 0, 2, 0, 0, 1, 1, 1, 1, 1, 2,

In [3]:
# Synthetic regression data. The draw order (X, Z, P, noise) is significant:
# every call consumes the global NumPy random stream seeded earlier.
X = np.random.normal(loc=2, scale=2, size=n_samples)
Z = np.random.normal(loc=9, scale=1, size=n_samples)
P = np.random.beta(a=1, b=2, size=n_samples)
noise = np.random.normal(loc=0, scale=0.5, size=n_samples)

# Target: linear in X, P and Z, plus a class-dependent slope on X.
# The terms are summed in a fixed left-to-right order so the floating-point
# result stays reproducible.
Y = 2 * X + noise + P + Z + random_classes_int * X

# Design matrix; from here on `X` is 2-D with one column per feature.
X = np.column_stack(
    (
        X,                   # raw feature
        X ** 2,              # squared term
        Z + X,               # sum of Z and X
        X * P,               # interaction of X and P
        P,                   # beta-distributed feature
        random_classes_int,  # class label as an integer code
    )
)

In [4]:
# Fit one LinearRegression per regularisation mode on the same (X, Y).
# Note that the unregularised case is requested with the string "None".
reg = LinearRegression(regularization="None").fit(X, Y)
ridge = LinearRegression(regularization="Ridge").fit(X, Y)

# Lasso and ElasticNet share an explicit iteration count and learning rate;
# ElasticNet additionally receives alpha=0.4.
shared_kwargs = {"n_iter": 10000, "lr": 0.001}
lasso = LinearRegression(regularization="Lasso", **shared_kwargs).fit(X, Y)
elastic = LinearRegression(regularization="ElasticNet", **shared_kwargs, alpha=0.4).fit(X, Y)

In [5]:
# Compare the regularised fits on the data they were trained on (X, Y).
# Order is elastic, lasso, ridge for both the printed tables and the scores.
regularised = (elastic, lasso, ridge)

# print_errors writes a MAE / RMSE / MSE table for each model.
for fitted in regularised:
    fitted.print_errors(X, Y)

# Cell output: one score per model, as a tuple.
tuple(fitted.score(X, Y) for fitted in regularised)

╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.5889 │
├──────────┼─────────┤
│ RMSE     │  2.1834 │
├──────────┼─────────┤
│ MSE      │  4.7671 │
╘══════════╧═════════╛
╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.3203 │
├──────────┼─────────┤
│ RMSE     │  1.8239 │
├──────────┼─────────┤
│ MSE      │  3.3266 │
╘══════════╧═════════╛
╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.2972 │
├──────────┼─────────┤
│ RMSE     │  1.7891 │
├──────────┼─────────┤
│ MSE      │  3.201  │
╘══════════╧═════════╛


(Array(0.9052454, dtype=float32),
 Array(0.9338775, dtype=float32),
 np.float64(0.9363737064688338))

In [6]:
#elastic.do_all(X, Y)

In [7]:
#reg.Cooks_distance(X,Y)

In [8]:
# Classification demo setup: a smaller sample with the same three labels.
# There is no reseed here, so draws continue from the global NumPy random state.
n_samples = 100
classes = ["A", "B", "C"]

# One label per sample, drawn from `classes` with np.random.choice defaults.
random_classes = np.random.choice(classes, size=n_samples)

# Encode each label as its position in `classes`: "A" -> 0, "B" -> 1, "C" -> 2.
class_to_int = dict(zip(classes, range(len(classes))))
random_classes_int = np.array([class_to_int[name] for name in random_classes])


In [9]:
# Display the integer-encoded labels (0, 1, 2 for "A", "B", "C") used as the
# classification target.
random_classes_int


array([1, 0, 1, 0, 1, 2, 2, 1, 2, 2, 2, 0, 2, 0, 1, 2, 2, 2, 1, 0, 0, 2,
       1, 1, 1, 1, 1, 0, 0, 0, 2, 1, 2, 1, 0, 2, 0, 1, 1, 1, 0, 1, 0, 2,
       1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 2, 2, 1, 2, 2, 0, 2, 0, 0, 2, 0, 1,
       1, 2, 1, 0, 1, 2, 2, 0, 2, 1, 2, 0, 2, 1, 0, 1, 0, 2, 0, 0, 2, 1,
       2, 0, 2, 0, 2, 2, 1, 0, 1, 0, 2, 0])

In [10]:
# Synthetic classification features. The draw order (X, Z, P) is significant
# because each call consumes the global NumPy random stream.
X = np.random.normal(loc=2, scale=2, size=n_samples)
Z = np.random.normal(loc=9, scale=1, size=n_samples)
P = np.random.beta(a=1, b=2, size=n_samples)

# Design matrix; from here on `X` is 2-D with one column per feature.
X = np.column_stack(
    (
        X,       # raw feature
        X ** 2,  # squared term
        Z + X,   # sum of Z and X
        X * P,   # interaction of X and P
        P,       # beta-distributed feature
    )
)

# Target is the integer class code. The labels were drawn independently of
# X, Z and P, so the features carry no information about the class.
Y = random_classes_int
# One-hot alternative (disabled): Y = np.eye(len(np.unique(Y)))[random_classes_int]


In [11]:
# NOTE(review): consider moving this import to the notebook's first cell with
# the other imports so dependencies are visible up front.
from models.logistic import LogisticRegression

# The constructor receives the feature-column count and the number of class
# labels positionally (presumably input and output sizes; confirm against
# models.logistic).
n_features = X.shape[1]
n_classes = len(classes)
log = LogisticRegression(n_features, n_classes)

# Last expression: the return value of fit() is displayed as the cell output.
log.fit(X, Y)

<models.logistic.LogisticRegression at 0x1b6966ed2b0>

In [12]:
# Confusion matrix on the same (X, Y) the model was fitted on (no held-out split).
# NOTE(review): the precision/recall values shown in this notebook are
# consistent with rows = true class and columns = predicted class.
log.confusion_matrix(X, Y)

array([[10., 16.,  5.],
       [ 5., 25.,  4.],
       [ 7., 17., 11.]])

In [13]:
# Precision, one value per class, computed on the training data (X, Y).
log.precision(X,Y)


array([0.45454545, 0.43103448, 0.55      ])

In [14]:
# Recall, one value per class, computed on the training data (X, Y).
log.recall(X,Y)


array([0.32258065, 0.73529412, 0.31428571])

In [15]:
# F1 statistic, one value per class, computed on the training data (X, Y).
log.f1_stat(X,Y)

array([0.37735849, 0.54347826, 0.4       ])