In [1]:
import numpy as np
from models.linear import LinearRegression


In [2]:
# --- Synthetic categorical feature ---------------------------------------
# Seed NumPy's global RNG up front so every draw in this notebook is
# reproducible on a fresh kernel.
np.random.seed(42)
n_samples = 500

# One label per sample, drawn from the three categories.
classes = ["A", "B", "C"]
random_classes = np.random.choice(classes, size=n_samples)

# Encode each label as its position in `classes` (A -> 0, B -> 1, C -> 2).
class_to_int = dict(zip(classes, range(len(classes))))
random_classes_int = np.array([class_to_int[label] for label in random_classes])
random_classes_int

array([2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 0, 2, 0, 2, 2, 0,
       0, 2, 1, 0, 1, 1, 1, 0, 1, 0, 1, 2, 2, 0, 2, 2, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 0, 1, 0, 0, 1, 2, 0,
       1, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 2, 0, 1,
       2, 1, 0, 2, 0, 1, 0, 2, 2, 1, 0, 2, 1, 2, 2, 0, 2, 0, 2, 1, 2, 0,
       0, 1, 2, 2, 1, 2, 2, 0, 2, 2, 1, 1, 0, 2, 2, 2, 0, 0, 1, 0, 2, 2,
       0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 2, 1, 2, 0,
       2, 1, 0, 0, 0, 2, 1, 0, 0, 0, 2, 2, 1, 2, 0, 1, 0, 0, 2, 1, 2, 2,
       1, 0, 0, 1, 0, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 0, 1, 1, 1, 2, 0, 0,
       0, 2, 1, 1, 0, 1, 1, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 0,
       2, 1, 0, 2, 2, 2, 1, 2, 2, 2, 2, 0, 0, 2, 1, 0, 2, 0, 0, 1, 2, 2,
       1, 1, 2, 2, 1, 0, 0, 1, 0, 1, 0, 0, 2, 2, 0, 0, 2, 2, 2, 1, 0, 2,
       0, 1, 1, 2, 1, 0, 2, 0, 0, 1, 1, 1, 1, 1, 2,

In [3]:
# --- Synthetic regression data --------------------------------------------
# The draws are made in a fixed order (X, Z, P, noise) so that the values
# obtained from the global RNG stay the same from run to run.
X = np.random.normal(2, 2, n_samples)        # normal, mean 2, std 2
Z = np.random.normal(9, 1, n_samples)        # normal, mean 9, std 1
P = np.random.beta(1, 2, n_samples)          # Beta(1, 2)
noise = np.random.normal(0, 0.5, n_samples)  # additive normal noise, std 0.5

# Target: linear in X, P and Z, plus a class-dependent slope on X.
Y = 2*X + noise + P + Z + random_classes_int * X

# Design matrix columns: X, X^2, Z + X, X * P, P, integer class code.
# From here on `X` names the full design matrix, not the raw draw.
design_columns = (X, X**2, Z + X, X * P, P, random_classes_int)
X = np.column_stack(design_columns)

In [4]:
# Fit one LinearRegression per regularization mode on the same (X, Y).
# Note that "None" is passed as a string, exactly like the other mode names.
# Lasso and ElasticNet are given an explicit iteration budget and learning rate.
iterative_settings = {"n_iter": 10000, "lr": 0.001}

reg = LinearRegression(
    regularization="None",
).fit(X, Y)

ridge = LinearRegression(
    regularization="Ridge",
).fit(X, Y)

lasso = LinearRegression(
    regularization="Lasso",
    **iterative_settings,
).fit(X, Y)

elastic = LinearRegression(
    regularization="ElasticNet",
    alpha=0.4,
    **iterative_settings,
).fit(X, Y)

In [5]:
# Print the error table of each regularized model, in the same order as the
# score tuple below: ElasticNet, Lasso, Ridge.
for fitted in (elastic, lasso, ridge):
    fitted.print_errors(X, Y)

# Last expression of the cell, so the tuple of scores is what gets displayed.
elastic.score(X, Y), lasso.score(X, Y), ridge.score(X, Y)

╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.5889 │
├──────────┼─────────┤
│ RMSE     │  2.1834 │
├──────────┼─────────┤
│ MSE      │  4.7671 │
╘══════════╧═════════╛
╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.3203 │
├──────────┼─────────┤
│ RMSE     │  1.8239 │
├──────────┼─────────┤
│ MSE      │  3.3266 │
╘══════════╧═════════╛
╒══════════╤═════════╕
│ Metric   │   Value │
╞══════════╪═════════╡
│ MAE      │  1.2972 │
├──────────┼─────────┤
│ RMSE     │  1.7891 │
├──────────┼─────────┤
│ MSE      │  3.201  │
╘══════════╧═════════╛


(Array(0.9052454, dtype=float32),
 Array(0.9338775, dtype=float32),
 np.float64(0.9363737064688338))

In [6]:
# Run the model's combined report on the ElasticNet fit. The recorded output
# shows it printing the model score, adjusted R, the Beta coefficients, a
# cross-validation summary, and four diagnostics: Ramsey RESET, Breusch-Pagan,
# Durbin-Watson and VIF.
# NOTE(review): X has six columns, yet the recorded VIF section lists only two
# features -- confirm against the do_all / VIF implementation.
elastic.do_all(X, Y)

Model score:0.9052454233169556
R adjusted:0.8944900631904602
Beta: [-5.0410217e-01  1.2894970e+00  9.9426448e-02  1.0410082e+00
  1.9934146e-01  2.6477067e-04  9.5286191e-01]
Cross validation score: {'MAE': 1.6261, 'RMSE': 2.252, 'MSE': 5.1046, 'R2': 0.8965}
        The Ramsey RESET test for linearity
        Test Statistic : 419.8566 
        P-value        : 0.0 
        Interpretation : Model may be misspecified (nonlinearity exists) ❌ 
        
        Breusch-Pagan Test for Heteroscedasticity
        Test Statistic : 225.4683
        P-value        : 0.0
        Interpretation : Heteroscedasticity detected — variance of residuals is not constant. ❌
        
        Durbin-Watson Test for Independence of Errors
        DW Statistic  : 1.9429999589920044
        Interpretation: No autocorrelation ✅ 
        
        VIF Test for Multicollinearity
        Feature 1: VIF = 9.274205935300616 | Multicollinearity ❌
        Feature 2: VIF = 2.9786439383807304 | No Multicollinearity ✅
    

In [7]:
# --- Labels for the classification example --------------------------------
# Same three categories and the same position-based encoding (A -> 0, B -> 1,
# C -> 2). The RNG is not re-seeded here, so this draw continues from the
# current global state and differs from any earlier draw.
classes = ["A", "B", "C"]
class_to_int = dict(zip(classes, range(len(classes))))

random_classes = np.random.choice(classes, size=n_samples)
random_classes_int = np.array([class_to_int[label] for label in random_classes])


In [8]:
# --- Features and target for the classification example -------------------
# Fresh draws, made in the order X, Z, P from the global RNG.
X = np.random.normal(2, 2, n_samples)   # normal, mean 2, std 2
Z = np.random.normal(9, 1, n_samples)   # normal, mean 9, std 1
P = np.random.beta(1, 2, n_samples)     # Beta(1, 2)

# Feature matrix columns: X, X^2, Z + X, X * P, P. The class code is left out
# of the features because it is the target here.
feature_columns = (X, X**2, Z + X, X * P, P)
X = np.column_stack(feature_columns)

# Target: the integer-encoded class labels.
Y = random_classes_int

In [9]:
# NOTE(review): this import sits mid-notebook; the other imports live in the
# first cell, which is where a reader looks for dependencies.
from models.logistic import LogisticRegression
# NOTE(review): the arguments passed are the number of samples
# (random_classes_int.shape[0]) and the *value* of the second label
# (random_classes_int[1], i.e. 0, 1 or 2) -- not a count. If the constructor
# expects sizes such as a feature count and a class count, X.shape[1] and
# len(classes) look like the intended values; confirm against models.logistic.
log = LogisticRegression(random_classes_int.shape[0], random_classes_int[1])
# The recorded output of this cell is "ValueError: zero-size array to reduction
# operation maximum which has no identity". Presumably it is raised here, and a
# label value of 0 used as a size would explain the empty array -- TODO confirm.
log.softmax(X)

ValueError: zero-size array to reduction operation maximum which has no identity