# Linear model's

In [None]:
# ! pip install scikit-learn==<version>
! pip install scikit-learn 

In [None]:
import sklearn  # type: ignore
# Show the installed scikit-learn version so the parameter notes below can be matched to a release.
print(sklearn.__version__)

1.5.2


### Classical Linear Regressors:

1. LinearRegression
2. SGDRegressor
3. Ridge, RidgeCV

### Linear Regression


In [None]:
from sklearn.linear_model import LinearRegression  # type: ignore


# Ordinary least-squares regressor with every constructor argument written out explicitly.
linear_regression = LinearRegression(
    fit_intercept=True,      # Whether to include an intercept in the model (True) or force through the origin (False). Default is True.
    copy_X=True,             # Whether to copy the input data (True) or allow it to be overwritten (False). Default is True.
    n_jobs=-1,               # Number of CPU cores to use for computation (-1 uses all available cores). Default is None (a single job unless a joblib backend context says otherwise).
    positive=False           # Whether to constrain the coefficients to be positive only (True) or allow negative coefficients (False). Default is False.
)

import os

# Candidate values for each LinearRegression constructor argument
# (for example, as a grid-search parameter grid).
linear_regression_params = {
    "fit_intercept": [True, False],
    "copy_X": [True, False],
    # -1 means "use every core"; the remaining entries are the explicit worker
    # counts 1..n, where n is the number of logical cores on this machine.
    # 0 is deliberately excluded: it is not a meaningful worker count.
    # os.cpu_count() may return None, in which case we fall back to a single core.
    "n_jobs": [-1, *range(1, (os.cpu_count() or 1) + 1)],
    "positive": [True, False],
}


# Get the number of CPU cores
import os

# os.cpu_count() reports logical CPUs and returns None when the count cannot be determined.
num_cores = os.cpu_count()
print(f"Number of CPU cores: {num_cores}")



### SGD Regression (Stochastic Gradient descent)

In [None]:
import sklearn.linear_model



# Linear regressor trained with stochastic gradient descent, with every
# constructor argument written out. Values flagged "non-default" intentionally
# differ from scikit-learn's defaults.
sgd_model = sklearn.linear_model.SGDRegressor(
    # --- Loss ---
    loss="squared_error",         # Loss function to minimize. Options include:
                                  # - "squared_error": Ordinary least-squares loss (default).
                                  # - "huber": Robust to outliers.
                                  # - "epsilon_insensitive": Ignores errors smaller than epsilon.
    epsilon=0.1,                  # Threshold for the "huber" and epsilon-insensitive losses; unused by "squared_error". Default is 0.1.

    # --- Regularization ---
    penalty="elasticnet",         # Regularization type to prevent overfitting (non-default):
                                  # - "l2": Squared penalty on weights (default).
                                  # - "l1": Absolute penalty (Lasso-style).
                                  # - "elasticnet": Combination of L1 and L2.
                                  # - None: No penalty is applied.
    alpha=0.0001,                 # Regularization strength. Larger values mean stricter regularization. Default is 0.0001.
    l1_ratio=0.15,                # Share of L1 in the "elasticnet" mix (0 = pure L2, 1 = pure L1). Default is 0.15.
    fit_intercept=True,           # Include an intercept (bias) in the model. Default is True.

    # --- Optimization loop ---
    max_iter=1000,                # Maximum number of passes over the training data (epochs). Default is 1000.
    tol=0.0001,                   # Stopping tolerance; smaller values = higher precision (non-default, default is 0.001).
    shuffle=True,                 # Shuffle training data after each epoch. Default is True.
    verbose=1,                    # Verbosity level; 1 prints progress per epoch (non-default, default is 0 = silent).

    # --- Learning-rate schedule ---
    learning_rate="invscaling",   # Schedule for learning rate adjustment:
                                  # - "constant": eta = eta0 throughout.
                                  # - "invscaling": eta = eta0 / t**power_t (default).
                                  # - "adaptive": eta0 is kept while the loss improves and divided by 5 when it stalls.
    eta0=0.01,                    # Initial learning rate. Default is 0.01.
    power_t=0.25,                 # Exponent for the inverse scaling learning rate schedule. Default is 0.25.

    # --- Early stopping ---
    early_stopping=False,         # Hold out a validation set and stop when its score stops improving. Default is False.
    validation_fraction=0.1,      # Fraction of training data held out for validation; only used if early_stopping=True. Default is 0.1.
    n_iter_no_change=5,           # Number of epochs with no improvement before stopping. Default is 5.

    # --- Miscellaneous ---
    random_state=None,            # Seed for shuffling. None means non-deterministic; set an integer (e.g., 0, 42) for reproducible results.
    warm_start=False,             # Reuse the previous fit's solution as initialization for the next fit. Default is False.
    average=False,                # If True, averages the SGD weights across updates for stability. Default is False.
)

# Search space for SGDRegressor: each keyword maps a constructor argument
# to the list of candidate values worth trying during tuning.
sgd_params = dict(
    loss=["squared_error", "huber", "epsilon_insensitive"],
    penalty=["l2", "l1", "elasticnet"],
    alpha=[0.0001, 0.001, 0.01],
    l1_ratio=[0.1, 0.15, 0.5],
    fit_intercept=[True, False],
    max_iter=[500, 1000, 5000],
    tol=[0.0001, 0.001, 0.01],
    learning_rate=["constant", "invscaling", "adaptive"],
    eta0=[0.01, 0.1, 1.0],
    early_stopping=[True, False],
    validation_fraction=[0.1, 0.2],
    n_iter_no_change=[5, 10, 20],
    random_state=[None, 42],
    warm_start=[True, False],
    average=[True, False],
)

"""
squared_error       : Ordinary least squares loss; minimizes the squared difference between predicted and actual values.
huber               : Combines squared error and absolute error, making it robust to outliers by reducing their influence.
epsilon_insensitive : Ignores errors within a margin of epsilon, focusing only on larger deviations.

constant            : Keeps the learning rate fixed throughout training.
invscaling          : Gradually reduces the learning rate with each iteration using a predefined decay formula.
adaptive            : Adjusts the learning rate dynamically based on validation error; reduces it if the model stops improving.
"""


### Ridge Regression



In [None]:
# Ridge Regression = Linear regression + L2 Regularization (Uses Closed form solution)


import sklearn.linear_model # type: ignore

# Ridge estimator (linear regression with an L2 penalty) with every constructor argument written out.
ridge_model= sklearn.linear_model.Ridge(
    alpha=1,                    # Regularization strength. Larger values mean more regularization (reducing overfitting), smaller values mean less regularization (more flexible model). Default is 1.0.
    fit_intercept=True,          # Whether to include an intercept term (bias). True means the model will learn the intercept, False means no intercept (useful if data is already centered).
    copy_X=True,                 # Whether to copy the input data X. True keeps the original X unchanged, False allows X to be overwritten (saves memory).
    max_iter=1000,               # Maximum iterations for iterative solvers. Higher values allow the solver more time to converge. Default is None, in which case the limit depends on the solver.
    tol=0.0001,                  # Tolerance for convergence. When the change in the solution becomes smaller than `tol`, the algorithm stops. Lower values mean higher precision. Default is 0.0001.
    solver='auto',               # Solver for optimization. 'auto' lets the algorithm choose, or you can specify solvers like 'svd', 'cholesky', etc.
    positive=False,              # If True, coefficients are constrained to be non-negative (only the 'lbfgs' solver supports this). False allows negative coefficients. Default is False.
    random_state=None            # Seed used by the 'sag' and 'saga' solvers to shuffle the data. None means random behavior, or set an integer (e.g., 0, 42) for consistent results.
)

# Search space for the Ridge estimator: constructor argument -> candidate values.
ridge_regression_params = dict(
    alpha=[0.1, 1, 10],
    fit_intercept=[True, False],
    copy_X=[True, False],
    max_iter=[100, 500, 1000, 3000, 5000, 7000, 10000, None],
    tol=[0.0001],
    solver=['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'],
    positive=[True, False],
    random_state=[None, 0, 42],
)


'''
auto      : Automatically selects the best solver based on the dataset. Typically selects svd for small datasets & saga for large sparse datasets.
svd       : Singular Value Decomposition. Best for smaller datasets.
cholesky  : Uses Cholesky decomposition to solve linear systems. Suitable for dense matrices.
lsqr      : Least Squares QR decomposition. Efficient for sparse data.
sparse_cg : Conjugate Gradient solver, designed for sparse matrices.
sag       : Stochastic Average Gradient. Fast for large datasets, especially when data is sparse.
saga      : Stochastic Average Gradient with improvements. Good for large datasets, especially sparse ones.
lbfgs     : Limited-memory Broyden-Fletcher-Goldfarb-Shanno. A quasi-Newton method for optimization, suitable for large datasets.'''


In [None]:
from sklearn.linear_model import ridge_regression # type: ignore

# Functional Ridge API: solves the ridge equations directly on (X, y) instead
# of going through an estimator object.
#
# Tiny placeholder dataset so the cell runs as-is; replace with your own data.
X_demo = [[0.0, 1.0], [1.0, 3.0], [2.0, 2.0], [3.0, 5.0]]  # 2D input data matrix (n_samples, n_features)
y_demo = [1.0, 3.5, 4.0, 7.5]                              # 1D for single-output regression, 2D array for multi-output regression.

# With return_n_iter=True and return_intercept=True the call returns a tuple
# (coef, n_iter, intercept) rather than a fitted model.
ridge_regression_model = ridge_regression(
    X=X_demo,                    # 2D input data matrix.
    y=y_demo,                    # Target values.
    alpha=1.0,                   # Regularization strength. Larger values mean more regularization (reducing overfitting), smaller values mean less regularization (more flexible model).
    sample_weight=None,          # Sample weights allow you to give different importance to each sample. If None, all samples are treated equally.

    verbose=1,                   # Verbosity level. 0 (default) is silent; higher values print solver progress.
    return_n_iter=True,          # Also return the number of iterations taken by the solver, useful for diagnosing convergence issues. Default is False.
    return_intercept=True,       # Also return the intercept term (bias); this makes 'auto' pick the 'sag' solver. Default is False.
    check_input=True,            # Validate the input data (X and y). Default is True.

    max_iter=1000,               # Maximum iterations for iterative solvers. Default is None, in which case the limit depends on the solver.
    tol=0.0001,                  # Tolerance for convergence. Lower values mean higher precision. Default is 0.0001.
    solver='auto',               # Solver for optimization. 'auto' lets the algorithm choose, or you can specify solvers like 'svd', 'cholesky', etc.
    positive=False,              # If True, coefficients are constrained to be non-negative (only the 'lbfgs' solver supports this). Default is False.
    random_state=None,           # Seed used by the 'sag' and 'saga' solvers to shuffle the data. None means random behavior, or set an integer (e.g., 0, 42) for consistent results.
)


# Search space for the functional ridge_regression() call: argument -> candidate values.
# NOTE(review): this rebinds the name ridge_regression_params, which the Ridge
# estimator cell earlier in this file also assigns; the earlier grid is
# overwritten once this cell runs.
ridge_regression_params = dict(
    alpha=[0.1, 1.0, 10.0],
    sample_weight=[None, 0.5, 1.0],
    max_iter=[1000, 5000, None],
    tol=[0.0001, 0.001, 0.01],
    solver=['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'],
    positive=[True, False],
    random_state=[None, 42, 0],
    verbose=[0, 1, 2],
    return_n_iter=[True, False],
    return_intercept=[True, False],
    check_input=[True, False],
)

"""
Verbosity:
verbose=0: No output. The solver runs silently without showing progress.
verbose=1: Basic output. It shows a summary of progress (e.g., the number of iterations).
verbose=2: Detailed output. It provides more in-depth information, such as the current state of each iteration.

"""

In [None]:
import sklearn.linear_model

# Ridge regression with built-in cross-validation over the candidate alphas.
ridgecv_model = sklearn.linear_model.RidgeCV(
    alphas=(0.1, 1.0, 10.0),          # Array of alpha values to try (default is (0.1, 1.0, 10.0)).
    fit_intercept=True,               # Whether to calculate the intercept term (default is True).
    scoring=None,                     # Scoring method (default is None, meaning negative mean squared error when cv is None, and R^2 otherwise).
    cv=None,                          # Cross-validation strategy (default is None, meaning efficient Leave-One-Out CV).
    gcv_mode='auto',                  # Strategy used for Leave-One-Out CV: 'auto', 'svd' or 'eigen' (default is 'auto').
    store_cv_results=False,           # Whether to store cross-validation results; only applies when cv is None (default is False).
    alpha_per_target=False            # Whether to optimize a separate alpha for each target (default is False).
)

# Candidate values for each RidgeCV constructor argument.
ridge_cv_params = {
    "alphas": [(0.1, 1.0, 10.0)],
    "fit_intercept": [True, False],
    # cv accepts None (efficient leave-one-out), an int fold count, a CV
    # splitter object, or an iterable of (train, test) splits. Class names as
    # strings (e.g. 'KFold') are NOT valid values; to use a custom splitter,
    # append an instance such as sklearn.model_selection.KFold(n_splits=5).
    "cv": [None, 5, 10],
    # gcv_mode and store_cv_results only take effect when cv is None.
    "gcv_mode": ['auto', 'svd', 'eigen'],
    "store_cv_results": [True, False],
    "alpha_per_target": [True, False],
}
