# Multi-Output linear Regression

In [1]:
from sklearn.datasets import make_regression

In [2]:
# Create the dataset
'''n_samples -> how many records
n_features -> how many features
n_informative -> among the features, how many are the important ones
n_targets -> forecast targets'''
X,y=make_regression(n_samples=2000,n_informative=6,n_features=10,n_targets=2)

In [3]:
X[:1]

array([[-0.37105604,  0.06289181,  0.58996881, -0.14113007, -1.5272448 ,
        -0.3146103 , -0.0232462 ,  1.05861793, -0.99307838,  0.48446933]])

In [4]:
y[:1]

array([[-43.07396092,  -4.62859923]])

In [5]:
X_train = X[:1980].copy()
X_test = X[1980:].copy()

y_train = y[:1980].copy()
y_test = y[1980:].copy()

#### MultiOutput Regression Models

In [6]:
from sklearn.linear_model import LinearRegression

In [7]:
Linear_Regression_Model = LinearRegression()
Linear_Regression_Model.fit(X_train,y_train)

In [8]:
Linear_Regression_Model.predict(X_test)

array([[-219.70657932, -318.13030553],
       [ 178.59266639,  177.43660616],
       [  16.69658858,  -23.83905624],
       [ -40.35976192,   29.72635948],
       [   4.07515827,   99.58276891],
       [ -41.45985513, -127.51875482],
       [ 149.75617937,   59.5990617 ],
       [ 113.03195477,  104.83605366],
       [  43.22062367, -127.66191744],
       [-175.02781024,  -30.05178899],
       [ -32.57009377,  -23.63012091],
       [ 270.35156777,   46.08468497],
       [  68.91106594,  121.89906376],
       [  22.29098272,   -8.11033909],
       [  47.86688858,  -60.11281237],
       [-152.65587863,  -28.81317293],
       [ -30.93099299,   13.32402427],
       [ -86.43031384,  -51.10085638],
       [  58.92303822,    1.49236708],
       [ 194.13566881,   84.50052725]])

#### CrossValidation

In [10]:
from sklearn.model_selection import cross_val_score

In [None]:
scores = cross_val_score(Linear_Regression_Model,
                         X_test,y_test,
                         scoring='neg_mean_squared_error', # the closer to 0, the better
                         cv=5) # how many splits
scores 

array([-1.18740921e-25, -5.55168751e-26, -2.28406786e-26, -8.19847362e-26,
       -2.17742435e-26])

#### Example of how to convert single output regressor into multioutput regressor

In [19]:
from sklearn.svm import LinearSVR

'''LinearSVR is Linear Support Vector Regression - a machine learning model for regression tasks (predicting continuous values).
What it is:

SVM-based regression model that finds the best linear relationship between features and target
Uses support vector machine principles adapted for regression instead of classification
Creates a linear decision boundary (like linear regression) but with SVM's approach

How it works:

Tries to fit a line/hyperplane through your data
Uses an epsilon-insensitive loss function - it ignores errors smaller than epsilon (tolerance)
Only "cares about" data points that are more than epsilon away from the predicted line
These important points become support vectors


Compared to Linear Regression:

Linear Regression: Minimizes sum of squared errors for all points
LinearSVR: Only penalizes errors larger than epsilon, more robust to outliers

When to use:

When you want robustness to outliers
When you have noisy data with some bad data points
When you want the regularization benefits of SVM
For high-dimensional data where SVM techniques shine'''

In [None]:
svregressor = LinearSVR()
svregressor.fit(X_train,y_train) # this will generate shape error, as it doesn't natively support multioutput

ValueError: y should be a 1d array, got an array of shape (1980, 2) instead.

In [21]:
# here is an alternative approach to make it work
from sklearn.multioutput import MultiOutputRegressor

In [22]:
MultiOutput_SVR_Regressor = MultiOutputRegressor(svregressor)
MultiOutput_SVR_Regressor.fit(X_train,y_train)


In [24]:
MultiOutput_SVR_Regressor.predict(X_test)

array([[-219.70657932, -318.13030553],
       [ 178.59266639,  177.43660616],
       [  16.69658858,  -23.83905624],
       [ -40.35976192,   29.72635948],
       [   4.07515827,   99.58276891],
       [ -41.45985513, -127.51875482],
       [ 149.75617937,   59.5990617 ],
       [ 113.03195477,  104.83605366],
       [  43.22062367, -127.66191744],
       [-175.02781024,  -30.05178899],
       [ -32.57009377,  -23.63012091],
       [ 270.35156777,   46.08468497],
       [  68.91106594,  121.89906376],
       [  22.29098272,   -8.11033909],
       [  47.86688858,  -60.11281237],
       [-152.65587863,  -28.81317293],
       [ -30.93099299,   13.32402427],
       [ -86.43031384,  -51.10085638],
       [  58.92303822,    1.49236708],
       [ 194.13566881,   84.50052725]])

In [27]:
y_test

array([[-219.70657932, -318.13030553],
       [ 178.59266639,  177.43660616],
       [  16.69658858,  -23.83905624],
       [ -40.35976192,   29.72635948],
       [   4.07515827,   99.58276891],
       [ -41.45985513, -127.51875482],
       [ 149.75617937,   59.5990617 ],
       [ 113.03195477,  104.83605366],
       [  43.22062367, -127.66191744],
       [-175.02781024,  -30.05178899],
       [ -32.57009377,  -23.63012091],
       [ 270.35156777,   46.08468497],
       [  68.91106594,  121.89906376],
       [  22.29098272,   -8.11033909],
       [  47.86688858,  -60.11281237],
       [-152.65587863,  -28.81317293],
       [ -30.93099299,   13.32402427],
       [ -86.43031384,  -51.10085638],
       [  58.92303822,    1.49236708],
       [ 194.13566881,   84.50052725]])

but it seems we can not use cross_val_score directly with the wrapper

In [None]:

scores1 = cross_val_score(MultiOutput_SVR_Regressor,X_test,y_test,cv=5,scoring='neg_mean_squared_error')
scores1

array([-21188.88268988,  -6580.54719893, -11316.77381373,  -3971.08003006,
        -4486.04723329])