<a href="https://colab.research.google.com/github/Daivar/ML-models-training/blob/main/%E2%80%9ESynthetic_problem_ipynb%E2%80%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Synthetic problem

In [1]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor

X, y = make_regression(n_targets=2, noise=0.9)
print('Feature vector:', X.shape)
print('Target vector:', y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

print('Build and fit a regressor model...')

# model = LinearRegression().fit(X_train, y_train)
model = MultiOutputRegressor(LinearRegression()).fit(X_train, y_train)
score = model.score(X_test, y_test)

print('Done. Score', score)

Feature vector: (100, 100)
Target vector: (100, 2)
Build and fit a regressor model...
Done. Score 0.8842482012115989


In [2]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score

lr_results = []
mr_results = []
for i in range(1, 10):
  X, y = make_regression(n_targets=2, noise=0.2)
  X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

  lr_model = LinearRegression().fit(X_train, y_train)
  mr_model = MultiOutputRegressor(LinearRegression()).fit(X_train, y_train)
  lr_results.append(r2_score(lr_model.predict(X_test), y_test, multioutput='uniform_average')) # multioutput='variance_weighted' ('raw_values', 'uniform_average', 'variance_weighted')
  mr_results.append(r2_score(mr_model.predict(X_test), y_test, multioutput='uniform_average'))
  

print(f'LR: {sum(lr_results) / len(lr_results)}')
print(f'MR: {sum(mr_results) / len(lr_results)}')

LR: 0.6562422278762172
MR: 0.6558057289492178


In [3]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
from sklearn.metrics import r2_score

lr_results = []
mr_results = []
rc_results = []
for i in range(1, 10):
  X, y = make_regression(n_targets=2, noise=0.2)
  X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

  lr_model = LinearRegression().fit(X_train, y_train)
  mr_model = MultiOutputRegressor(LinearRegression()).fit(X_train, y_train)
  rc_model = RegressorChain(LinearRegression()).fit(X_train, y_train)
  lr_results.append(r2_score(lr_model.predict(X_test), y_test, multioutput='uniform_average')) # multioutput='variance_weighted' ('raw_values', 'uniform_average', 'variance_weighted')
  mr_results.append(r2_score(mr_model.predict(X_test), y_test, multioutput='uniform_average'))
  rc_results.append(r2_score(rc_model.predict(X_test), y_test, multioutput='uniform_average'))
  

print(f'LR: {sum(lr_results) / len(lr_results)}')
print(f'MR: {sum(mr_results) / len(mr_results)}')
print(f'RC: {sum(rc_results) / len(rc_results)}')

LR: 0.7037400976376254
MR: 0.7036505975459657
RC: 0.7030055122040919


In [4]:
import numpy as np
from sklearn.datasets import load_linnerud
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge

def print_paired(p1, p2):
  print("-----")
  for i in zip(p1, p2):
    print(f'{i[0]} -> {i[1]}')

X, y = load_linnerud(return_X_y=True)
print_paired(X, y)

clf = Ridge(random_state=123).fit(X, y)
# clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
pred = clf.predict(X[[0]])
print_paired(X[[0]], pred)

print(clf.score(X, y))

-----
[  5. 162.  60.] -> [191.  36.  50.]
[  2. 110.  60.] -> [189.  37.  52.]
[ 12. 101. 101.] -> [193.  38.  58.]
[ 12. 105.  37.] -> [162.  35.  62.]
[ 13. 155.  58.] -> [189.  35.  46.]
[  4. 101.  42.] -> [182.  36.  56.]
[  8. 101.  38.] -> [211.  38.  56.]
[  6. 125.  40.] -> [167.  34.  60.]
[ 15. 200.  40.] -> [176.  31.  74.]
[ 17. 251. 250.] -> [154.  33.  56.]
[ 17. 120.  38.] -> [169.  34.  50.]
[ 13. 210. 115.] -> [166.  33.  52.]
[ 14. 215. 105.] -> [154.  34.  64.]
[ 1. 50. 50.] -> [247.  46.  50.]
[ 6. 70. 31.] -> [193.  36.  46.]
[ 12. 210. 120.] -> [202.  37.  62.]
[ 4. 60. 25.] -> [176.  37.  54.]
[ 11. 230.  80.] -> [157.  32.  52.]
[ 15. 225.  73.] -> [156.  33.  54.]
[  2. 110.  43.] -> [138.  33.  68.]
-----
[  5. 162.  60.] -> [176.16484296  35.0548407   57.09000136]
0.2968777763173123


In [5]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
from sklearn.metrics import r2_score

def print_paired(p1, p2):
  print("-----")
  for i in zip(p1, p2):
    print(f'{i[0]} -> {i[1]}')

X, y = load_linnerud(return_X_y=True)
print_paired(X, y)

# clf = Ridge(random_state=123).fit(X, y)
clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
pred = clf.predict(X[[0]])
print_paired(X[[0]], pred)

print(clf.score(X, y))

-----
[  5. 162.  60.] -> [191.  36.  50.]
[  2. 110.  60.] -> [189.  37.  52.]
[ 12. 101. 101.] -> [193.  38.  58.]
[ 12. 105.  37.] -> [162.  35.  62.]
[ 13. 155.  58.] -> [189.  35.  46.]
[  4. 101.  42.] -> [182.  36.  56.]
[  8. 101.  38.] -> [211.  38.  56.]
[  6. 125.  40.] -> [167.  34.  60.]
[ 15. 200.  40.] -> [176.  31.  74.]
[ 17. 251. 250.] -> [154.  33.  56.]
[ 17. 120.  38.] -> [169.  34.  50.]
[ 13. 210. 115.] -> [166.  33.  52.]
[ 14. 215. 105.] -> [154.  34.  64.]
[ 1. 50. 50.] -> [247.  46.  50.]
[ 6. 70. 31.] -> [193.  36.  46.]
[ 12. 210. 120.] -> [202.  37.  62.]
[ 4. 60. 25.] -> [176.  37.  54.]
[ 11. 230.  80.] -> [157.  32.  52.]
[ 15. 225.  73.] -> [156.  33.  54.]
[  2. 110.  43.] -> [138.  33.  68.]
-----
[  5. 162.  60.] -> [176.16484296  35.0548407   57.09000136]
0.29687777631731227


In [8]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
from sklearn.metrics import r2_score

def print_paired(p1, p2):
  print("-----")
  for i in zip(p1, p2):
    print(f'{i[0]} -> {i[1]}')

X, y = load_linnerud(return_X_y=True)
print_paired(X, y)

# clf = Ridge(random_state=123).fit(X, y)
# clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
# clf = RegressorChain(LinearRegression()).fit(X_train, y_train)
clf = RegressorChain(Ridge(random_state=123)).fit(X, y)
pred = clf.predict(X[[0]])
print_paired(X[[0]], pred)

print(clf.score(X, y))


-----
[  5. 162.  60.] -> [191.  36.  50.]
[  2. 110.  60.] -> [189.  37.  52.]
[ 12. 101. 101.] -> [193.  38.  58.]
[ 12. 105.  37.] -> [162.  35.  62.]
[ 13. 155.  58.] -> [189.  35.  46.]
[  4. 101.  42.] -> [182.  36.  56.]
[  8. 101.  38.] -> [211.  38.  56.]
[  6. 125.  40.] -> [167.  34.  60.]
[ 15. 200.  40.] -> [176.  31.  74.]
[ 17. 251. 250.] -> [154.  33.  56.]
[ 17. 120.  38.] -> [169.  34.  50.]
[ 13. 210. 115.] -> [166.  33.  52.]
[ 14. 215. 105.] -> [154.  34.  64.]
[ 1. 50. 50.] -> [247.  46.  50.]
[ 6. 70. 31.] -> [193.  36.  46.]
[ 12. 210. 120.] -> [202.  37.  62.]
[ 4. 60. 25.] -> [176.  37.  54.]
[ 11. 230.  80.] -> [157.  32.  52.]
[ 15. 225.  73.] -> [156.  33.  54.]
[  2. 110.  43.] -> [138.  33.  68.]
-----
[  5. 162.  60.] -> [176.16484296  35.0548407   57.09000136]
0.29687777631731227


In [9]:
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
from sklearn.linear_model import Ridge

ridge = Ridge(random_state=1)

chained = RegressorChain(ridge, order=[1, 2, 0])
chained.fit(X, y)
pred = chained.predict(X[[0]])
print_paired(X[[0]], pred)
print(chained.score(X,y))
print(chained.estimators_)

-----
[  5. 162.  60.] -> [176.16484296  35.0548407   57.09000136]
0.2968777763173123
[Ridge(random_state=1), Ridge(random_state=1), Ridge(random_state=1)]


In [11]:
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
from sklearn.linear_model import Ridge

X, y = load_linnerud(return_X_y=True)
ridge = Ridge() # random_state=1
chained = RegressorChain(ridge, order=[0, 1, 2])
chained.fit(X, y)
pred = chained.predict(X[[0]])
print(chained.score(X,y))

print(chained.estimators_)

0.29687777631731227
[Ridge(), Ridge(), Ridge()]
