# Linnerud

In [1]:
from sklearn import datasets
ds = datasets.load_linnerud()
print(ds.DESCR)

.. _linnerrud_dataset:

Linnerrud dataset
-----------------

**Data Set Characteristics:**

    :Number of Instances: 20
    :Number of Attributes: 3
    :Missing Attribute Values: None

The Linnerud dataset is a multi-output regression dataset. It consists of three
excercise (data) and three physiological (target) variables collected from
twenty middle-aged men in a fitness club:

- *physiological* - CSV containing 20 observations on 3 physiological variables:
   Weight, Waist and Pulse.
- *exercise* - CSV containing 20 observations on 3 exercise variables:
   Chins, Situps and Jumps.

.. topic:: References

  * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris:
    Editions Technic.





In [2]:
import pandas as pd
X = pd.DataFrame(ds.data,columns = ds.feature_names)
X.head(10)

Unnamed: 0,Chins,Situps,Jumps
0,5.0,162.0,60.0
1,2.0,110.0,60.0
2,12.0,101.0,101.0
3,12.0,105.0,37.0
4,13.0,155.0,58.0
5,4.0,101.0,42.0
6,8.0,101.0,38.0
7,6.0,125.0,40.0
8,15.0,200.0,40.0
9,17.0,251.0,250.0


In [5]:
y = ds.target
y

array([[191.,  36.,  50.],
       [189.,  37.,  52.],
       [193.,  38.,  58.],
       [162.,  35.,  62.],
       [189.,  35.,  46.],
       [182.,  36.,  56.],
       [211.,  38.,  56.],
       [167.,  34.,  60.],
       [176.,  31.,  74.],
       [154.,  33.,  56.],
       [169.,  34.,  50.],
       [166.,  33.,  52.],
       [154.,  34.,  64.],
       [247.,  46.,  50.],
       [193.,  36.,  46.],
       [202.,  37.,  62.],
       [176.,  37.,  54.],
       [157.,  32.,  52.],
       [156.,  33.,  54.],
       [138.,  33.,  68.]])

In [7]:
X.isnull().sum().sum()

0

In [8]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Chins   20 non-null     float64
 1   Situps  20 non-null     float64
 2   Jumps   20 non-null     float64
dtypes: float64(3)
memory usage: 608.0 bytes


In [9]:
X.describe()

Unnamed: 0,Chins,Situps,Jumps
count,20.0,20.0,20.0
mean,9.45,145.55,70.3
std,5.286278,62.566575,51.27747
min,1.0,50.0,25.0
25%,4.75,101.0,39.5
50%,11.5,122.5,54.0
75%,13.25,210.0,85.25
max,17.0,251.0,250.0


In [10]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = .2)
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((16, 3), (4, 3), (16, 3), (4, 3))

In [11]:
y_train

array([[157.,  32.,  52.],
       [162.,  35.,  62.],
       [193.,  36.,  46.],
       [176.,  37.,  54.],
       [182.,  36.,  56.],
       [189.,  37.,  52.],
       [154.,  34.,  64.],
       [176.,  31.,  74.],
       [191.,  36.,  50.],
       [156.,  33.,  54.],
       [189.,  35.,  46.],
       [138.,  33.,  68.],
       [202.,  37.,  62.],
       [169.,  34.,  50.],
       [167.,  34.,  60.],
       [211.,  38.,  56.]])

In [13]:
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

In [14]:
from sklearn.linear_model import LinearRegression
LinearRegressionObject = LinearRegression(normalize = True)
LinearRegressionObject.fit(X_train,y_train)

LinearRegression(normalize=True)

In [15]:
from sklearn.metrics import mean_squared_error
y_pred = LinearRegressionObject.predict(X_test)
mean_squared_error(y_test,y_pred)

680.9700394049933

In [16]:
from sklearn.metrics import r2_score
r2_score(y_test,y_pred)

-1.3144541247917503

In [17]:
LinearRegressionObject.score(X_test,y_test)

-1.3144541247917503