In [2]:
from sklearn import datasets
from sklearn import linear_model
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
# Load scikit-learn's bundled diabetes dataset and expose its parts under the
# short names used by the cells below.
diabetes = datasets.load_diabetes()
x, y = diabetes.data, diabetes.target  # feature matrix and regression target
features = diabetes.feature_names
n_features = len(features)  # number of feature names

In [5]:
# Train a linear regression model on the complete dataset.
# The metrics printed here are computed on the same samples the model was
# fitted on, so they are in-sample (training) scores.
regr = linear_model.LinearRegression().fit(x, y)
print("Coeficientes del modelo: ", regr.coef_)

# Predict the training samples and report the fit quality
y_pred = regr.predict(x)
print("MSE: ", mean_squared_error(y, y_pred))
print("R^2: ", r2_score(y, y_pred))

Coeficientes del modelo:  [ -10.0098663  -239.81564367  519.84592005  324.3846455  -792.17563855
  476.73902101  101.04326794  177.06323767  751.27369956   67.62669218]
MSE:  2859.69634758675
R^2:  0.5177484222203499


In [7]:
# k-fold cross-validation (manual loop)
#
# For every fold a fresh model is fitted on the remaining folds and scored on
# the held-out fold; the per-fold MSE and R^2 are printed and then averaged.
n_folds = 5
# Fixed seed: with shuffle=True and no random_state the fold assignment, and
# therefore every metric printed by this cell, would change on each run.
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

mse_folds = []  # per-fold mean squared error
r2_folds = []   # per-fold coefficient of determination
for train_index, test_index in kf.split(x):

    # Training phase: fit only on the samples assigned to the training folds
    x_train = x[train_index, :]
    y_train = y[train_index]

    regr_cv = linear_model.LinearRegression()
    regr_cv.fit(x_train, y_train)

    # Test phase: predict the held-out fold
    x_test = x[test_index, :]
    y_test = y[test_index]

    y_pred = regr_cv.predict(x_test)

    # Calculate MSE and R^2 for this fold
    mse_i = mean_squared_error(y_test, y_pred)
    print('mse = ', mse_i)

    r2_i = r2_score(y_test, y_pred)
    print('r^2= ', r2_i)

    mse_folds.append(mse_i)
    r2_folds.append(r2_i)

# Average the per-fold scores (one entry per fold, i.e. n_folds entries)
mse = sum(mse_folds) / len(mse_folds)
print('MSE = ', mse)

r2 = sum(r2_folds) / len(r2_folds)
print('R^2 = ', r2)

mse =  3269.0293869981106
r^2=  0.478240456192095
mse =  2892.604434642402
r^2=  0.548010531830972
mse =  2613.372915882429
r^2=  0.5446668291108467
mse =  2730.1532432218023
r^2=  0.48571572641296623
mse =  3447.131208779765
r^2=  0.41260653632253963
MSE =  2990.4582379049016
R^2 =  0.49384801597388395


In [9]:
# k-fold cross-validation using cross_val_predict
#
# cross_val_predict returns one out-of-fold prediction per sample, so the
# metrics below are computed once over all pooled predictions rather than
# averaged per fold. That is why they can differ slightly from the per-fold
# averages of a manual k-fold loop.
n_folds = 5
regr = linear_model.LinearRegression()
y_pred = cross_val_predict(regr, x, y, cv=n_folds)

print('MSE: ', mean_squared_error(y, y_pred))
print("R^2: ", r2_score(y, y_pred))

MSE:  2992.679946593995
R^2:  0.49532242216821853
