# An example of linear regression

Based on the scikit-learn ordinary least squares example: <http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html>

## Import some needed packages

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

## Load a dataset (the diabetes dataset)

In [None]:
# Load scikit-learn's bundled diabetes dataset; later cells read its `.data` and `.target`.
diabetes = datasets.load_diabetes()

In [None]:
# Dump the whole loaded dataset object to inspect what it contains.
print(diabetes)

### Select only one feature

In [None]:
# Keep only column 2 of the feature matrix as a 2-D, single-column array
# (the slice 2:3 retains the column axis instead of collapsing it).
diabetes_X = diabetes.data[:, 2:3]
print(diabetes_X)

## Plot the dataset

In [None]:
# Scatter the selected feature (x-axis) against the target values (y-axis).
plt.scatter(diabetes_X, diabetes.target,  color='black')
plt.show()

## Split the data into training and testing sets

In [None]:
# Hold out the last 20 samples for testing; everything before them is training data.
# Features and targets are sliced at the same position so the rows stay aligned.
diabetes_X_train, diabetes_X_test = diabetes_X[:-20], diabetes_X[-20:]
diabetes_y_train, diabetes_y_test = diabetes.target[:-20], diabetes.target[-20:]
print(diabetes_X_test)

In [None]:
# NOTE(review): the previous cell already prints diabetes_X_test, so this output is a repeat.
print(diabetes_X_test)

## Create a linear regression model

In [None]:
# Create an unfitted linear regression estimator with default settings.
regr = linear_model.LinearRegression()

## Train the model using the training sets

In [None]:
# Fit the model on the training split only; the held-out test samples are not used here.
regr.fit(diabetes_X_train, diabetes_y_train)

## Make predictions using the testing set

In [None]:
# Predict targets for the held-out test features; used below for scoring and plotting.
diabetes_y_pred = regr.predict(diabetes_X_test)

## Coefficients, mean squared error & R² score

In [None]:
# Learned coefficient(s) of the fitted model (one entry per input feature).
print('Coefficients: \n', regr.coef_)
# Mean squared error between the true and predicted test targets.
print("Mean squared error: %.2f"
      % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# r2_score is the coefficient of determination (R^2), which is a different
# metric from the explained variance score; 1 is perfect prediction.
print('Coefficient of determination: %.2f'
      % r2_score(diabetes_y_test, diabetes_y_pred))

## Plot outputs

In [None]:
# Test samples as black points, with the model's predictions drawn as a blue line.
plt.scatter(diabetes_X_test, diabetes_y_test,  color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

# Optional: uncomment the two calls below to hide the axis ticks.
#plt.xticks(())
#plt.yticks(())

plt.show()

In [None]:
# All samples (training and test together) as black points, with the fitted line on top.
# The line is drawn from the test-set predictions, so it spans only the test samples' x-range.
plt.scatter(diabetes_X, diabetes.target,  color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)
plt.show()