In [6]:
# Setup
from mlwpy import *
%matplotlib inline
# Load the diabetes regression data and hold out 25% of the rows for testing.
# The fixed random_state keeps the split the same on every run.
# (`datasets` and `skms` are presumably supplied by the mlwpy star import.)
diabetes = datasets.load_diabetes()
tts = skms.train_test_split(
	diabetes.data, diabetes.target, test_size=.25, random_state=42
)
# Unpack the four pieces of the split: features first, then targets.
[diabetes_train_ftrs, diabetes_test_ftrs,
 diabetes_train_tgt, diabetes_test_tgt] = tts

In [7]:
# Tiny worked example: three predictions next to the values actually observed.
# error = predicted - actual and resid = actual - predicted, so the two
# columns have the same magnitude and opposite sign.
ape_df = (
	pd.DataFrame({'predicted': [4, 2, 9], 'actual': [3, 5, 7]})
	.assign(
		error=lambda d: d['predicted'] - d['actual'],
		resid=lambda d: d['actual'] - d['predicted'],
	)
)

In [8]:
def regression_residuals(ax, predicted, actual, show_errors=None, right=False):
	''' Draw a residual plot (actual - predicted, against predicted) on `ax`.

	ax          -> matplotlib-style Axes to draw on
	predicted   -> sequence of predicted values
	actual      -> sequence of observed values, one per prediction
	show_errors -> None (highlight nothing), "all", or a sequence of integer
	               row positions (list, range, NumPy array, ...); each
	               selected residual gets a red vertical line from zero
	right       -> if True, put the y-axis ticks and label on the right '''
	df = pd.DataFrame({'actual': actual, 'predicted': predicted})
	# residual = actual - predicted, which is what the y-axis label promises
	df['resid'] = df.actual - df.predicted
	ax.plot(df.predicted, df.resid, '.')
	# zero reference line: a point on it was predicted exactly
	ax.plot(df.predicted, np.zeros_like(predicted), '-')
	if right:
		ax.yaxis.tick_right()
		ax.yaxis.set_label_position("right")
	ax.set_xlabel('Predicted Value')
	ax.set_ylabel('Residual')

	# Only compare against 'all' when given a string: `array == 'all'` is an
	# elementwise comparison whose truth value is ambiguous.
	if isinstance(show_errors, str) and show_errors == 'all':
		show_errors = range(len(df))

	# Decide by size rather than truthiness so NumPy arrays are accepted and
	# an empty selection still draws nothing. A bare integer (including 0)
	# is treated as a single row position.
	nothing_selected = (
		show_errors is None
		or show_errors is False
		or np.size(show_errors) == 0
	)
	if not nothing_selected:
		preds = df.predicted.iloc[show_errors]
		resids = df.resid.iloc[show_errors]
		ax.vlines(preds, 0, resids, 'r')

# Left panel: predicted vs. actual for the toy example, with the diagonal on
# which a perfect prediction would fall. Right panel: the same three points
# shown as residuals, each one highlighted.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ax1.plot(ape_df.predicted, ape_df.actual, 'r.')  # pred vs actual
ax1.plot([0, 10], [0, 10], 'b-')  # perfect line
ax1.set_xlabel('Predicted')
ax1.set_ylabel('Actual')
regression_residuals(ax2, ape_df.predicted, ape_df.actual,
					 show_errors='all', right=True)

In [9]:
from sklearn import linear_model, neighbors

# Fit each regressor on the training split and draw its test-set residuals
# side by side; the shared axes make the two panels directly comparable.
lr = linear_model.LinearRegression()
knn = neighbors.KNeighborsRegressor()
models = [lr, knn]
fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharex=True, sharey=True)
for model, ax, on_right in zip(models, axes, [False, True]):
	preds = (model.fit(diabetes_train_ftrs, diabetes_train_tgt)
			 .predict(diabetes_test_ftrs))
	# [-20] highlights the residual of one test example (20th from the end)
	regression_residuals(ax, preds, diabetes_test_tgt, [-20], on_right)
axes[0].set_title('Linear Regression Residuals')
axes[1].set_title('k-NN-Regressor Residuals')
# Tighten the layout last: tight_layout sizes the margins from the tick
# labels, axis labels and titles that exist at the moment it is called.
fig.tight_layout();

In [10]:
# Actual target value of the test example at position -20, i.e. the example
# whose residual is highlighted in the two residual plots above.
print(diabetes_test_tgt[-20])

280.0
