# Single-Variable Regression

We're no longer concerned with classifying features into a fixed set of labels. Now, given a real-valued input, we want to make a pretty good guess at a real-valued output.

The input variables *don't have to be* independent for this to be useful!

However, we assume that the output depends on the input.

In [2]:
import numpy
from sklearn import linear_model, metrics, model_selection
from matplotlib import pyplot
import ipywidgets as widgets

import sys

sys.path.append("../")

from common import common_widgets

In [3]:
# Comma-delimited numeric data; every column but the last is input, the last is output.
input_file = "data_singlevar_regr.txt"
data = numpy.loadtxt(input_file, delimiter=",")

# The names are literal this time: X is the input column(s), Y is the output column.
X = data[:, :-1]
Y = data[:, -1]

In [4]:
def regress(test_size=0.2, random_state=0):
    """Fit a linear regressor on a train/test split and report how it did.

    Splits the module-level ``X`` and ``Y`` arrays, fits a
    ``LinearRegression`` on the training part, plots the held-out samples
    together with the model's predictions for them, and prints a set of
    regression metrics computed on the held-out part.

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.
    """
    X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )

    regressor = linear_model.LinearRegression()
    regressor.fit(X_train, Y_train)
    predictions = regressor.predict(X_test)

    # Held-out samples in green, the model's predictions for them in black.
    pyplot.scatter(X_test, Y_test, color="green")
    pyplot.plot(X_test, predictions, color="black", linewidth=4)
    pyplot.show()

    scorers = (
        ("Mean Absolute Error", metrics.mean_absolute_error),
        ("Mean Squared Error", metrics.mean_squared_error),
        ("Median Absolute Error", metrics.median_absolute_error),
        ("Explained Variance Score", metrics.explained_variance_score),
        ("R2 Score", metrics.r2_score),
    )
    # Pad labels to a common width so the "=" column lines up for every row;
    # a single tab after the label does not, because the labels differ in length.
    label_width = max(len(label) for label, _ in scorers)

    print("Linear Regressor Performance:")
    for label, scorer in scorers:
        print("\t{:<{}} = {:.2f}".format(label, label_width, scorer(Y_test, predictions)))

# Build interactive controls that call ``regress`` with the values of the
# shared ``common_widgets`` controls. Kept as a bare expression (not assigned)
# so that it is the value of the cell's final statement.
widgets.interactive(
    regress,
    test_size=common_widgets.test_size,
    random_state=common_widgets.random_seed
)

interactive(children=(FloatSlider(value=0.2, continuous_update=False, description='% Test Data', max=0.95, min…