# Random regression problems

In [17]:
import numpy as np
import pandas as pd

In [18]:
# Synthetic dataset for simple (single-feature) regression.
# One informative feature, 200 samples, a bias of 150 added to the target and
# a noise level of 30; random_state=0 pins the generator so the same data are
# produced on every run.
from sklearn.datasets import make_regression

X_R1, y_R1 = make_regression(
    n_samples=200,
    n_features=1,
    n_informative=1,
    bias=150.0,
    noise=30,
    random_state=0,
)

In [23]:
%matplotlib notebook
import seaborn as sn
import matplotlib.pyplot as plt

# Scatter the raw one-feature samples to inspect how the target varies with
# the feature before any model is fitted.
plt.figure()
plt.title('Simple regression problem with one input variable')
# Axis labels let the figure stand on its own; the wording matches the
# least-squares plot drawn later in this notebook.
plt.xlabel('Feature value (x)')
plt.ylabel('Target value (y)')

plt.scatter(X_R1, y_R1, marker='o', s=50)
plt.show()

<IPython.core.display.Javascript object>

In [24]:
# Fit ordinary least-squares regression on the one-feature dataset and report
# the learned parameters plus R-squared on the train and test splits.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# No test_size is passed, so train_test_split applies its default split;
# random_state=0 makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_R1, y_R1, random_state=0
)

linreg = LinearRegression()
linreg.fit(X_train, y_train)

# coef_ is an array (one weight per feature), so it is printed as-is; the
# scalar intercept and scores are rounded to three decimals.
print('linear model coeff (w): {}'.format(linreg.coef_))
print('linear model intercept (b): {:.3f}'.format(linreg.intercept_))
print('R-squared score (training): {:.3f}'.format(
    linreg.score(X_train, y_train)))
print('R-squared score (test): {:.3f}'.format(
    linreg.score(X_test, y_test)))

linear model coeff (w): [97.43720625]
linear model intercept (b): 145.065
R-squared score (training): 0.922
R-squared score (test): 0.900


In [25]:
# Overlay the fitted least-squares line on the one-feature samples.
#
# NOTE: `linreg` must still be the model fitted on the X_R1 split. A later
# cell rebinds `linreg` to a model trained on the multi-feature X_F1 data, so
# re-running this cell after that one would no longer draw this line.
plt.figure(figsize=(5, 4))
plt.title('Least-squares linear regression')
plt.xlabel('Feature value (x)')
plt.ylabel('Target value (y)')

plt.scatter(X_R1, y_R1, marker='o', s=50, alpha=0.8)
# Model prediction y = w * x + b evaluated at every sample, drawn in red.
plt.plot(X_R1, linreg.coef_ * X_R1 + linreg.intercept_, 'r-')
plt.show()

<IPython.core.display.Javascript object>

In [26]:
# Synthetic dataset for a more complex regression problem (Friedman #1).
# 200 samples with 7 input features; random_state=0 keeps the data identical
# across runs. Per the scikit-learn documentation only the first five
# features enter the target formula, the remaining ones are uninformative.
from sklearn.datasets import make_friedman1

X_F1, y_F1 = make_friedman1(
    n_samples=200,
    n_features=7,
    random_state=0,
)

In [27]:
# X_F1 has several input features, so a 2-D scatter can only show the target
# against one of them at a time. The title and x-axis label say which column
# is shown rather than implying the problem has a single input.
feature_idx = 2  # column of X_F1 plotted on the x-axis

plt.figure()
plt.title('Complex regression problem: one of {} input variables shown'
          .format(X_F1.shape[1]))
plt.xlabel('Feature value (x[:, {}])'.format(feature_idx))
plt.ylabel('Target value (y)')

plt.scatter(X_F1[:, feature_idx], y_F1, marker='o', s=50)
plt.show()

<IPython.core.display.Javascript object>

In [28]:
# Fit ordinary least-squares regression on the multi-feature Friedman data
# and report the learned parameters plus R-squared on both splits.
#
# NOTE: this cell rebinds X_train, X_test, y_train, y_test and linreg, which
# an earlier cell set from the one-feature X_R1 data. After it runs, those
# names refer to the X_F1 split and the model trained on it.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Default split proportions; random_state=0 makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_F1, y_F1, random_state=0
)

linreg = LinearRegression()
linreg.fit(X_train, y_train)

# coef_ holds one weight per input feature and is printed unformatted; the
# scalar intercept and scores are rounded to three decimals.
print('linear model coeff (w): {}'.format(linreg.coef_))
print('linear model intercept (b): {:.3f}'.format(linreg.intercept_))
print('R-squared score (training): {:.3f}'.format(
    linreg.score(X_train, y_train)))
print('R-squared score (test): {:.3f}'.format(
    linreg.score(X_test, y_test)))

linear model coeff (w): [ 4.55883162  8.09721141 -0.30963184  8.93653429  5.05658949 -0.43856919
  0.78564894]
linear model intercept (b): 1.285
R-squared score (training): 0.754
R-squared score (test): 0.672
