# OLS: Ordinary Least Squares Regression

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [None]:
# Predictor: the consecutive integers 58..72 as a (15, 1) column, the 2-D
# layout scikit-learn estimators expect for their feature matrix.
X = np.arange(58, 73).reshape(-1, 1)
# Response: one value per row of X.
y = np.array([
    115, 117, 120, 123, 126, 129, 132, 135,
    139, 142, 146, 150, 154, 159, 164,
])

In [None]:
# Echo the feature matrix to check its contents and (15, 1) column shape.
X

In [None]:
# Echo the response vector to check its contents.
y

In [None]:
# Hold out part of the data as a test set; the remainder is the training set.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,   # fraction of the observations reserved for testing
    random_state=0,  # fixed seed so the split is reproducible
)

Code explanation:

 - **test_size** = 0.2: we split our dataset (15 observations) into two parts (training set and test set), and the test set's share of the dataset is 0.2 (3 observations are put into the test set). You can write it as 1/5 or 0.2 to get 20%; they are the same. The test set should not be too big; if it is, we will lack data to train on. Normally, we pick around 5% to 30%.
 - **train_size**: if we have already set test_size, the rest of the data is automatically assigned to the training set.
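 - **random_state**: this is the seed for the random number generator; passing an integer (here 0) makes the split reproducible across runs. We can pass an instance of the RandomState class as well. If we leave it as None (the default), the RandomState instance used by np.random is used instead, so the split can differ from run to run.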

In [None]:
# Fitting Linear Regression to the dataset
# NOTE(review): the fit uses the full X, y rather than the X_train / y_train
# produced by the split cell -- confirm that is intended.
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

In [None]:
# Model form: y = b + m*X  (b = intercept, m = slope)

In [None]:
# Slope m of the fitted line; coef_ is an array with one entry per feature,
# so it holds a single value for this one-column X.
lin_reg.coef_

In [None]:
# Intercept b of the fitted line (the predicted y at X = 0).
lin_reg.intercept_

In [None]:
#y = -87.52 + 3.45X

In [None]:
# Visualizing the Linear Regression results
def viz_linear():
    """Plot the observations and the fitted scikit-learn regression line.

    Reads the notebook-level ``X``, ``y`` and ``lin_reg``. The data are drawn
    as red points and the model's predictions as a blue line, then the figure
    is shown. Returns ``None``.
    """
    fitted = lin_reg.predict(X)
    plt.scatter(X, y, color='red')
    plt.plot(X, fitted, color='blue')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('OLS Model')
    plt.show()


viz_linear()

In [None]:
#pip install statsmodels

In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [None]:
# The same observations as the X / y arrays, held in a DataFrame so the
# statsmodels formula API can refer to the columns by name.
data = {
    'X': list(range(58, 73)),
    'y': [
        115, 117, 120, 123, 126, 129, 132, 135,
        139, 142, 146, 150, 154, 159, 164,
    ],
}
df = pd.DataFrame(data)
df

In [None]:
# Tip: the tilde (~) used in the model formulas can be typed on Windows with ALT + 126 (numeric keypad).

In [None]:
# Fit the simple linear model y = b + m*X with the statsmodels formula API;
# the formula adds the intercept term automatically.
model = ols(formula='y ~ X', data=df).fit()

In [None]:
# Print the fitted linear model's summary table (coefficients, standard errors, R-squared, ...)
print(model.summary())

In [None]:
# Create the figure up front so its size can be set before anything is drawn
fig = plt.figure(figsize=(12,8))

# Draw statsmodels' regression diagnostic plots for the regressor 'X' of the
# linear model into that figure
fig = sm.graphics.plot_regress_exog(model, 'X', fig=fig)

In [None]:
# Quadratic model: I(...) makes the formula evaluate X**2 as plain arithmetic
# instead of interpreting ** as formula syntax.
model_quad = ols('y ~ X + I(X**2)', data=df).fit()

In [None]:
# Print the fitted quadratic model's summary table
print(model_quad.summary())

In [None]:
#y = 261.87 - 7.35X + 0.0831X^2

In [None]:
# Create the figure up front so its size can be set before anything is drawn
fig = plt.figure(figsize=(12,8))

# Draw statsmodels' regression diagnostic plots for the regressor 'X' of the
# quadratic model into that figure
fig = sm.graphics.plot_regress_exog(model_quad, 'X', fig=fig)

In [None]:
# Visualising the Linear Regression results:
# observations as blue points, the scikit-learn straight-line fit in red.
linear_fit = lin_reg.predict(X)
plt.scatter(X, y, color='blue')
plt.plot(X, linear_fit, color='red')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear Regression')
plt.show()

In [None]:
# Fitting Quadratic Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures

# Expand X into the columns [1, X, X**2]; fit_transform both learns the
# expansion from X and applies it.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

# Only the regressor is fitted on the expanded features. The transformer must
# not be re-fitted on X_poly: that would bind it to 3 input columns, so a
# later poly.transform(X) on the original single-column X would be rejected.
lin2 = LinearRegression()
lin2.fit(X_poly, y)

In [None]:
# Visualising the Quadratic Regression results:
# observations as blue points, the degree-2 polynomial fit in red.
quadratic_fit = lin2.predict(poly.fit_transform(X))
plt.scatter(X, y, color='blue')
plt.plot(X, quadratic_fit, color='red')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Quadratic Regression')
plt.show()

In [None]:
# Plot the quadratic OLS fit together with its prediction-interval band
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.preprocessing import PolynomialFeatures

# Design matrix [1, X, X**2]; the leading column of ones acts as the intercept,
# which sm.OLS does not add on its own.
polynomial_features = PolynomialFeatures(degree=2)
xp = polynomial_features.fit_transform(X)

# NOTE(review): this rebinds `model`, replacing the formula-based linear fit
# from the earlier cell -- confirm nothing later still needs that one.
model = sm.OLS(y, xp).fit()
ypred = model.predict(xp)

# wls_prediction_std returns (std_error, lower_bound, upper_bound) in that
# order; with the default alpha=0.05 the bounds form a 95% prediction interval.
_, lower, upper = wls_prediction_std(model)

plt.scatter(X, y)
plt.plot(X, ypred)
plt.plot(X, upper, '--', label="Upper")  # upper prediction bound
plt.plot(X, lower, ':', label="lower")   # lower prediction bound
plt.legend(loc='upper left')
plt.show()
