#In statistics, ordinary least squares (OLS) is a type of linear least squares method for estimating the unknown parameters in a linear regression model. OLS chooses the parameters of a linear function of a set of explanatory variables by the principle of least squares: minimizing the sum of the squares of the differences between the observed dependent variable (values of the variable being observed) in the given dataset and those predicted by the linear function.

In [None]:
import numpy as np   # Arrays and numerical operations
import pandas as pd   # Tabular data analysis
import matplotlib.pyplot as plt   # Data visualization
import statsmodels.api as sm  # Statistical models (used below for OLS regression)

In [None]:
# Read the dataset into a DataFrame; the path is relative, so the CSV file
# must be in the current working directory
data = pd.read_csv(filepath_or_buffer='Simple linear regression.csv')

In [None]:
# Display the DataFrame (a notebook renders the value of a cell's last expression)
data

In [None]:
# Preview the first rows (head() returns 5 rows by default)
data.head()

In [None]:
# Preview the last rows (tail() returns 5 rows by default)
data.tail()

In [None]:
# Draw 5 randomly selected rows; no random_state is passed, so the rows
# shown will generally differ between runs
data.sample(5)

In [None]:
# Print a concise summary of the DataFrame: columns, non-null counts, dtypes and memory usage
data.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe()

In [None]:
# Following the regression equation, our dependent variable (y) is the price
y = data['Price']
# Similarly, our independent variable (x1) is the size
x1 = data['Size']

In [None]:
# Inspect the dependent variable (the 'Price' column)
y

In [None]:
# Inspect the independent variable (the 'Size' column)
x1

In [None]:
# Scatter plot of the raw data: Size on the horizontal axis, Price on the vertical axis
plt.scatter(x=x1, y=y)

# Axis titles, enlarged for readability
plt.xlabel('Size', fontsize=20)
plt.ylabel('Price', fontsize=20)

# Render the figure
plt.show()

In [None]:
# statsmodels' OLS does not add an intercept on its own, so build the design
# matrix explicitly: add_constant adds a column of 1s (same length as x1)
x = sm.add_constant(x1)

# Specify the ordinary least squares model -- dependent variable (endog) y,
# regressors (exog) x -- and fit it
results = sm.OLS(endog=y, exog=x).fit()

# Show the regression summary table (coefficients, standard errors, R-squared, ...)
results.summary()

In [None]:
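# Coefficients noted from the regression summary, for the line y = m*x + c:
# c = 0.2750   (intercept, the 'const' coefficient)
# m = 0.0017   (slope, the 'Size' coefficient)
# NOTE(review): confirm these values against results.params for this dataset.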

In [None]:
# Create a scatter plot of the observed data
plt.scatter(x1, y)
# Read the intercept and slope from the fitted model rather than hand-copying
# rounded numbers from the summary table, so the plotted line always matches
# the regression that was actually estimated on this dataset.
# ('const' is the column added by sm.add_constant; 'Size' is the regressor.)
intercept = results.params['const']
slope = results.params['Size']
# Define the regression equation, so we can plot it
yhat = slope * x1 + intercept
# Plot the regression line against the independent variable (Size).
# NOTE: plt.plot returns a list of Line2D objects, not a Figure; the name
# `fig` is kept only so that any later cell referring to it keeps working.
fig = plt.plot(x1, yhat, lw=3, c='orange', label='regression line')
# Label the axes
plt.xlabel('Size', fontsize=20)
plt.ylabel('Price', fontsize=20)
# Draw the legend so the 'regression line' label is actually displayed
plt.legend()
plt.show()