## Classes for Data Science: a linear regression example

We will perform a regression analysis to predict the weight of children from their age. To do so, we first generate some fake data for children between 0 and 12 months of age.

In [None]:
import numpy as np

In [None]:
# Simulate the data set: for every month of age (0..12) draw one noisy
# weight per baby, following a linear trend plus Gaussian noise.
babies = range(10)
months = np.arange(13)
AGE_TO_WEIGHT_COEFF: float = 24.7
data = [
    (month, month * AGE_TO_WEIGHT_COEFF + 96 + np.random.normal(loc=0, scale=20))
    for month in months
    for _ in babies
]
# Split the (month, weight) pairs into two parallel lists.
month_data, weight_data = (list(column) for column in zip(*data))

In [None]:
# pyplot is otherwise only imported in a later cell; import it here so the
# notebook also works when the cells are executed from top to bottom.
import matplotlib.pyplot as plt

# Scatter plot of the raw (age, weight) samples.
fig, axes = plt.subplots(1, 1, figsize=(9, 6))
_ = axes.scatter(month_data, weight_data)

In [None]:
# Design matrix: a leading column of ones (for the intercept term)
# placed next to the column of ages.
X_pre = np.column_stack((np.ones(len(month_data)), month_data))
y_pre = np.array(weight_data)
X_pre

In [None]:
# Build the design matrix step by step, printing the intermediate pieces.
X = np.array(month_data)
print(X)
X_0 = np.ones((len(X), 1))  # column of ones for the intercept term
X_1 = X[:, None]            # ages turned into a column vector
print(X_0)
print(X_1)
X = np.hstack((X_0, X_1))
y = np.array(weight_data)
X

In [None]:
# Sanity check: both ways of building the design matrix should agree.
np.array_equal(X_pre, X)

In [None]:
def ols(X, y):
    """Return the Ordinary Least Squares parameter estimates.

    Solves the normal equations ``(X^T X) beta = X^T y`` for ``beta``.

    Arguments:
    X: 2D array-like design matrix, one row per sample
    y: array-like of targets, one entry (or row) per sample

    Returns:
    numpy array with one parameter per column of X.

    Raises:
    numpy.linalg.LinAlgError: if ``X^T X`` is singular.
    """
    # Accept plain lists as well as numpy arrays.
    X = np.asarray(X)
    y = np.asarray(y)
    xtx = np.dot(X.T, X)  # x-transpose times x
    xty = np.dot(X.T, y)  # x-transpose times y
    # Solve the linear system directly rather than forming the explicit
    # inverse of xtx: same result, but cheaper and numerically more stable.
    return np.linalg.solve(xtx, xty)

In [None]:
# Estimate the regression parameters; the first entry belongs to the
# column of ones (intercept), the second to the age column (slope).
params = ols(X, y)
print('intercept: {} | slope: {}'.format(*params))

In [None]:
## here we draw a graph of the linear estimator
import matplotlib.pyplot as plt
%matplotlib inline

fig, axes = plt.subplots(figsize=(9, 6))
axes.scatter(month_data, weight_data)
# Evaluate the fitted line at the current x-axis limits so that it spans
# the full width of the scatter plot.
x_vals = np.array(axes.get_xlim())
y_vals = params[1] * x_vals + params[0]
axes.plot(x_vals, y_vals, 'r--')
axes.set_title('Linear Regression example')
axes.set_xlabel('Age (Months)')
axes.set_ylabel('Weight (?)')

In [None]:
class Regressor:
    """Base interface for regression models.

    Subclasses must override both ``fit`` and ``predict``.
    """

    def fit(self, X, y):
        """Fit the model to the data; must be overridden by subclasses."""
        raise NotImplementedError("Regressor.fit() - override me")

    def predict(self, X):
        """Return predictions for X; must be overridden by subclasses."""
        raise NotImplementedError("Regressor.predict() - override me")


class LinearRegressor(Regressor):
    """Linear regression with an intercept, fitted by Ordinary Least Squares.

    Attributes (all ``None`` until ``fit`` has been called):
    coef: full parameter vector, intercept first, then one slope per feature
    intercept: fitted intercept
    slope: 1D numpy array of fitted slopes, one per feature
    """

    def __init__(self):
        self.coef = None
        self.intercept = None
        self.slope = None

    def fit(self, X, y):
        """
        Fit model coefficients.

        Arguments:
        X: 1D or 2D numpy array
        y: 1D numpy array

        Raises:
        numpy.linalg.LinAlgError: if the normal equations are singular.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # A 1D X is a single feature: turn it into a column vector.
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        # Prepend a column of ones so the first parameter is the intercept.
        design = np.column_stack((np.ones(X.shape[0]), X))
        # Closed-form OLS: solve (D^T D) beta = D^T y without forming an
        # explicit inverse.
        params = np.linalg.solve(np.dot(design.T, design), np.dot(design.T, y))
        self.coef = params
        self.intercept = params[0]
        self.slope = params[1:]

    def predict(self, X):
        """
        Output model prediction.

        Arguments:
        X: 1D or 2D numpy array

        Raises:
        RuntimeError: if called before ``fit``.
        """
        if self.intercept is None or self.slope is None:
            raise RuntimeError("LinearRegressor.predict() called before fit()")
        X = np.asarray(X)
        # A 1D X is a single feature: turn it into a column vector.
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return self.intercept + np.dot(X, self.slope)
    

In [None]:
# Fit the class-based model on the generated data, report the fitted
# parameters, and predict the weight for ages 0 to 3 months.
regressor = LinearRegressor()
regressor.fit(X=np.array(month_data), y=np.array(weight_data))
fitted = (regressor.intercept, regressor.slope)
print('intercept: {} | slope: {}'.format(*fitted))
regressor.predict(np.arange(4))