# This Jupyter notebook contains the classes that are useful for the home assignment

### 1.0 OLS class

In [None]:
class OLS:

    """
    Ordinary least squares regression of Y on X.

    Y and X must be numpy ndarrays; anything else raises TypeError.
    X is the 2-D regressor matrix with one row per observation. No intercept
    is added here, so include a column of ones in X if one is wanted.
    Y is the dependent variable as a single column (n, 1); a 1-D array of
    length n is also accepted and treated as a column.

    After estimate() has run, the instance exposes:
        beta  - (k, 1) coefficient estimates
        se    - (k, 1) standard errors
        eps   - (n, 1) regression residuals
        s_hat - (1, 1) residual variance, eps'eps / (n - k)
        R2    - length-1 array, 1 - mean(eps**2) / var(Y)
    """

    def __init__(self, Y, X):
        # validate before storing, so a rejected input never ends up on the instance
        if not isinstance(X, np.ndarray):
            raise TypeError('X is not a numpy ndarray!')
        if not isinstance(Y, np.ndarray):
            raise TypeError('Y is not a numpy ndarray!')

        # keep the dependent and independent variables exactly as passed in
        self.Y, self.X = Y, X

    def estimate(self):
        """
        Fit the regression and return the coefficient table.

        Returns a pandas DataFrame with one row per regressor and the columns
        'beta', 'se', 't-statistic', 'p-value', 'CI - lower', 'CI - upper'.
        The t-statistic is reported in absolute value (the sign is that of
        beta). P-values are two-sided and, like the 95% confidence interval,
        based on the standard normal distribution.

        Raises ZeroDivisionError when the number of observations equals the
        number of regressors (no residual degrees of freedom).
        """
        Y, X = self.Y, self.X

        # a 1-D Y would broadcast against the (k, 1) columns built below and
        # break the output table, so treat it as a column vector
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)

        # (X'X)^-1 is needed for both beta and the standard errors: invert once
        xtx_inv = np.linalg.inv(X.T @ X)

        # estimate the beta coefficients
        self.beta = xtx_inv @ (X.T @ Y)

        # compute the regression residuals
        eps = Y - X @ self.beta

        # residual variance with the degrees-of-freedom correction n - k
        # (plain int division on purpose: n == k raises ZeroDivisionError)
        dof_scale = 1 / (Y.shape[0] - self.beta.shape[0])
        s_hat = dof_scale * (eps.T @ eps)

        # standard errors: square roots of the diagonal of s_hat * (X'X)^-1
        self.se = np.sqrt(np.diag(xtx_inv) * s_hat.item()).reshape((self.beta.shape[0], 1))

        # absolute t-statistic for the hypothesis beta_j = 0
        t = np.abs(self.beta / self.se)

        # two-sided p-values; the survival function keeps precision in the far
        # tail, where 1 - cdf(t) rounds to exactly zero
        p_vals = 2 * stats.norm.sf(t)

        # 95% confidence intervals from the normal critical value
        z_crit = stats.norm.ppf(0.975)
        CI_upper = self.beta + z_crit * self.se
        CI_lower = self.beta - z_crit * self.se

        # goodness of fit and quantities kept for later inspection
        MSE = np.mean(np.square(eps), axis=0)
        self.R2 = 1 - MSE / np.var(Y)
        self.eps = eps
        self.s_hat = s_hat

        # generate the output table
        outmat = np.hstack((self.beta, self.se, t, p_vals, CI_lower, CI_upper))
        table = pd.DataFrame(outmat)
        table.columns = ['beta', 'se', 't-statistic', 'p-value', 'CI - lower', 'CI - upper']

        return table
    

### 2.0 PCA class

In [None]:
class PCA:
    """
    Principal components extraction from a (t, n) data array X.

    The loadings are built from the eigenvectors of X.T @ X. That matrix is
    proportional to the covariance matrix only when the columns of X have
    mean zero; the original author assumes standardized data. Scaling is not
    checked or performed here.
    """

    def __init__(self, X):
        # default sample used by get_components when no X is passed
        self.X = X

    def get_components(self, X = None, kk = 1, λ = None):
        """
        Compute loadings and factors and store them as self.λ and self.f.

        X  : (t, n) data array; defaults to the sample given at construction.
        kk : number of components to keep (ignored when λ is supplied).
        λ  : optional (n, k) loadings. When given, no eigen-decomposition is
             done and the factors are backed out for X from these loadings,
             which allows applying estimated loadings to arbitrary samples.

        The estimated loadings are sqrt(n) times the kk leading eigenvectors,
        so that λ.T @ λ / n is the identity; factors are f = X @ λ / n.
        Nothing is returned. The sign of each component is whatever the
        eigen-solver produces and is not normalised.
        """
        if X is None:
            X = self.X

        # number of series (columns) in the sample
        n = X.shape[1]

        if λ is None:
            # X.T @ X is symmetric, so use the symmetric solver: it guarantees
            # real eigenvalues and orthonormal eigenvectors, which the general
            # np.linalg.eig does not
            covm = X.T @ X
            eigvals, eigvecs = np.linalg.eigh(covm)

            # order the components by descending eigenvalue and keep kk of them
            order = np.argsort(eigvals)[::-1]

            # compute the factor loadings
            λ = np.sqrt(n) * eigvecs[:, order[:kk]]

        # factors first (from the loadings as given), then keep real parts only
        factors = X @ λ / n
        self.λ = np.real(λ)
        self.f = np.real(factors)

### 3.0 Miscellaneous 
#### 3.1 Exercise B:

In [None]:
# Read spreadsheet columns C..IT (252 columns), taking the column labels from
# row index 3, then transpose so that every spreadsheet column becomes a row.
varekonsum = pd.read_excel('varekonsum.xlsx', usecols="C:IT", header=3).T
# drop every column (after the transpose) that contains a missing value
varekonsum = varekonsum.dropna(axis=1)

# adjust the column label; this requires that exactly one column is left
varekonsum.columns =['varekonsum']

# Replace the index with month-end timestamps from Jan 2000 to Dec 2020
# (252 dates, one per column read above). ISO date strings avoid day-first
# guessing by the parser, and an explicit MonthEnd offset is used instead of
# the 'M' alias, which recent pandas versions deprecate in favour of 'ME'.
varekonsum.index = pd.date_range(start='2000-01-31', end='2020-12-31', freq=pd.offsets.MonthEnd())

#### 3.2 Exercise C:

In [None]:
# Use the 'Unnamed: 0' column (company names, per the original note) as the row index.
financial_data = financial_data.set_index('Unnamed: 0')

# Encode every sector label as the integer code of its category.
sector_categories = financial_data["Sector"].astype('category')
financial_data["Sector"] = sector_categories.cat.codes