# JEM092 Asset Pricing - Homework 2
### Summer Semester 2021/2022

### Authors: *Petr Čala, Tereza Čechová, Vilém Krejcar*
___

First things first, we install and load the necessary packages. Then we arbitrarily set a seed for reproducibility.

In [2]:
# Required packages
packages <- c("stringr", "quantmod", "tseries", "dplyr", "xml2", "httr", "PortfolioAnalytics",
              "lubridate")

# Install packages not yet installed (announce them before the install starts)
missing_packages <- setdiff(packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  print(paste0("Installing package ", missing_packages, "..."))
  install.packages(missing_packages)
}

# Packages loading
invisible(lapply(packages, library, character.only = TRUE))
# Clean up only the helper variables created above; wiping the whole workspace
# would also destroy any results already computed when this cell is re-run
rm(packages, missing_packages)
print('All packages loaded successfully...')

# Miscellaneous
options(repr.plot.width = 6, repr.plot.height = 5) # Default plot size in the notebook
set.seed(420) # Arbitrary seed, fixed for reproducibility

[1] "All packages loaded successfully..."


### Groundwork

Although we were able to obtain the data in the first homework, **we choose to use the provided dataset**, as this allows for easier and **more reliable reproducibility**. First we load the data, subset the necessary stocks, save these as new variables, and then discard the source data in order to free up some memory.

In [3]:
# Static variables

# Number identifying our group
group_number <- 61505008

# Input files (paths are given relative to the project root)
ticker_file <- "data/61505008_data_download.csv"
data_file <- "data/Asset_Pricing_HW_2_data.RData"
group_factor <- "data/third_factor_rand.csv"

# All input files gathered in a single list
data_files <- list(ticker_file, data_file, group_factor)

In [4]:
 # Set wd to project root (the notebook may be started from the 'scripts' folder)
if (str_detect(getwd(), 'scripts')) {
    setwd('..')
}

# Assert presence of all required files - report every missing file, then fail early
# instead of letting a later read fail with a less helpful error
missing_files <- Filter(function(file) !file.exists(file), data_files)
for (file in missing_files) {
    print(paste0('File ', file, ' does not exist or has been misplaced.'))
}
if (length(missing_files) > 0) {
    stop('Required data files are missing; see the messages above.', call. = FALSE)
}

# Load tickers of required stocks (stored in the second column of the ticker file)
tickers <- as.character(read.csv(ticker_file)[[2]])

# Check the assigned group factor
#   factor - path to a CSV file whose 2nd column holds group numbers and whose 3rd column
#            holds the factor assigned to each group
#   group  - group number to look up; defaults to the global `group_number`
# Prints a message naming the assigned factor and returns that message (the value of `print`).
# Stops with an error when the group number is not present in the file.
getFactor <- function(factor, group = group_number){
    data <- read.csv(factor)
    matching_row <- which(data[[2]] == group)
    if (length(matching_row) == 0) {
        # Without this guard the message would be printed with an empty factor name
        stop(paste0('Group ', group, ' was not found in ', factor, '.'), call. = FALSE)
    }
    assigned_factor <- as.character(data[matching_row, 3])
    print(paste0('The factor assigned to this group is ', assigned_factor, '.'))
}
# Report the factor assigned to our group, as listed in the group factor file
getFactor(group_factor)

[1] "The factor assigned to this group is VOL."


While selecting only the desired tickers, we found that **a ticker that was assigned to our group was missing from the provided data**. We decided to remedy this with a simple fix that replaces the data of each missing ticker with the data of another stock from the source data set, namely the first one that was not assigned to our group. As this was the case for only one stock, no trend bias should be introduced into the data.

In [5]:
# Load source data into the working directory
source_data <- load(data_file) # Names of all loaded objects, kept for the clean-up at the end

# Subset only for the stocks we will need
# Note - use these as BV$AAPL, not BV['AAPL']
BV <- book_value_sap500[tickers]
MKT <- MktCap_sap500[tickers]
OHLCV <- OHLCV_sap500[tickers]

# Assert presence of all assigned tickers in the source data - replace data of those that are missing
missing_tickers <- tickers[!tickers %in% names(OHLCV)]
# Candidate replacements: stocks from the source data that are not in the assigned set, in source order
replacement_pool <- setdiff(names(book_value_sap500), tickers)
if (length(missing_tickers) > length(replacement_pool)) {
    # Fail loudly rather than silently inserting empty data once the candidates run out
    stop('Not enough unassigned stocks in the source data to replace all missing tickers.',
         call. = FALSE)
}

for (i in seq_along(missing_tickers)) {
    ticker <- missing_tickers[i]
    replacement_stock <- replacement_pool[i] # Stock which shall serve as a replacement
    missing_ticker_idx <- which(tickers == ticker) # Index of the missing file
    print(paste0(ticker, ' is missing from the source data...'))

    # Replace the data
    # NOTE(review): assuming these are plain lists, the replaced slots keep the name produced
    # by the subsetting above rather than the ticker, so `BV$<ticker>` will not find them - confirm
    BV[missing_ticker_idx] <- book_value_sap500[replacement_stock]
    MKT[missing_ticker_idx] <- MktCap_sap500[replacement_stock]
    OHLCV[missing_ticker_idx] <- OHLCV_sap500[replacement_stock]
    print(paste0('Data for ', ticker, ' replaced successfully with data from ', replacement_stock))
}

# Remove source data from working directory for better memory management
rm(list = source_data)

[1] "MOH is missing from the source data..."
[1] "Data for MOH replaced successfully with data from ABT"


### Data preprocessing

We define several functions to help us preprocess data.

In [12]:
# Input a list of OHLCV datasets and return a single object holding only the adjusted prices.
#   OHLCV_data - list with one dataset per stock; the adjusted price column is recognised by
#                'Adjusted' appearing in its column name. All other columns are discarded.
# Returns the adjusted price columns of all stocks bound together column-wise.
# Stops with an error when the input is not a list.
getAdjustedPrices <- function(OHLCV_data) {
    if (!inherits(OHLCV_data, 'list')) {
        # `break` is only valid inside a loop, so signal a proper error instead
        stop('The input data must be a list', call. = FALSE)
    }
    yankPrices <- function(data) {
        adj_col_idx <- grep('Adjusted', colnames(data), fixed = TRUE) # Index of the adjusted prices column
        data[, adj_col_idx]
    }
    data_out <- lapply(OHLCV_data, yankPrices)
    do.call('cbind', data_out)
}

# Input an xts data frame containing raw adjusted prices and convert these to daily or monthly returns.
#   adjusted_prices_data - one column of adjusted prices per stock
#   type                 - either 'daily' or 'monthly' (default)
# Returns the returns of all stocks bound together column-wise; 'Adjusted' in the column
# names is replaced with 'returns'.
# Stops with an error when `type` is not one of the two supported values.
getReturns <- function(adjusted_prices_data, type = 'monthly') {
    if (length(type) != 1 || !type %in% c('daily', 'monthly')) {
        # `break` is only valid inside a loop, so signal a proper error instead
        stop('Can only handle daily or monthly returns calculation...', call. = FALSE)
    }
    # Pick the calculation once instead of re-checking `type` for every stock
    return_fun <- if (type == 'monthly') monthlyReturn else dailyReturn
    getReturn <- function(data) {
        suppressWarnings(return_fun(data))
    }
    returns <- lapply(adjusted_prices_data, getReturn) # Calculate returns for all stocks
    returns <- do.call('cbind', returns)
    colnames(returns) <- gsub('Adjusted', 'returns', names(adjusted_prices_data)) # Rename columns
    returns
}

# Turn a list of market capitalization objects into a single object of sizes,
# where size is the logarithm of market capitalization.
#   mkt_cap_data - list with one market capitalization object per stock
# Non-finite values are replaced with NA and '.Sizes' is appended to every column name.
getMonthlySizes <- function(mkt_cap_data) {
    # Logs of all stocks, bound together column-wise
    log_caps <- do.call('cbind', lapply(mkt_cap_data, log))

    # Keep finite values only, everything else becomes NA
    dropNonFinite <- function(column) ifelse(is.finite(column), column, NA)
    sizes <- as.xts(apply(log_caps, 2, dropNonFinite))

    colnames(sizes) <- paste0(colnames(sizes), '.Sizes')
    return(sizes)
}

# Build an xts object of volatilities with one row per date of the monthly returns index
# and one column per stock.
#   daily_returns_data   - daily returns, one column per stock
#   monthly_returns_data - monthly returns; supplies the dates and the column names
#                          ('returns' in the names is replaced with 'volatility')
# For every date, the daily returns from one month earlier up to that date are used.
getMonthlyVolatilities <- function(daily_returns_data, monthly_returns_data) {
    names(monthly_returns_data) <- gsub('returns', 'volatility', names(monthly_returns_data))
    volatility_names <- names(monthly_returns_data)
    period_ends <- index(monthly_returns_data)

    # Volatility of a single stock within one window: sample standard deviation in percent.
    # NOTE(review): the standard deviation of daily returns is scaled by sqrt(12) - confirm
    # that this is the intended scaling factor
    computeVolatility <- function(stock_returns) {
        squared_deviations <- (stock_returns - mean(stock_returns)) ^ 2
        sample_variance <- sum(squared_deviations) / (length(stock_returns) - 1)
        100 * sqrt(sample_variance) * sqrt(12)
    }

    # Volatilities of all stocks for the window ending at the given date
    computePeriodVolatilities <- function(period_end) {
        period_start <- period_end %m-% months(1)
        window_returns <- daily_returns_data[paste0(period_start, '/', period_end)]
        unlist(lapply(window_returns, computeVolatility))
    }

    volatility_rows <- lapply(period_ends, computePeriodVolatilities)
    volatility_matrix <- do.call('rbind', volatility_rows) # One row per date
    volatilities <- as.xts(volatility_matrix, order.by = period_ends)
    colnames(volatilities) <- volatility_names
    volatilities
}

In [14]:
# Compute the various metrics using previously defined functions
adjusted_prices <- getAdjustedPrices(OHLCV) # Adjusted prices of all stocks in one object
daily_returns <- getReturns(adjusted_prices, type = 'daily') # Daily returns from the adjusted prices
monthly_returns <- getReturns(adjusted_prices, type = 'monthly') # Monthly returns from the adjusted prices
monthly_sizes <- getMonthlySizes(MKT) # Logarithm of market capitalization

In [15]:
# Calculate monthly volatilities from the daily returns, one row per date of the monthly returns
monthly_volatilities <- getMonthlyVolatilities(daily_returns, monthly_returns)

### Portfolio analysis

In [135]:
# Plan: 3 sort variables (beta, size, volatility) - for each, make 5 portfolios sorted by that variable
# (each of those portfolios should have an equally weighted and a value weighted part)
# Monthly rebalancing

# Show the first rows of the book value data for SJM
head(BV$SJM)

           Stock Price Book Value per Share Price/Book Ratio
2006-01-31       25.97                29.93             0.87
2006-04-30       23.59                30.34             0.78
2006-07-31       27.00                30.49             0.89
2006-10-31       29.83                30.90             0.97
2007-01-31       29.08                31.04             0.94
2007-04-30       34.38                31.62             1.09

In [1]:
# Beta - portfolio analysis - seminar 10 - equal-weighted average returns
# Get monthly returns, use these returns to sort the portfolios by beta

In [None]:
# Beta - portfolio analysis - seminar 10 - value-weighted average returns
