# QARP Model

### 1. Compute Factors

In [1]:
# import libraries
import pandas
import numpy as np
import datetime

In [2]:
# helper functions

def diffCol(col, gvkey, step):
    """Difference ``col`` over ``step`` rows without crossing company boundaries.

    Parameters
    ----------
    col : pandas.Series
        Values to difference.
    gvkey : pandas.Series
        Company identifier for every row of ``col``; must share ``col``'s index.
    step : int
        Number of rows to look back (forwarded to ``Series.diff``/``Series.shift``).

    Returns
    -------
    pandas.Series
        Series named ``"col"`` holding ``col[i] - col[i - step]`` where the row
        ``step`` positions back has the same ``gvkey``, and NaN everywhere else.

    Notes
    -----
    The comparison is purely positional, so rows are expected to be grouped by
    company and ordered in time before calling this helper.
    """
    # Use the identifier that was passed in; the previous version silently read
    # the global ``model_data`` and ignored this argument.
    sameCompany = gvkey.eq(gvkey.shift(step)) & gvkey.notna()
    # ``where`` keeps the difference on matching rows and inserts NaN otherwise,
    # replacing the slow row-by-row ``apply``.
    return col.diff(periods=step).where(sameCompany).rename("col")

In [3]:
# Load the four raw input tables from CSV files in the working directory.
compustat = pandas.read_csv("Compustat19612018.csv")

# Explicit dtypes are supplied for the CRSP file to speed up parsing.
# RET is read as text on purpose: it is converted/handled further down.
crsp = pandas.read_csv(
    "CRSP1960_2018.csv",
    dtype=dict(
        PERMNO=int,
        date=str,
        EXCHCD=float,
        SICCD=str,
        SHRCLS=str,
        PRIMEXCH=str,
        PRC=float,
        RET=str,
        SHROUT=float,
        CFACSHR=float,
        vwretd=float,
        sprtrn=float,
    ),
)

cpi = pandas.read_csv("CPIData.csv")
linking_table = pandas.read_csv("LinkingTable.csv")

#### Link CRSP and Compustat

In [4]:
# Parse LINKDT and LINKENDDT (stored as YYYYMMDD) into datetimes.
# Values that do not parse are coerced to NaT and then replaced with today's
# date, so such links are treated as running up to the present.
# The fill value is a normalized pandas.Timestamp rather than datetime.date:
# a plain date is not a datetime64 scalar and can force the column to object
# dtype, which breaks later .dt access and date comparisons.
linking_table["LINKDT"] = pandas.to_datetime(
    linking_table["LINKDT"].astype(str), format="%Y%m%d", errors="coerce"
).fillna(pandas.Timestamp.today().normalize())
linking_table["LINKENDDT"] = pandas.to_datetime(
    linking_table["LINKENDDT"].astype(str), format="%Y%m%d", errors="coerce"
).fillna(pandas.Timestamp.today().normalize())

In [5]:
# Attach the linking-table columns to every Compustat row with the same gvkey
# (inner join: rows without a link entry are dropped).
# NOTE(review): the LINKDT/LINKENDDT validity window is not applied in this
# cell, so a gvkey with several link records yields several rows - confirm
# that this is intended or filter on the link dates.
compustat = pandas.merge(compustat, linking_table, on="gvkey", how="inner")

In [6]:
# Extract the calendar year from the CRSP date strings (MM/DD/YYYY); the year
# is combined with the permno below to build the annual join key.
crsp["year"] = pandas.to_datetime(crsp["date"], format="%m/%d/%Y").dt.year

In [7]:
# Build the shared join key "<permno><year>" as text on both tables.
# NOTE(review): the keys only line up if LPERMNO and fyear are integer-typed;
# a float column would be rendered as e.g. "1990.0" - verify the dtypes.
crsp["permnoyear"] = crsp["PERMNO"].map(str).str.cat(crsp["year"].map(str))
compustat["permnoyear"] = compustat["LPERMNO"].map(str).str.cat(compustat["fyear"].map(str))

In [8]:
# For each permno-year we need the attributes of the most recent observation.
# Rows are stable-sorted by their parsed date before taking the last member of
# every group, so the result no longer depends on the file already being in
# chronological order. Rows with a missing date are placed first so they never
# win over a dated row. The original row order is restored afterwards, which
# keeps the output identical to a plain tail(1) when the data is already sorted.
crspGetLastYear = crsp[['permnoyear', 'EXCHCD', 'SICCD', 'SHROUT', 'SHRCLS', 'PRIMEXCH', 'date']]
crspGetLastYear = (
    crspGetLastYear
    .assign(_obsDate=pandas.to_datetime(crspGetLastYear['date'], format='%m/%d/%Y'))
    .sort_values('_obsDate', kind='mergesort', na_position='first')
    .groupby('permnoyear')
    .tail(1)
    .drop(columns='_obsDate')
    .sort_index(kind='mergesort')
)

In [9]:
# These columns are summed within each permno-year.
# RET is read as text because the file can contain non-numeric marker
# characters; those are coerced to NaN here so that sum() adds numbers instead
# of concatenating strings.
crspSum = crsp[['permnoyear', 'PRC', 'RET', 'vwretd', 'sprtrn']].assign(
    RET=pandas.to_numeric(crsp['RET'], errors='coerce')
)
# Select the value columns with a list (tuple selection on a GroupBy is no
# longer supported by pandas) and list each column exactly once.
crspSum = crspSum.groupby(by=['permnoyear'])[['PRC', 'RET', 'vwretd', 'sprtrn']].sum()

In [10]:
# Recombine the yearly sums with the last-observation attributes, one row per
# permnoyear. Note that this rebinds `crsp` from the raw table to the
# annualised one.
crsp = pandas.merge(crspSum, crspGetLastYear, on="permnoyear", how="inner")

In [11]:
# Join the annual CRSP rows onto the linked Compustat rows via permnoyear,
# then turn the textual key into a numeric column.
# NOTE(review): the row-shift based factor code further down presumably
# expects rows grouped by gvkey in time order; no sort happens here - confirm.
model_data = pandas.merge(compustat, crsp, on="permnoyear", how="inner")
model_data["permnoyear"] = model_data["permnoyear"].pipe(pandas.to_numeric)

#### Profitability Factors
1. gross profits over assets (GPOA) = (Revenue - costs of goods sold) / total assets
2. return on equity (ROE) = net income / book-equity 
3. return on assets (ROA) = net income / total assets
4. cash flow over assets (CFOA) = (net income + depreciation - changes in working capital - capital expenditures) / total assets
5. gross margin (GMAR) = (revenue - cost of goods sold) / total sales
6. low accruals (ACC) = - (change in working capital - depreciation) / total assets


In [12]:
# GPOA: gross profit over total assets
gp = model_data.revt - model_data.cogs
gpoa = gp / model_data["at"]

# ROE: income over book equity.
# Book equity falls back from (seq - pstk) to (ceq + pstk) to (at - lt + mibt).
# Every term is read from model_data so that all operands share one index;
# taking mibt from `compustat` aligned it against unrelated rows.
# NOTE(review): the fallbacks add pstk / mibt rather than netting them out;
# confirm against the intended book-equity definition.
be = (
    (model_data.seq - model_data.pstk)
    .fillna(model_data.ceq + model_data.pstk)
    .fillna(model_data["at"] - model_data["lt"] + model_data["mibt"])
)
roe = model_data["ib"] / be

# ROA: income over total assets
roa = model_data["ib"] / model_data["at"]

# CFOA: (income + depreciation - change in working capital - capex) / assets
wc = model_data["act"] - model_data["lct"] - model_data["che"] + model_data["dlc"] + model_data["txp"]
# One-row change in working capital, blanked where the previous row is a
# different company.
wcDiff = diffCol(wc, model_data["gvkey"], 1)
cf = model_data["ib"] + model_data["dp"] - wcDiff - model_data["capx"]
cfoa = cf / model_data["at"]

# GMAR: gross profit over sales
gmar = gp / model_data["sale"]

# ACC: -(change in working capital - depreciation) / total assets.
# Depreciation was previously left out, contradicting the factor definition.
acc = -(wcDiff - model_data["dp"]) / model_data["at"]

#### Growth Factors
Five year growth of profitability factors

In [13]:
# Five-row changes in the numerators. diffCol blanks rows whose counterpart
# five rows back belongs to a different gvkey, so the plain shift(5) used for
# the denominators below cannot leak another company's value into a result.
ib = model_data["ib"]
diffIb = diffCol(ib, model_data["gvkey"], 5)
diffGp = diffCol(gp, model_data["gvkey"], 5)
atShift = model_data["at"].shift(5)
ceqShift = model_data["ceq"].shift(5)
saleShift = model_data["sale"].shift(5)

# Growth factors: change in the numerator scaled by the lagged denominator of
# the matching profitability factor.
delGpoa = diffGp / atShift
delRoe = diffIb / ceqShift
delRoa = diffIb / atShift
# CFOA is defined over total assets, so its growth is scaled by lagged assets
# (it was previously scaled by lagged common equity).
delCfoa = diffCol(cf, model_data["gvkey"], 5) / atShift
# Gross-margin growth: this line used to reassign delGpoa with an equity
# denominator, overwriting the correct value computed above.
delGmar = diffGp / saleShift

#### Safety Factors

1. low beta (BAB) = cov(value-weighted return, S&P 500 return) / var(S&P 500 return)
2. low leverage (LEV) = - (total debt) / total assets
3. Ohlson’s O-score (OSCORE) = - (-1.32 - 0.407 * log(ADJASSET/CPI) + 6.03 * TLTA - 1.43 * WCTA + 0.076 * CLCA - 1.72 * OENEG - 2.37 * NITA - 1.83 * FUTL + 0.285 * INTWO - 0.521 * CHIN)
    *  Adjusted Total Assets (ADJASSET) = total assets + 10% * (Market equity - Book Equity)
    * Consumer Price Index (CPI)
    * Book Value of Debt (TLTA) = book value of debt / ADJASSET
    * Working Capital to Assets (WCTA) = (current assets - current liabilities) / ADJASSET
    * Current Liabilities to Assets (CLCA) = current liabilities / current assets
    * OENEG = 1 if total liabilities exceed total assets
    * Net income to assets (NITA) = net income / total assets
    * Pretax Income to Liabilities (FUTL) = pretax income / total liabilities
    * INTWO = 1 if net income is negative for the current and prior fiscal year
    * (CHIN) = change in net income
4. Altman's Z-Score (AZSCORE) = (1.2 Working Capital + 1.4 Retained Earnings + 3.3 EBIT + 0.6 Market Cap + Sales) / Total Assets
5. low ROE volatility (EVOL) = Standard deviation of quarterly ROE over the past 60 quarters, or of annual ROE over the past 5 years if quarterly data is unavailable
