# Financial Econometrics Report

## Housekeeping

In [1]:
from simfin.names import *
import pandas as pd
import numpy as np
import seaborn as sns
import time

# Import the main functionality from the SimFin Python API.
import simfin as sf
import pandas_datareader.data as web

api_key = 'free'
sf.set_api_key(api_key)

root = '/Users/Wanderer/Desktop/hu/Report/' # Change this when running locally
sf.set_data_dir(root + 'simfin_data/')
        
pd.set_option('display.max_columns', None)

## Load Data

In [3]:
def timecheck(price):
    '''
    Input: (DataFrame) data of one ticker only.
    Output: (Boolean) True only if data begin earlier than 2018-01-01 and end later than 2019-12-31.
    '''
    precheck = (price.index[0] <= pd.to_datetime('2018-01-01',format = '%Y-%m-%d'))
    postcheck = (price.index[0] <= pd.to_datetime('2018-01-01',format = '%Y-%m-%d'))
    check = precheck & postcheck
    return check

def tickerfilter(prices, name):
    '''
    Input1: (DataFrame) Data of multple tickers.
    Input2: (str) name of the indicator.
    Output: Writes pickle: filtered Input1
    '''
    good = sf.apply(df = prices, func = timecheck)
    good = good[good]
    good = list(good.index)
    writedata = prices.loc[good]
    writedata.to_pickle(root + 'good' + str(name) + '.pkl')
    print(name + ' saved!')
    return 0

def reloaddata():
    '''
    Loads data from SimFin.com or from local disk.
    Writes pickle
    '''
    incomes = sf.load(dataset = 'income', variant = 'ttm', market = 'us',
                index = [TICKER, REPORT_DATE],
                parse_dates = [REPORT_DATE, PUBLISH_DATE, RESTATED_DATE])
    balances = sf.load(dataset = 'balance', variant = 'ttm', market = 'us',
                      index = [TICKER, REPORT_DATE],
                      parse_dates = [REPORT_DATE, PUBLISH_DATE, RESTATED_DATE])
    cashflows = sf.load(dataset = 'cashflow', variant = 'ttm', market = 'us',
                       index = [TICKER, REPORT_DATE],
                       parse_dates = [REPORT_DATE, PUBLISH_DATE, RESTATED_DATE])

    prices = sf.load_shareprices(variant = 'daily', market = 'us')
    
    tickerfilter(prices, 'prices')
    tickerfilter(incomes, 'incomes')
    tickerfilter(balances, 'balances')
    tickerfilter(cashflows, 'cashflows')
    
##================================##
# Last refresh on **20210613**
# Set to True on a new device / when data is too old.
if False:
    reloaddata()
##================================##

## Functions

### Backtesting

In [10]:
def topandbottom(df, text, prices):
    bracket = 100
    winner = df[-bracket:]
    loser = df[:bracket]
    pricefilled = sf.asfreq(df = prices, freq = 'D', method = 'ffill')
    badstock = []
    for stock in winner.index:
        try:
            winner.loc[stock,'annualret'] = pricefilled.loc[(stock,'2019-12-31')]['Adj. Close'] / pricefilled.loc[(stock,'2019-01-01')]['Adj. Close'] - 1
        except:
            badstock.append(stock)
    for stock in loser.index:
        try:
            loser.loc[stock,'annualret'] = pricefilled.loc[(stock,'2019-12-31')]['Adj. Close'] / pricefilled.loc[(stock,'2019-01-01')]['Adj. Close'] - 1
        except:
            badstock.append(stock)
    winner_yield = winner['annualret'].mean()
    loser_yield = loser['annualret'].mean()
    
    # print(badstock) # Un-comment if you need to see which stocks are problematic.
    
    print('According to {}, \nthe top 100 stocks yield annually {:.4f};\nthe bottom 100 stocks yield annually {:.4f}'.format(text,winner_yield,loser_yield))
    return 0

def bt_prices(func, text):
    '''
    func: (Function) A function whose input is the price data for one ticker only.
    name: (str) Name of the factor
    prices: 
    '''
    prices = pd.read_pickle('goodprices.pkl')
    factors = sf.apply(df = prices, func = func)
    factor = factors.groupby(['Ticker'])[factors.columns[-1]].mean().sort_values().dropna()
    factor = pd.DataFrame(factor)
    
    topandbottom(factor, text, prices)

### Factors
Use bt_prices to backtest price-related factors.  
Use bt_volumes to backtest volume-related factors.

In [16]:
def longreversal(prices):
    '''
    Long-term Reversal
    Debondt and Thaler (JF 1985)
    '''
    longrev = sf.asfreq(df = prices, freq = 'D', method = 'ffill')
    longrev['longreversal'] = longrev['Adj. Close'].pct_change(1410).shift(390)
    longrev = longrev['longreversal']
    prices = pd.concat([prices, longrev], axis = 1, join = 'inner')
    return prices

bt_prices(longreversal, 'Long-term Reversal: Debondt and Thaler (JF 1985)')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


According to Long-term Reversal: Debondt and Thaler (JF 1985), 
the top 100 stocks yield annually 0.2859;
the bottom 100 stocks yield annually 0.1144


In [15]:
def Share_Volume(volume):
    volume = volume.resample('M').mean()
    volume['Share_Volume'] = volume['Volume'].rolling(window=3).mean()
    return volume

bt_prices(Share_Volume, 'Share volume')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


According to Share volume, 
the top 100 stocks yield annually 0.3552;
the bottom 100 stocks yield annually 1.4807
