In [1]:
from functools import partial
import sys
import pandas as pd
import numpy as np
import talib as ta

sys.path.append('..')
pd.set_option("plotting.backend", "plotly")
df = pd.read_hdf('../data/cleaned_data.h5').droplevel(0)
shanghai_composite = pd.read_hdf('../data/shanghai_composite.h5', key='shanghai_composite')
factors = pd.read_hdf('../data/ch3factors.h5', key='factors')

### GET SUMMARY STATISTICS FOR ALL THE INDICES

In [2]:
# create a table for 1. Data start date, 2. Annualized mean, 3. annualized volatility for each column for df
def get_annualized_mean(df):
    return df.mean() * 252

def get_annualized_volatility(df):
    return df.std() * np.sqrt(252)

def get_annualized_sharpe_ratio(df):
    return get_annualized_mean(df) / get_annualized_volatility(df)

In [3]:
annualized_mean_returns = df.pct_change().apply(get_annualized_mean).to_frame('Annualized Mean')
annualized_volatility = df.pct_change().apply(get_annualized_volatility).to_frame('Annualized Volatility')
annualized_sharpe_ratio = df.pct_change().apply(get_annualized_sharpe_ratio).to_frame('Annualized Sharpe Ratio')
start_dates = df.apply(lambda x: x.dropna().index[0]).to_frame('Start Date')

In [4]:
annualized_table = pd.concat([start_dates, annualized_mean_returns, annualized_volatility, annualized_sharpe_ratio], axis=1)

In [5]:
shanghai_composite.rename(columns={'close': '000001'}, inplace=True)

In [6]:
shanghai_composite_annualized_mean_returns = shanghai_composite.pct_change().apply(get_annualized_mean).to_frame('Annualized Mean')
shanghai_composite_annualized_volatility = shanghai_composite.pct_change().apply(get_annualized_volatility).to_frame('Annualized Volatility')
shanghai_composite_annualized_sharpe_ratio = shanghai_composite.pct_change().apply(get_annualized_sharpe_ratio).to_frame('Annualized Sharpe Ratio')
shanghai_composite_start_dates = shanghai_composite.apply(lambda x: x.dropna().index[0]).to_frame('Start Date')

In [7]:
shanghai_composite_annualized_table = pd.concat([shanghai_composite_start_dates, shanghai_composite_annualized_mean_returns, shanghai_composite_annualized_volatility, shanghai_composite_annualized_sharpe_ratio], axis=1)

In [16]:
summary_stats = pd.concat([annualized_table, shanghai_composite_annualized_table], axis=0)
summary_stats.index.name = "tickers"
from src import config
converter = config.ticker_to_en_name
summary_stats['Start Date'] = summary_stats['Start Date'].dt.to_period('M')
summary_stats.index = summary_stats.index.map(converter, na_action='ignore')

### GET TOTAL ALPHAS AND BETAS FOR EACH SECTOR

In [23]:
factors.index = factors.index.to_period('M')
factors.drop(columns=['rf_mon'], inplace=True)

In [60]:
df_monthly = df.resample('M').last()
df_monthly.index = df_monthly.index.to_period('M')
df_monthly = df_monthly.pct_change().dropna()

In [61]:
# make the length of both df_monthly and factors the same, one is longer at one end, the other is longer at the other end
factors = factors.loc[df_monthly.index[0]:factors.index[-1]]
df_monthly = df_monthly.loc[factors.index[0]:factors.index[-1]]

In [90]:
# get factor exposures for each ticker
factor_exposures = pd.DataFrame(index=df_monthly.index, columns=df_monthly.columns)
for ticker in df_monthly.columns:
    betas, alphas = np.linalg.lstsq(factors, df_monthly[ticker], rcond=None)[0:2]
    factor_exposures[ticker] = betas

ValueError: Length of values (3) does not match length of index (222)

In [87]:
np.linalg.lstsq(factors, df_monthly['801010'], rcond=None)

(array([ 0.90650604,  0.72560119, -0.12313525]),
 array([0.4373934]),
 3,
 array([1.18094697, 0.76373449, 0.46122097]))

In [88]:
betas, alphas = (lambda x: (x[0], x[1]))(np.linalg.lstsq(factors, df_monthly['801010'], rcond=None))

In [89]:
betas, alphas

(array([ 0.90650604,  0.72560119, -0.12313525]), array([0.4373934]))