# Synthetic data examples

In this notebook we build synthetic data suitable for Alphalens analysis. This is useful for understanding how Alphalens expects its input to be formatted, and it also provides a good testing environment for experimenting with Alphalens.

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from numpy import nan
from pandas import (DataFrame, date_range)

from alphalens.tears import (create_returns_tear_sheet,
                      create_information_tear_sheet,
                      create_turnover_tear_sheet,
                      create_summary_tear_sheet,
                      create_full_tear_sheet,
                      create_event_returns_tear_sheet,
                      create_event_study_tear_sheet)

from alphalens.utils import get_clean_factor_and_forward_returns

In [2]:
import alphalens as al

In [3]:
#
# build price
#
# 50 daily rows; on day i every ticker is priced at rate**i, where `rate` is
# that ticker's constant daily growth factor.
tickers = ['A', 'B', 'C', 'D', 'E', 'F']
daily_rates = [1.0025, 1.005, 1.00, 0.995, 1.005, 1.00]
price_index = date_range(start='2015-1-10', end='2015-2-28', name='date')
data = [[rate**day for rate in daily_rates] for day in range(1, 51)]
base_prices = DataFrame(data=data, index=price_index, columns=tickers)

#
# build factor
#
# Each daily row takes one of two shapes: values for A-D with E and F
# missing, or values for A, D, E, F with B and C missing.
values_abcd = [3, 4, 2, 1, nan, nan]
values_adef = [3, nan, nan, 1, 4, 2]
factor_rows = (
    [values_abcd] + [values_adef]
    + [values_abcd] * 4 + [values_adef] * 2
    + [values_abcd] * 2 + [values_adef] * 10
    + [values_abcd] * 8 + [values_adef] * 2
)
factor_index = date_range(start='2015-1-15', end='2015-2-13', name='date')
factor = DataFrame(data=factor_rows, index=factor_index, columns=tickers)

# Tickers alternate between the two groups.
factor_groups = {
    'A': 'Group1',
    'B': 'Group2',
    'C': 'Group1',
    'D': 'Group2',
    'E': 'Group1',
    'F': 'Group2',
}

In [4]:
# create artificial intraday prices
#
# Three observations per calendar day are derived from the daily base prices:
# the open (09:30), open + 1 hour and open + 3 hours.

# open: the base price, stamped at 09:30
today_open = base_prices.copy()
today_open.index = today_open.index + pd.Timedelta('9h30m')

# every day, 1 hour after the open, all stocks are 0.1% above the open price
today_open_1h = today_open.copy()
today_open_1h.index = today_open_1h.index + pd.Timedelta('1h')
today_open_1h = today_open_1h + today_open_1h * 0.001

# every day, 3 hours after the open, all stocks are 0.2% below the open price
today_open_3h = today_open.copy()
today_open_3h.index = today_open_3h.index + pd.Timedelta('3h')
today_open_3h = today_open_3h - today_open_3h * 0.002

# prices DataFrame holds all intraday observations in chronological order
intraday_frames = [today_open, today_open_1h, today_open_3h]
prices = pd.concat(intraday_frames).sort_index()

In [5]:
# Align factor to open price: stamp every factor row at 09:30, then reshape
# the wide (date x ticker) frame into a long Series indexed by (date, asset).
factor.index = factor.index + pd.Timedelta('9h30m')
factor = factor.stack()
factor.index.names = ['date', 'asset']

In [6]:
# `prices` holds three rows per day (open, open + 1h, open + 3h), so a period
# of N rows ahead of a factor timestamp (always an open) means:
#   1 -> today open to open + 1 hour
#   2 -> today open to open + 3 hours
#   3 -> today open to next day open
#   6 -> today open to the open two days later
forward_periods = (1, 2, 3, 6)

factor_data = get_clean_factor_and_forward_returns(
    factor,
    prices,
    periods=forward_periods,
    quantiles=4,
    groupby=factor_groups,
    filter_zscore=None,
)

Dropped 0.0% entries from factor data: 0.0% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!


# IC

In [7]:
# Compute the information coefficient of the factor from `factor_data`.
# NOTE(review): presumably yields one column per forward-return period — confirm.
ic = al.performance.factor_information_coefficient(factor_data)

In [8]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [9]:
# Forward-return period to plot; the same '1h' label shows up in the figure
# titles produced below.
period_num='1h'

# 柱状分布图

In [10]:
# Build the standalone IC histogram figure for the selected period
# (`_ic_hist_fig` is a private helper of the plotting module).
hist_fig = al.plotting._ic_hist_fig(ic, period_num)

In [11]:
# Display the standalone histogram figure.
hist_fig

# QQ图

In [12]:
# Build the standalone IC Q-Q figure for the selected period
# (`_ic_qq_fig` is a private helper of the plotting module).
qq_fig = al.plotting._ic_qq_fig(ic, period_num)

In [13]:
# Display the standalone Q-Q figure.
qq_fig

# 合并图

In [14]:
# Keep handles on both standalone layouts; their titles become the titles of
# the two side-by-side subplots of the combined figure.
hist_layout, qq_layout = hist_fig.layout, qq_fig.layout
subplot_titles = tuple(
    layout.title.text for layout in (hist_layout, qq_layout)
)
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=subplot_titles,
    horizontal_spacing=0.15,
)

## 添加柱状图

In [15]:
# Copy the histogram traces into the left subplot (row 1, col 1).
fig.add_traces(hist_fig.data, rows=1,cols=1)

In [16]:
# Carry the histogram's text annotation(s) over to the combined figure.
# Do not pass `row`/`col` here: the position below is expressed in
# figure-wide paper coordinates.
note_placement = dict(
    showarrow=False,
    x=0.05,
    y=0.95,
    xref='paper',
    yref='paper',
)
for note in hist_layout.annotations:
    fig.add_annotation(text=note.text, **note_placement)

In [17]:
# Display the combined figure with the histogram traces and annotation in place.
fig

In [18]:
# Inspect the combined layout: the two subplot titles and the copied
# histogram annotation are all listed under 'annotations'.
fig.layout

Layout({
    'annotations': [{'font': {'size': 16},
                     'showarrow': False,
                     'text': '1h 周期信息系数',
                     'x': 0.2125,
                     'xanchor': 'center',
                     'xref': 'paper',
                     'y': 1.0,
                     'yanchor': 'bottom',
                     'yref': 'paper'},
                    {'font': {'size': 16},
                     'showarrow': False,
                     'text': '1h 周期信息系数 Normal Q-Q分布',
                     'x': 0.7875,
                     'xanchor': 'center',
                     'xref': 'paper',
                     'y': 1.0,
                     'yanchor': 'bottom',
                     'yref': 'paper'},
                    {'showarrow': False,
                     'text': '均值 0.005 \n 标准差 0.441',
                     'x': 0.05,
                     'xref': 'paper',
                     'y': 0.95,
                     'yref': 'paper'}],
    'template': '...',
    'xaxis': {

In [19]:
# Upper bound of the first x-axis' domain, i.e. the fraction of the figure
# width at which the left subplot ends.
x1_factor = fig.layout.xaxis.domain[1]

In [20]:
# Inspect the standalone histogram layout, including its 'shapes' entry.
hist_layout

Layout({
    'annotations': [{'showarrow': False,
                     'text': '均值 0.005 \n 标准差 0.441',
                     'x': 0.05,
                     'xref': 'paper',
                     'y': 0.95,
                     'yref': 'paper'}],
    'barmode': 'overlay',
    'hovermode': 'closest',
    'legend': {'traceorder': 'reversed'},
    'shapes': [{'line': {'color': 'black', 'dash': 'dash', 'width': 2},
                'type': 'line',
                'x0': 0.0051247543699529064,
                'x1': 0.0051247543699529064,
                'y0': 0,
                'y1': 1,
                'yref': 'paper'}],
    'showlegend': False,
    'template': '...',
    'title': {'text': '1h 周期信息系数'},
    'xaxis': {'anchor': 'y2', 'domain': [0.0, 1.0], 'range': [-1, 1], 'title': {'text': 'IC'}, 'zeroline': False},
    'yaxis': {'anchor': 'free', 'domain': [0.0, 1], 'position': 0.0}
})

In [21]:
# Copy the histogram's marker shapes (the dashed vertical line at the mean IC)
# onto the combined figure.
#
# The source shapes carry no `xref`, so their x0/x1 are data coordinates on the
# first x-axis -- the same axis the histogram traces use in subplot (1, 1).
# They are therefore copied unchanged: scaling them by the subplot's domain
# width would move the line away from the mean.  Their `yref='paper'` extent
# (0..1) still spans the full height of this single-row figure.
#
# The shape is handed to `add_shape` as-is rather than updated first, so
# `hist_fig` is not modified and the cell gives the same result when re-run.
for shape in hist_layout.shapes:
    fig.add_shape(shape)

In [22]:
# Display the combined figure now that the histogram shapes have been added.
fig

## 添加QQ

In [23]:
# Copy the Q-Q traces into the right subplot (row 1, col 2).
fig.add_traces(qq_fig.data,rows=1,cols=2)

In [24]:
# Inspect the standalone Q-Q layout: its reference line is a 'paper'-anchored
# shape, and both axes carry an explicit range.
qq_layout

Layout({
    'shapes': [{'line': {'color': 'red'},
                'type': 'line',
                'x0': 0,
                'x1': 1,
                'xref': 'paper',
                'y0': 0,
                'y1': 1,
                'yref': 'paper'}],
    'showlegend': False,
    'template': '...',
    'title': {'text': '1h 周期信息系数 Normal Q-Q分布'},
    'xaxis': {'range': [-1.877223317059522, 2.1869953901013033], 'title': {'text': 'Normal分布分位数'}},
    'yaxis': {'range': [-1.877223317059522, 2.1869953901013033], 'title': {'text': '观测分位数'}}
})

In [25]:
# Draw the red y = x reference line in the Q-Q subplot.
#
# The endpoints reuse the x-axis range of the standalone Q-Q figure and are
# data values, so no `xref`/`yref` is given: `row=1, col=2` anchors the shape
# to the axes of the right-hand subplot.  Declaring 'paper' references here
# would contradict the endpoints, because paper coordinates only run 0..1.
m_min, m_max = qq_layout.xaxis.range
fig.add_shape(
    dict(
        type="line",
        x0=m_min,
        y0=m_min,
        x1=m_max,
        y1=m_max,
        line=dict(
            color="red",
        )
    ),
    row=1,
    col=2
)

In [26]:
# Final touches: hide the legend and reuse the axis titles of the two
# standalone figures (x title for the histogram, x and y titles for the Q-Q).
fig.update_layout(
    showlegend=False,
    xaxis_title_text=hist_layout.xaxis.title.text,
    xaxis2_title_text=qq_layout.xaxis.title.text,
    yaxis2_title_text=qq_layout.yaxis.title.text,
)

# 使用`plotting`

In [27]:
# Packaged helper taking the same inputs as the figures built by hand above.
# NOTE(review): presumably produces the combined histogram + Q-Q figure in a
# single call — confirm against the plotting module.
al.plotting.plot_ic_hist_qq(ic,period_num)