In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the factor table (the first CSV column becomes the index) and discard
# every row that contains at least one missing value.
data = pd.read_csv('factor_pass9.csv', index_col=0).dropna()

# Chronological split on the 'date' column, compared against a date string:
# rows before 2022-01-01 are in-sample, rows on/after it are out-of-sample.
# NOTE(review): assumes 'date' holds ISO-formatted strings so that the
# lexicographic comparison matches calendar order -- confirm against the CSV.
date_col = data['date']
insample_data = data[date_col < "2022-01-01"]
outdsample_data = data[date_col >= "2022-01-01"]

# Positional column layout used below: columns 0-2 are kept aside as
# non-feature info, columns 3..-2 are the regressors, the last column is the
# regression target.
X, y = insample_data.iloc[:, 3:-1], insample_data.iloc[:, -1]
other_info_outsample_test = outdsample_data.iloc[:, :3]
X_outsample_test, y_outsample_test = outdsample_data.iloc[:, 3:-1], outdsample_data.iloc[:, -1]

In [3]:
# Build the in-sample design matrix (intercept column of ones + regressors)
# straight from insample_data rather than from the previous value of X.
# Re-running this cell therefore never prepends a second column of ones,
# which would make X'X singular.
X = np.c_[np.ones(insample_data.shape[0]), insample_data.iloc[:, 3:-1]]

# Ordinary least squares via the normal equations (X'X) theta = X'y.
# Solving the linear system avoids forming the explicit inverse of X'X,
# which is slower and loses more precision when X'X is ill-conditioned.
theta = np.linalg.solve(X.T.dot(X), X.T.dot(y))
theta

array([-1.18619009e-02, -1.03243212e-07,  8.60824082e-05,  4.34762158e-03,
        5.25827411e-05, -8.92364556e-04,  8.44783618e-04, -1.07777767e-03,
       -3.15561259e-04, -1.52970053e-04, -8.45939859e-02,  1.17726533e-05,
       -1.06176352e-15,  4.52856313e-03, -6.81597089e-03,  4.01565725e-04,
        7.74141970e-07,  5.08810551e-03, -1.15042225e-03, -7.90508842e-05,
        3.25108351e-04, -1.28833226e-04,  4.00996586e-03,  3.92489181e-05,
        2.61138144e-03,  2.44145753e-03,  4.10209357e-04, -2.29468340e-09])

In [4]:
def cal_ICIR(data: pd.DataFrame, feild: str) -> tuple[float, float]:
    """
    Compute the rank IC and the IR of a single factor column.

    Rows with a missing value in 'date', 'return' or the factor column are
    dropped. For every remaining distinct date, the Spearman correlation
    between the factor column and 'return' is computed; the IC is the mean of
    these per-date correlations and the IR is that mean divided by their
    standard deviation (pandas default, ddof=1).

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the columns 'date', 'return' and ``feild``. The frame is
        not modified.
    feild : str
        Name of the factor column. (The spelling is kept as-is so existing
        keyword callers keep working.)

    Returns
    -------
    tuple[float, float]
        ``(IC, IR)``.
    """
    # Work on a cleaned copy so the caller's frame is never mutated.
    cleaned = data.loc[:, ['date', 'return', feild]].dropna()

    # Restrict each group to the two columns being correlated. Passing the
    # grouping column (often a date string) into DataFrame.corr raises on
    # pandas >= 2.0, where numeric_only defaults to False.
    ic_by_date = cleaned.groupby('date')[[feild, 'return']].apply(
        lambda grp: grp.corr(method='spearman').loc[feild, 'return']
    )

    ic_mean = ic_by_date.mean()
    return ic_mean, ic_mean / ic_by_date.std()

def test_factor(ICIR: tuple[float, float]) -> str:
    """
    ICIR is a tuple of IC and IR
    return the test result
    """
    if abs(ICIR[0]) > 0.01 and abs(ICIR[1]) > 0.03:
        return 'pass'
    else:
        return 'fail'

In [5]:
# Score the out-of-sample rows: prepend the intercept column of ones to the
# regressors and apply the fitted coefficient vector.
design_outsample = np.c_[np.ones(len(X_outsample_test)), X_outsample_test]
preds = design_outsample.dot(theta)

# Assemble one frame, aligned on the out-of-sample index: the info columns,
# the predictions (column 'preds') and the realised target.
preds_frame = pd.DataFrame(preds, index=X_outsample_test.index, columns=['preds'])
matrix = pd.concat(
    [other_info_outsample_test, preds_frame, y_outsample_test],
    axis=1,
)

# Evaluate the predictions as a factor and classify the outcome.
ICIR = cal_ICIR(matrix, "preds")
print(ICIR)
result = test_factor(ICIR)

(0.018460805431904946, 0.04888638032153474)


In [7]:
# Persist the out-of-sample frame. to_csv does not create missing
# directories, so make sure ./result exists first (no-op when it already does).
output_path = Path('./result/APT.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
matrix.to_csv(output_path)