In [1]:
from XQuant import BackTestRunner, BackTestOptions, BARRA, Basic, BARRA
from XQuant import Strategy, DataAPI, Config, Tools, DataReady, EnhancingDividend, Formatter, TradeDate
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Build the XQuant `Basic` accessor with begin='20200101'; `ed` is reused by later cells.
ed = Basic(begin='20200101')
# Read the `financial_expense_ttm` attribute; the cell output shows it as a date x ticker frame.
df = ed.financial_expense_ttm
df

ticker,600000.SH,600001.SH,600002.SH,600003.SH,600004.SH,600005.SH,600006.SH,600007.SH,600008.SH,600009.SH,...,301387.SZ,301388.SZ,301389.SZ,301391.SZ,301396.SZ,301398.SZ,301408.SZ,301419.SZ,301429.SZ,301439.SZ
2020-01-02,,,,,,,,,,,...,,,,,,,,,,
2020-01-03,,,,,,,,,,,...,,,,,,,,,,
2020-01-06,,,,,,,,,,,...,,,,,,,,,,
2020-01-07,,,,,,,,,,,...,,,,,,,,,,
2020-01-08,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-08,,,,,3.368307e+08,,7754013.88,346973858.0,4.318289e+09,161913.76,...,9905947.59,24540889.2,-16840962.84,5752784.885,10618303.76,202629.43,53170063.62,5034954.585,1.324419e+07,94005869.12
2023-09-11,,,,,3.368307e+08,,7754013.88,346973858.0,4.318289e+09,161913.76,...,9905947.59,24540889.2,-16840962.84,5752784.885,10618303.76,202629.43,53170063.62,5034954.585,1.324419e+07,94005869.12
2023-09-12,,,,,3.368307e+08,,7754013.88,346973858.0,4.318289e+09,161913.76,...,9905947.59,24540889.2,-16840962.84,5752784.885,10618303.76,202629.43,53170063.62,5034954.585,1.324419e+07,94005869.12
2023-09-13,,,,,3.368307e+08,,7754013.88,346973858.0,4.318289e+09,161913.76,...,9905947.59,24540889.2,-16840962.84,5752784.885,10618303.76,202629.43,53170063.62,5034954.585,1.324419e+07,94005869.12


In [3]:
# Search the XQuant metadata for the keyword 'fin_exp' with fuzzy matching disabled;
# the cell output maps the keyword to the data tables that contain it.
Tools.search_keyword('fin_exp', limit=100, fuzzy=False)

[32m2023-09-14 21:39:21.328[0m | [1mINFO    [0m | [36mXQuant.Utils[0m:[36msearch_keyword[0m:[36m647[0m - [1m缓存文件被保存至E:\yuankangrui\XQuant\XQuant\Temp\attrs.json[0m


{'fin_exp': ['fundamentals_cashflow',
  'financial_sheet',
  'market_financial_sheet']}

In [None]:
# Inspect the Config.datatables entry registered under 'fundamentals_balance'.
Config.datatables['fundamentals_balance']

In [None]:
# Fetch the 'fundamentals_balance' table through DataAPI with begin='20220101'.
# NOTE(review): this rebinds `df`, which the first cell used for a different frame —
# a stage-specific name would make the lineage clearer.
df = DataAPI.get_data('fundamentals_balance', begin='20220101')
df

In [None]:
# Reshape to wide form: rows = pub_date, columns = symbol, values = sur_rsv.
# NOTE(review): this overwrites its own input `df`, so the cell cannot be re-run on its
# own (the pivoted frame no longer has these columns); re-run the fetch cell first.
df = df.pivot(index='pub_date', values='sur_rsv', columns='symbol')
df

In [None]:
# Average the rows falling in each quarter, then sum those quarterly means over a
# rolling window of 4 quarters.
# NOTE(review): pd.Grouper(freq=...) without `key` groups on the index, so the index
# must be datetime-like — confirm that pub_date is parsed as datetime upstream.
df.groupby(pd.Grouper(freq="Q")).mean().rolling(window=4).sum()

In [None]:
from typing import Sequence
import statsmodels.api as sm
def ACF(series:Sequence,  nlags: int = 10, **kwargs):
    r"""
    Tabulate sample autocorrelations together with Ljung-Box Q statistics.

    For a weakly stationary series the lag-$k$ sample autocorrelation is
    $\hat\rho_k = \frac{\sum_{t=k+1}^{T}(X_t-\bar X)(X_{t-k}-\bar X)}{\sum_{t=1}^{T}(X_t-\bar X)^2}$,
    and $\hat\rho_1, \hat\rho_2, \dots$ form the sample ACF of $X_t$.
    For a stationary $AR(p)$ model the autocorrelations decay at an exponential
    rate but never become identically zero beyond some lag (the ACF "tails off").

    When every autocorrelation is zero the series is serially uncorrelated, so
    one usually tests several coefficients jointly with the portmanteau test:

    * Null hypothesis $H_0: \rho_1 = \dots = \rho_m = 0$
    * Statistic $Q(m) = T(T+2)\sum_{k=1}^{m}\frac{\hat\rho_k^2}{T-k}$
    * $Q(m)$ is asymptotically $\chi^2$ with $m$ degrees of freedom; reject
      $H_0$ when $Q(m) > \chi^2_\alpha$, i.e. when the p-value is at most the
      significance level $\alpha$.

    The docstring is a raw string on purpose: in a normal string ``\f`` (as in
    ``\frac``) is a form-feed escape and would corrupt the formulas.

    :param series: observations of the time series; anything that is not
        already an ``np.ndarray`` is converted with ``np.array``.
    :param nlags: number of lags to report (lags 1..nlags).
    :param kwargs: accepted for call-site compatibility; currently unused.
    :return: DataFrame indexed by ``lag`` with columns ``AC`` (autocorrelation),
        ``Q`` (Ljung-Box statistic) and ``P-value``.
    """
    if not isinstance(series, np.ndarray):
        series = np.array(series)

    # With qstat=True statsmodels returns (acf, Q statistics, p-values).
    acf, q_stat, p_value = sm.tsa.acf(x=series, nlags=nlags, qstat=True)

    # acf[0] is the lag-0 autocorrelation, so it is dropped to align the
    # columns with lags 1..nlags.
    table = np.column_stack([np.arange(1, nlags + 1), acf[1:], q_stat, p_value])
    output = pd.DataFrame(table, columns=['lag', "AC", "Q", "P-value"]).set_index('lag')

    if (p_value < 0.05).all():
        print("所有的p-value都小于0.05，拒绝原假设H_0，认为该序列是序列相关的")
    return output

# Show the autocorrelation table for the first column of ed.close (default nlags=10).
ACF(series=ed.close.iloc[:,0])

In [None]:
import statsmodels.tsa.api as smt
import statsmodels.formula.api as smf
from statsmodels.tsa.ar_model import AutoReg as AR
import scipy.stats as scs
from arch import arch_model
import matplotlib.pyplot as plt

In [None]:
def tsplot(
        y:Sequence,
        diff: bool = False,
        nlags:int=30, figsize:tuple[int, int] =(8, 8), style='bmh', **kwargs):
    """
    Draw a five-panel diagnostic figure for a time series.

    Panels: the series itself (top row), ACF and PACF (middle row), and a
    normal QQ plot plus a probability plot (bottom row).

    :param y: the series to analyse; converted to ``pd.Series`` if needed.
    :param diff: if True, plot the first difference of ``y`` instead of ``y``.
    :param nlags: number of lags shown in the ACF / PACF panels.
    :param figsize: matplotlib figure size in inches.
    :param style: matplotlib style name applied while drawing.
    :param kwargs: ``lags`` is honoured as an alias of ``nlags`` and takes
        precedence when both are given; other keywords are ignored.
    :raises ValueError: if no observations remain after removing NaNs.
    :return: None; the figure is displayed with ``plt.show()``.
    """
    # Previously a `lags=` keyword fell into **kwargs and was silently dropped.
    nlags = kwargs.pop('lags', nlags)

    if not isinstance(y, pd.Series):
        y = pd.Series(y)
    if diff:
        y = y.diff(1)
    # Differencing always creates a leading NaN; raw input may contain NaNs
    # too, and those would turn the ACF/PACF/QQ panels into NaN output.
    y = y.dropna()
    if y.empty:
        raise ValueError("tsplot: no observations left after dropping NaNs")

    with plt.style.context(style):
        plt.figure(figsize=figsize)
        layout = (3, 2)
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1, 0))
        pacf_ax = plt.subplot2grid(layout, (1, 1))
        qq_ax = plt.subplot2grid(layout, (2, 0))
        pp_ax = plt.subplot2grid(layout, (2, 1))

        y.plot(ax=ts_ax)
        ts_ax.set_title('Time Series Analysis')
        smt.graphics.plot_acf(y, lags=nlags, ax=acf_ax, alpha=0.05,title='ACF')
        smt.graphics.plot_pacf(y, lags=nlags, ax=pacf_ax, alpha=0.05,title='PACF')
        sm.qqplot(y, line='s', ax=qq_ax)
        qq_ax.set_title('QQ Plot')
        # Probability plot against a normal with the sample's own mean / std.
        scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)
        plt.tight_layout()
        plt.show()

# Diagnostics for the first-differenced first column of ed.close; the lag count is
# passed through tsplot's declared `nlags` parameter.
tsplot(ed.close.iloc[:,0], nlags=30, diff=True)

In [None]:
# Imported in this cell so it runs on a fresh kernel: the bare name `statsmodels`
# is not bound by any earlier cell in the notebook.
from statsmodels.tsa.ar_model import ar_select_order

# Choose the AR lag set (up to 30 lags) that minimises AIC on the first column of ed.close.
est_order = ar_select_order(ed.close.iloc[:,0].values, maxlag=30, ic='aic')
est_order.ar_lags

In [None]:
import statsmodels
max_lag = 30  # NOTE(review): not referenced anywhere in this cell
# Fit an ARIMA model of order (p=16, d=1, q=1) to the first column of ed.close and
# print the fit summary.
# NOTE(review): the AR order 16 is hard-coded; presumably taken from the AR order
# selection run earlier — confirm before reusing on other series.
mdl = sm.tsa.ARIMA(ed.close.iloc[:,0].values, order=(16, 1, 1)).fit()
print(mdl.summary())