In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pylab import rcParams
import statsmodels.tsa.stattools as st
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima_model import ARIMA

In [None]:
def _load_csv_with_date(path, date_col):
    """Read the CSV at `path` and convert `date_col` from YYYYMMDD text to datetime64.

    The column is read as a string and converted in one vectorised
    `pd.to_datetime` call. This avoids the `pd.datetime` alias (removed from
    pandas) and the deprecated `date_parser` argument of `read_csv`, and it is
    much faster than parsing each value with a Python lambda.
    """
    frame = pd.read_csv(path, dtype={date_col: str})
    frame[date_col] = pd.to_datetime(frame[date_col], format="%Y%m%d")
    return frame


# Raw tables: SHIBOR rates, fund daily interest, and per-user balance records.
sb = _load_csv_with_date("mfd_bank_shibor.csv", "mfd_date")
it = _load_csv_with_date("mfd_day_share_interest.csv", "mfd_date")
bl = _load_csv_with_date("user_balance_table.csv", "report_date")

# Join the two rate tables on their shared date, then attach them to the balance rows.
itsb = pd.merge(sb, it, on="mfd_date")
merged = pd.merge(itsb, bl, left_on="mfd_date", right_on="report_date")

# Aggregate to one row per report_date (the date becomes the index). The redundant
# mfd_date key is dropped first so it does not take part in the sum.
data = merged.drop("mfd_date", axis=1).groupby(by="report_date").sum()

In [None]:
def test_stationarity(timeseries):
    """
    平稳性检验
    """
    rolmean = pd.rolling_mean(timeseries, window=12)  # 滑动平均数
    rolstd = pd.rolling_std(timeseries, window=12)  # 滑动标准差

    # 画出原时序、滑动平均数和滑动标准差的曲线
    plt.figure(figsize=(17,6))
    orig = plt.plot(timeseries, color='blue', label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # 进行 Dickey-Fuller 测试:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=[
                         'Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)

# Run the stationarity check on the third-order difference of
# total_purchase_amt, restricted to rows from 2014-05 onward.
purchase_from_may = data["total_purchase_amt"]['2014-05':]
purchase_third_diff = purchase_from_may.diff().diff().diff().dropna()
test_stationarity(purchase_third_diff)

In [None]:
# Correlograms of the undifferenced total_purchase_amt series from 2014-05 onward.
purchase_series = data["total_purchase_amt"]['2014-05':]
data_acf = acf(purchase_series)
data_pacf = pacf(purchase_series)

# Reference band drawn on both panels: +/- 1.96 / sqrt(N), N = number of observations.
conf_bound = 1.96 / np.sqrt(len(purchase_series))

plt.figure(figsize=(16, 7))
# One panel per correlogram: (subplot position, values, title).
for position, values, title in (
    (121, data_acf, 'Autocorrelation Function'),
    (122, data_pacf, 'Partial Autocorrelation Function'),
):
    plt.subplot(position)
    plt.plot(values)
    # Zero line plus the lower and upper reference bounds.
    for level in (0, -conf_bound, conf_bound):
        plt.axhline(y=level, linestyle='--', color='gray')
    plt.title(title)
plt.tight_layout()

In [None]:
# Twice-differenced total_purchase_amt from 2014-05 onward; this is both the
# model input and the reference curve in the plot.
purchase_diff2 = data["total_purchase_amt"]['2014-05':].diff().diff().dropna()

# The series is already differenced twice by hand, so the model's own
# differencing order d must be 0. Passing d=2 here would difference it twice
# more (a 4th-order difference overall) and the fitted values would no longer
# be on the same scale as the plotted series.
# NOTE(review): if the intent is to model/forecast the original levels, pass the
# undifferenced series with order=(2, 2, 2) instead - confirm which is wanted.
model = ARIMA(purchase_diff2, order=(2, 0, 2))
results_ARIMA = model.fit(disp=-1)

# Overlay the in-sample fitted values (red) on the series the model was fit to.
plt.figure(figsize=(16, 7))
plt.plot(purchase_diff2)
plt.plot(results_ARIMA.fittedvalues, color='red')