In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.api import VAR

class AR():
    r'''
    Autoregression (AR)

        y_t = \sum_{i} \alpha_i * y_{t-i} + res_t

    y: T * 1
    T: number of time steps

    The docstring is a raw string so the LaTeX backslashes stay literal
    (in a plain string "\a" would be turned into the BEL control character).
    '''
    def __init__(self, data, p=50):
        '''
        Pick the lag set with ``ar_select_order`` and fit an ``AutoReg``
        model on those lags.

        arg data: targets
            p   : max lag order handed to the lag selection
        '''
        self.data = data
        selection = ar_select_order(data, maxlag=p)
        self.results = AutoReg(data, lags=selection.ar_lags).fit()

    def residual(self):
        '''
        transform targets to their AR residuals
        return: ``data - fittedvalues`` of the fitted model
        '''
        # NOTE(review): the fitted values presumably do not cover the first
        # lag steps, so with pandas input those positions would come out as
        # NaN after index alignment -- confirm against the statsmodels docs.
        return self.data - self.results.fittedvalues

    def predict(self, res):
        '''
        transform predicted residuals to predicted targets
        arg res: predicted residuals, len = predict period; any array-like
                 (Series, ndarray, list) is accepted
        return: model forecast over the period plus ``res``, elementwise
        '''
        # np.asarray instead of ``res.values`` so plain lists / ndarrays work
        # too; the addition stays positional, exactly as with ``.values``.
        res = np.asarray(res)
        pred = self.results.forecast(len(res))
        return pred + res
    
class V_AR():
    r'''
    Vector Autoregression (VAR)

        Y_t = \sum_{i} A_i * Y_{t-i} + Res_t

    Y: T * K
    T: number of time steps
    K: number of features

    The docstring is a raw string so the LaTeX backslashes stay literal.
    '''
    def __init__(self, data, p=None):
        '''
        Fit a ``VAR`` model on the given features.

        arg data: features
            p   : lag setting forwarded as ``VAR.fit(maxlags=p)``; the default
                  ``None`` leaves the choice to the library, which is what the
                  previous argument-less ``fit()`` call did
        '''
        self.data = data
        self.results = VAR(data).fit(maxlags=p)

    def residual(self):
        '''
        transform features to their VAR residuals
        return: ``data - fittedvalues`` of the fitted model
        '''
        # NOTE(review): the fitted values presumably start after the first
        # lag steps, so with pandas input those rows would come out as NaN
        # after index alignment -- confirm against the statsmodels docs.
        return self.data - self.results.fittedvalues

In [None]:
# Load the raw training table and key it by (investment_id, time_id).
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory. read_pickle can execute arbitrary code, so only load
# pickles from a trusted source.
df = pd.read_pickle('C:/Users/miaoy/Desktop/11785/HwData/project/train.pkl')
df = df.set_index(['investment_id', 'time_id'])
# `columns=` instead of a positional axis: pandas 2.x no longer accepts
# `drop(label, 1)`.
df = df.drop(columns='row_id')

In [None]:
# Re-index the data onto the full investment_id x time_id grid so every
# investment has a row for every time step (missing combinations become NaN).
iterables = [range(3774), range(1220)]
index = pd.MultiIndex.from_product(iterables, names=['investment_id', 'time_id',])
# Left-join onto an index-only frame: no placeholder column to create and
# drop afterwards, and no hand-computed row count to keep in sync.
adf = pd.DataFrame(index=index).join(df)
adf.head()

In [None]:
# Split the dense table into the prediction target and the feature columns.
target = adf['target']
# `columns=` instead of a positional axis: pandas 2.x no longer accepts
# `drop(label, 1)`.
feature = adf.drop(columns='target')

In [None]:
import warnings

# Fit one VAR per investment on its complete rows and collect the residuals.
m = []
name = []
# Silence warnings only while fitting; a global filterwarnings('ignore')
# would also hide every later warning in the kernel.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    # Iterate over the ids actually present in the index rather than a
    # hard-coded count.
    for x in feature.index.unique(level='investment_id'):
        if x % 50 == 0: print(x)  # coarse progress indicator
        f = feature.loc[x].dropna()
        if f.empty: continue  # no complete row for this investment
        V = V_AR(f)
        m.append(V.residual())
        name.append(x)
# Stack the per-investment residual frames under an outer 'investment_id' level.
df_ar = pd.concat(m, keys=name, names=['investment_id'])

In [None]:
# Attach the target to the residual features. Dropping a 'target' column left
# by an earlier run keeps this cell re-runnable (a second plain join would
# fail on the overlapping column name).
df_ar = df_ar.drop(columns='target', errors='ignore').join(target)