In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from collections import Counter
import statsmodels.api as sm
import statsmodels
import random
import itertools
from statsmodels.tsa.arima_process import arma_generate_sample
from pmdarima.utils import diff_inv

In [None]:
def arima_sim(ar,ma,diff_order,nsample):
    """Simulate an ARIMA path by integrating an ARMA sample ``diff_order`` times.

    Parameters
    ----------
    ar, ma : sequence of float
        Lag-polynomial coefficients handed unchanged to
        ``arma_generate_sample`` (callers in this file include the leading 1,
        e.g. ``[1, -phi1, -phi2]``).
    diff_order : int
        Number of integrations (the "d" of ARIMA). 0 returns the ARMA sample
        itself; 1, 2, ... return the sample cumulatively summed that many times.
    nsample : int
        Number of ARMA observations to draw.

    Returns
    -------
    numpy.ndarray
        The simulated series, with the same number of points as the ARMA
        draw returned by ``arma_generate_sample``.

    Raises
    ------
    ValueError
        If ``diff_order`` is negative.
    """
    if diff_order < 0:
        raise ValueError("diff_order must be a non-negative integer")

    sample = arma_generate_sample(ar=ar, ma=ma, nsample=nsample)
    if diff_order == 0:
        return sample

    # diff_inv prepends lag*differences initial values (zeros by default) to
    # the integrated series; drop them so the output keeps the input length.
    # The previous code handled only diff_order == 1 and left `sample`
    # unassigned for diff_order == 2.
    return diff_inv(sample, lag=1, differences=diff_order)[diff_order:]

def stationary_ar2():
    """Draw a random (phi1, phi2) pair satisfying the AR(2) stationarity conditions.

    Both coefficients are sampled independently from Uniform(-0.9, 0.9) using
    numpy's global random state; pairs are redrawn until they satisfy
    ``|phi2| < 1``, ``phi1 + phi2 < 1`` and ``phi2 - phi1 < 1``.

    Returns
    -------
    tuple
        ``(phi1, phi2)``.
    """
    lower, upper = -0.9, 0.9
    while True:
        first = np.random.uniform(lower, upper)
        second = np.random.uniform(lower, upper)
        # Stationarity triangle for an AR(2) process.
        in_triangle = abs(second) < 1 and (first + second) < 1 and (second - first) < 1
        if not in_triangle:
            continue
        return first, second
    
def stationary_series_ar2(phi1,phi2,timesteps):
    """Simulate an AR(2) series and append its descriptive labels.

    Parameters
    ----------
    phi1, phi2 : float
        AR coefficients; passed to ``arima_sim`` as the lag polynomial
        ``[1, -phi1, -phi2]``.
    timesteps : int
        Number of observations to simulate.

    Returns
    -------
    numpy.ndarray
        The simulated series followed by ``[phi1, phi2, 0, 0]``
        (coefficients, differencing order 0, case 0).
    """
    ar_poly = [1, -phi1, -phi2]
    simulated = arima_sim(ar=ar_poly, ma=[1], diff_order=0, nsample=timesteps)
    label_tail = [phi1, phi2, 0, 0]
    return np.concatenate((simulated, label_tail))
def unitroot_series_1(phi1,phi2,diff_order,timesteps,case):
    """Case 1: a plain unit-root series over the whole window.

    Parameters
    ----------
    phi1, phi2 : float
        AR coefficients; passed to ``arima_sim`` as ``[1, -phi1, -phi2]``.
    diff_order : int
        Integration order forwarded to ``arima_sim``.
    timesteps : int
        Number of observations to simulate.
    case : any
        Accepted for signature parity with the other case builders but not
        read; the trailing case label is hard-coded to 1.

    Returns
    -------
    numpy.ndarray
        The simulated series followed by ``[phi1, phi2, diff_order, 1]``.
    """
    ar_poly = [1, -phi1, -phi2]
    path = arima_sim(ar=ar_poly, ma=[1], diff_order=diff_order, nsample=timesteps)
    label_tail = [phi1, phi2, diff_order, 1]
    return np.concatenate((path, label_tail))
def unitroot_series_2(phi1,phi2,diff_order,timesteps,case):
    """Case 2: a stationary segment followed by a non-stationary segment.

    The first ``timesteps // 2`` points come from a stationary AR(2)
    simulation; the remaining points come from an integrated simulation with
    the same coefficients, shifted by the last value of the first segment so
    the two pieces join at the same level.

    Parameters
    ----------
    phi1, phi2 : float
        AR coefficients; both segments use the lag polynomial
        ``[1, -phi1, -phi2]``.
    diff_order : int
        Integration order of the second (non-stationary) segment.
    timesteps : int
        Total number of observations across both segments.
    case : any
        Accepted for signature parity with the other case builders but not
        read; the trailing case label is hard-coded to 2.

    Returns
    -------
    numpy.ndarray
        The joined series followed by ``[phi1, phi2, diff_order, 2]``.
    """
    ar_poly = [1, -phi1, -phi2]
    head_len = timesteps // 2
    tail_len = timesteps - head_len

    stationary_part = arima_sim(ar=ar_poly, ma=[1], diff_order=0, nsample=head_len)
    # The original passed `--phi1` (i.e. +phi1) here, so the second segment
    # was generated with the wrong sign on phi1 relative to the stored label.
    integrated_part = arima_sim(ar=ar_poly, ma=[1], diff_order=diff_order, nsample=tail_len)
    integrated_part = integrated_part + stationary_part[-1]

    series = np.append(stationary_part, integrated_part)
    return np.concatenate((series, [phi1, phi2, diff_order, 2]))

def unitroot_series_3(phi1,phi2,diff_order,timesteps,case):
    """Case 3: a non-stationary segment followed by a stationary segment.

    The first ``timesteps // 2`` points come from an integrated simulation;
    the remaining points come from a stationary AR(2) simulation with the
    same coefficients, shifted by the last value of the first segment.

    Parameters
    ----------
    phi1, phi2 : float
        AR coefficients; passed to ``arima_sim`` as ``[1, -phi1, -phi2]``.
    diff_order : int
        Integration order of the first (non-stationary) segment.
    timesteps : int
        Total number of observations across both segments.
    case : any
        Accepted for signature parity with the other case builders but not
        read; the trailing case label is hard-coded to 3.

    Returns
    -------
    numpy.ndarray
        The joined series followed by ``[phi1, phi2, diff_order, 3]``.
    """
    head_len = timesteps // 2
    tail_len = timesteps - head_len

    integrated_part = arima_sim(
        ar=[1, -phi1, -phi2], ma=[1], diff_order=diff_order, nsample=head_len
    )
    stationary_part = arima_sim(
        ar=[1, -phi1, -phi2], ma=[1], diff_order=0, nsample=tail_len
    )
    # Shift the second segment so it continues from where the first ended.
    shifted_tail = stationary_part + integrated_part[-1]

    joined = np.append(integrated_part, shifted_tail)
    label_tail = [phi1, phi2, diff_order, 3]
    return np.concatenate((joined, label_tail))

In [None]:
def dgenerator(n,p,timesteps):
    """Build a shuffled dataset of stationary and plain unit-root series.

    Parameters
    ----------
    n : int
        Total number of samples (rows) to generate.
    p : float
        Fraction of the samples that are unit-root series; must lie in
        [0, 1]. The unit-root samples are split between differencing order 1
        and 2 (order 1 receives the extra sample when the count is odd).
    timesteps : int
        Length of every simulated series.

    Returns
    -------
    inputs : numpy.ndarray
        One row per sample as produced by ``stationary_series_ar2`` /
        ``unitroot_series_1`` (series followed by its label tail).
    outputs_b : numpy.ndarray, shape (n, 2)
        Binary one-hot target: ``[0, 1]`` stationary, ``[1, 0]`` unit root.
    outputs_m : numpy.ndarray, shape (n, 3)
        Three-class one-hot target: ``[0, 0, 1]`` stationary,
        ``[0, 1, 0]`` order 1, ``[1, 0, 0]`` order 2.

    Raises
    ------
    ValueError
        If ``n`` is negative or ``p`` is outside [0, 1].
    """
    if n < 0 or not 0 <= p <= 1:
        raise ValueError("n must be non-negative and p must lie in [0, 1]")

    # Derive every class size from a single rounding so the sizes always add
    # up to n (independent rounding plus floor division could lose samples).
    n_unit = round(n * p)
    n_stationary = n - n_unit
    n_order2 = n_unit // 2
    n_order1 = n_unit - n_order2

    inputs = []
    outputs_b = []
    outputs_m = []

    for _ in range(n_stationary):
        phi1, phi2 = stationary_ar2()
        # Use `timesteps` (not a fixed 100) so every row has the same length.
        inputs.append(stationary_series_ar2(phi1, phi2, timesteps))
        outputs_b.append([0, 1])
        outputs_m.append([0, 0, 1])

    unit_root_plan = (
        (1, n_order1, (0, 1, 0)),
        (2, n_order2, (1, 0, 0)),
    )
    for diff_order, count, multi_label in unit_root_plan:
        for _ in range(count):
            phi1, phi2 = stationary_ar2()
            inputs.append(unitroot_series_1(phi1, phi2, diff_order, timesteps, 1))
            outputs_b.append([1, 0])
            outputs_m.append(list(multi_label))

    inputs = np.array(inputs)
    outputs_b = np.array(outputs_b)
    outputs_m = np.array(outputs_m)
    assert n == len(inputs) == len(outputs_b) == len(outputs_m)

    # One shared permutation keeps inputs and both targets aligned.
    idx = np.random.permutation(n)
    return inputs[idx], outputs_b[idx], outputs_m[idx]

In [None]:
# Combined dataset generator (stationary series plus all three unit-root cases)
# p: proportion of H0 (unit-root) samples in the dataset
def _split_counts(total, parts):
    """Split ``total`` into ``parts`` integers summing to ``total`` (earlier parts get the remainder)."""
    base, extra = divmod(total, parts)
    return [base + (1 if i < extra else 0) for i in range(parts)]


def cgenerator(n,p,timesteps):
    """Build a shuffled dataset mixing stationary series with all unit-root cases.

    Unit-root samples are divided between differencing order 1 and 2, and
    within each order between the three builders ``unitroot_series_1``
    (case 1), ``unitroot_series_2`` (case 2) and ``unitroot_series_3``
    (case 3). Remainders go to the earlier group so exactly ``n`` samples are
    produced.

    Parameters
    ----------
    n : int
        Total number of samples (rows) to generate.
    p : float
        Fraction of the samples that are unit-root series; must lie in [0, 1].
    timesteps : int
        Length of every simulated series.

    Returns
    -------
    inputs : numpy.ndarray
        One row per sample as produced by the series builders (series
        followed by its label tail).
    outputs_b : numpy.ndarray, shape (n, 2)
        Binary one-hot target: ``[0, 1]`` stationary, ``[1, 0]`` unit root.
    outputs_m : numpy.ndarray, shape (n, 3)
        Three-class one-hot target: ``[0, 0, 1]`` stationary,
        ``[0, 1, 0]`` order 1, ``[1, 0, 0]`` order 2.

    Raises
    ------
    ValueError
        If ``n`` is negative or ``p`` is outside [0, 1].
    """
    if n < 0 or not 0 <= p <= 1:
        raise ValueError("n must be non-negative and p must lie in [0, 1]")

    # Derive every group size from one rounding so the sizes always add up to
    # n; the previous floor divisions (zero//2, d//3) silently dropped samples
    # and made the final permutation index out of range.
    n_unit = round(n * p)
    n_stationary = n - n_unit
    per_order = _split_counts(n_unit, 2)

    inputs = []
    outputs_b = []
    outputs_m = []

    for _ in range(n_stationary):
        phi1, phi2 = stationary_ar2()
        # Use `timesteps` (not a fixed 100) so every row has the same length.
        inputs.append(stationary_series_ar2(phi1, phi2, timesteps))
        outputs_b.append([0, 1])
        outputs_m.append([0, 0, 1])

    builders = (unitroot_series_1, unitroot_series_2, unitroot_series_3)
    order_labels = ((1, (0, 1, 0)), (2, (1, 0, 0)))
    for (diff_order, multi_label), order_total in zip(order_labels, per_order):
        per_case = _split_counts(order_total, len(builders))
        for case, (builder, count) in enumerate(zip(builders, per_case), start=1):
            for _ in range(count):
                phi1, phi2 = stationary_ar2()
                inputs.append(builder(phi1, phi2, diff_order, timesteps, case))
                outputs_b.append([1, 0])
                outputs_m.append(list(multi_label))

    inputs = np.array(inputs)
    outputs_b = np.array(outputs_b)
    outputs_m = np.array(outputs_m)
    assert n == len(inputs) == len(outputs_b) == len(outputs_m)

    # One shared permutation keeps inputs and both targets aligned.
    idx = np.random.permutation(n)
    return inputs[idx], outputs_b[idx], outputs_m[idx]