In [6]:
import numpy
from numbers import Number
import multiprocessing
import torch

In [7]:

# Markovian process generator
def generator(random_state, size, T):
    """Simulate sample paths of an AR(1) return process with GARCH(1,1) variance.

    Parameters
    ----------
    random_state : object
        Namespace exposing ``normal.Normal(loc, scale).sample(shape)``;
        the notebook passes ``torch.distributions``.
    size : int
        Number of independent sample paths.
    T : int
        Number of time steps per path.

    Returns
    -------
    torch.Tensor
        Tensor of shape ``(size, T, 3)``. The last axis holds
        ``(r_Mt, epsilon_Mt, sigma^2_Mt)``.
    """
    # Model coefficients (hard-coded).
    mu = 0.003469667713479039
    phi = -0.1392222068214604
    omega = 3.0397692859811784e-05
    alpha_1 = 0.200052672189836
    alpha_2 = 0.699953910465642

    # One standard-normal shock per (time step, path).
    shocks = random_state.normal.Normal(0,1).sample((T,size))

    paths = torch.zeros((size,T,3))
    # Initial condition: fixed return, zero innovation, and
    # variance omega / (1 - alpha_1 - alpha_2).
    paths[:,0,0] = -0.006
    paths[:,0,2] = omega/(1-alpha_1-alpha_2)

    for step in range(1,T):
        previous = paths[:,step-1]
        # Variance recursion, written first so the innovation can read it back.
        paths[:,step,2] = omega + alpha_1*previous[:,1]**2 + alpha_2*previous[:,2]
        paths[:,step,1] = torch.sqrt(paths[:,step,2]) * shocks[step]
        paths[:,step,0] = mu + phi*previous[:,0] + paths[:,step,1]
    return paths

# Smoke check of the generator: 10 paths, 25 time steps each.
out = generator(torch.distributions, size=10, T=25)
print(out.shape)

torch.Size([10, 25, 3])


In [8]:
class MarkovSampler(object):
    """Discretize sampled paths of a Markovian process into a Markov chain (torch).

    Sample paths are drawn once in the constructor. ``SA`` then fits a set of
    representative Markov states per time step by stochastic approximation and
    estimates stage-to-stage transition probabilities by frequency counts.

    Parameters
    ----------
    generator : callable
        Called as ``generator(torch.distributions, n_samples, T)``; must return
        a tensor of shape ``(n_samples, T, dim)``.
    n_Markov_states : list of int
        Requested number of Markov states at each time step (entry 0 is the
        root stage). The list is copied, so the caller's list is never modified.
    n_samples : int
        Number of sample paths to draw.
    T : int
        Horizon passed to ``generator``. The stored ``self.T`` is read back
        from the shape of the generated samples.
    """

    def __init__(self, generator, n_Markov_states: list, n_samples: int, T: int):
        self.samples = generator(torch.distributions, n_samples, T)
        shape = self.samples.shape
        self.T, self.dim_Markov_states = shape[1:]
        # Private copy: train_transition_matrix() shrinks these counts when
        # states are merged or dropped, which must not leak into the caller's list.
        self.n_Markov_states = list(n_Markov_states)
        self.generator = generator
        self.n_samples = n_samples
        # One tensor of states per time step; the root stage has a single state
        # taken from the first sample path.
        self.Markov_states = [None for _ in range(self.T)]
        self.Markov_states[0] = self.samples[0,0,:].reshape(1,-1).clone()

    def _initialize_states(self):
        """Seed the states at each step t >= 1 with the first sample paths."""
        for t in range(1, self.T):
            # clone() is essential: a plain slice is a view into self.samples,
            # and the in-place updates in SA() would then overwrite the samples.
            self.Markov_states[t] = self.samples[:self.n_Markov_states[t],t,:].clone()

    def _initialize_matrix(self):
        """Allocate zeroed transition-count matrices, one per time step."""
        # The root matrix is (1, 1) so that every entry is a 2-D
        # (n_prev_states, n_states) tensor and can be indexed uniformly.
        self.transition_matrix = [torch.ones(1, 1)]
        self.transition_matrix += [
            torch.zeros(self.n_Markov_states[t-1], self.n_Markov_states[t])
            for t in range(1, self.T)
        ]

    def SA(self):
        """Fit the Markov states by stochastic approximation.

        Each sample path moves, at every time step, its nearest state (squared
        Euclidean distance) towards the sample with step size ``1/(k+1)``,
        where ``k`` is the index of the sample path.

        Returns
        -------
        tuple
            ``(Markov_states, transition_matrix)``, two lists of length ``T``.
        """
        self._initialize_states()
        for path_idx, sample in enumerate(self.samples):
            step_size = 1.0/(path_idx+1)
            for t in range(1, self.T):
                states = self.Markov_states[t]
                diff = states - sample[t]
                nearest = int(torch.argmin(torch.sum(diff**2, dim=1)))
                states[nearest] += (sample[t] - states[nearest]) * step_size
        self.train_transition_matrix()
        return (self.Markov_states, self.transition_matrix)

    def train_transition_matrix(self):
        """Estimate transition probabilities from the samples by frequency counts.

        Duplicate states are merged, every sample is labelled with its nearest
        state at each step, transitions are counted along each path, states that
        no sample leaves are dropped, and each row is normalized to sum to one.
        """
        labels = torch.zeros(self.n_samples, self.T, dtype=torch.long)
        # Keep only unique states at each step.
        for t in range(1, self.T):
            self.Markov_states[t] = torch.unique(self.Markov_states[t], dim=0)
            self.n_Markov_states[t] = len(self.Markov_states[t])
        # Label every sample with the index of its nearest state.
        for t in range(1, self.T):
            # (n_samples, 1, dim) - (1, n_states, dim) -> (n_samples, n_states, dim)
            diff = self.samples[:, t, :].unsqueeze(1) - self.Markov_states[t].unsqueeze(0)
            labels[:, t] = torch.argmin(torch.sum(diff**2, dim=2), dim=1)
        self._initialize_matrix()
        # Count observed transitions along each sample path.
        for path_labels in labels.tolist():
            for t in range(1, self.T):
                self.transition_matrix[t][path_labels[t-1], path_labels[t]] += 1
        for t in range(1, self.T):
            counts = torch.sum(self.transition_matrix[t], dim=1)
            unused = (counts == 0)
            if unused.any():
                # A zero row means no sample was labelled with that state at
                # step t-1: drop the state, its incoming column and its row.
                self.Markov_states[t-1] = self.Markov_states[t-1][~unused]
                self.n_Markov_states[t-1] -= torch.sum(unused).item()
                self.transition_matrix[t-1] = self.transition_matrix[t-1][:, ~unused]
                self.transition_matrix[t] = self.transition_matrix[t][~unused, :]
                counts = counts[~unused]
            self.transition_matrix[t] /= counts.reshape(-1, 1)

In [9]:
T = 25
# One root state, then 100 candidate states per later stage; 1000 sample paths.
# The horizon is passed as T so it stays in step with the state-count list.
temp = MarkovSampler(generator, [1] + [100] * (T - 1), 1000, T)
a, b = temp.SA()
# temp._initialize_states()
# temp._initialize_matrix()



In [27]:
import numpy
from numbers import Number
import pandas
import multiprocessing


class Markovian(object):
    """Discretize sampled paths of a Markovian process into a Markov chain (numpy).

    Sample paths are drawn once in the constructor. ``SA``, ``RSA`` and ``SAA``
    each fit a set of Markov states per time step and a list of transition
    matrices between consecutive steps.

    Parameters
    ----------
    f : callable
        Called as ``f(numpy.random.RandomState(0), size=n_sample_paths)``; must
        return an array of shape ``(n_sample_paths, T, dim)``.
    n_Markov_states : list of int
        Requested number of Markov states at each time step (entry 0 is the
        root stage). The list is copied, so the caller's list is never modified.
    n_sample_paths : int
        Number of sample paths to draw.
    int_flag : int, optional
        When 1, ``train_transition_matrix`` calls ``self.round_to_int()`` first
        and ``SAA`` defers its transition matrix to ``train_transition_matrix``.
    """

    def __init__(self, f, n_Markov_states, n_sample_paths, int_flag=0):
        self.samples = f(numpy.random.RandomState(0),size=n_sample_paths)
        shape = self.samples.shape
        self.T, self.dim_Markov_states = shape[1:]
        # Private copy: train_transition_matrix() shrinks these counts when
        # states are merged or dropped, which must not leak into the caller's list.
        self.n_Markov_states = list(n_Markov_states)
        self.f = f
        self.int_flag = int_flag
        self.n_samples = n_sample_paths
        self.Markov_states = [None for t in range(self.T)]
        self.Markov_states[0] = self.samples[0,0,:].reshape(1,-1).copy()

    def _initialize(self):
        """Seed the states at each step t >= 1 with the first sample paths."""
        for t in range(1,self.T):
            # copy() is essential: basic slicing returns a view into
            # self.samples, and the in-place updates in SA()/RSA() would then
            # overwrite the very samples being iterated and later labelled.
            self.Markov_states[t] = self.samples[:self.n_Markov_states[t],t,:].copy()

    def _initialize_matrix(self):
        """Allocate zeroed transition-count matrices, one per time step."""
        self.transition_matrix = [numpy.array([[1]])]
        self.transition_matrix += [
            numpy.zeros([self.n_Markov_states[t-1],self.n_Markov_states[t]])
            for t in range(1,self.T)
        ]

    def SA(self):
        """Use stochastic approximation to compute the partition.

        Each sample path moves, at every time step, its nearest state (squared
        Euclidean distance) towards the sample with step size ``1/(k+1)``,
        where ``k`` is the index of the sample path.

        Returns ``(Markov_states, transition_matrix)``.
        """
        self._initialize()
        for path_idx, sample in enumerate(self.samples):
            step_size = 1.0/(path_idx+1)
            for t in range(1,self.T):
                states = self.Markov_states[t]
                diff = states - sample[t]
                nearest = numpy.argmin(numpy.sum(diff**2, axis=1))
                states[nearest] += (sample[t]-states[nearest]) * step_size
        self.train_transition_matrix()
        return (self.Markov_states,self.transition_matrix)

    def RSA(self):
        """Use robust stochastic approximation to compute the partition.

        A working iterate is updated with the constant step size
        ``1/sqrt(n_samples)``; after every sample the iterate is accumulated
        into ``Markov_states``, which is finally divided by ``n_samples``.

        Returns ``(Markov_states, transition_matrix)``.
        """
        self._initialize()
        self.iterate = [
            self.Markov_states[t].copy() for t in range(self.T)]
        step_size = 1.0/numpy.sqrt(self.n_samples)
        for sample in self.samples:
            for t in range(1,self.T):
                iterate = self.iterate[t]
                diff = iterate - sample[t]
                nearest = numpy.argmin(numpy.sum(diff**2, axis=1))
                iterate[nearest] += (sample[t]-iterate[nearest]) * step_size
            for t in range(1,self.T):
                self.Markov_states[t] += self.iterate[t]
        for t in range(1,self.T):
            self.Markov_states[t] = self.Markov_states[t]/self.n_samples
        self.train_transition_matrix()
        return (self.Markov_states,self.transition_matrix)

    def SAA(self):
        """Compute the partition by k-means clustering of the samples per step.

        With ``int_flag == 0`` the transition matrix is built from consecutive
        k-means labels; with ``int_flag == 1`` it is delegated to
        ``train_transition_matrix``.

        Returns ``(Markov_states, transition_matrix)``.
        """
        from sklearn.cluster import KMeans
        if self.int_flag == 0:
            # Every path starts in the single root state.
            labels = numpy.zeros(self.n_samples,dtype=int)
        self._initialize_matrix()
        for t in range(1,self.T):
            kmeans = KMeans(n_clusters=self.n_Markov_states[t],random_state=0,).fit(self.samples[:,t,:])
            self.Markov_states[t] = kmeans.cluster_centers_
            if self.int_flag == 0:
                labels_new = kmeans.labels_
                counts = numpy.zeros([self.n_Markov_states[t-1],1])
                for i in range(self.n_samples):
                    counts[labels[i]] += 1
                    self.transition_matrix[t][labels[i]][labels_new[i]] += 1
                self.transition_matrix[t] /= counts
                labels = labels_new
        if self.int_flag == 1:
            self.train_transition_matrix()

        return (self.Markov_states,self.transition_matrix)

    def train_transition_matrix(self):
        """Estimate transition probabilities from the samples by frequency counts.

        Duplicate states are merged, every sample is labelled with its nearest
        state at each step, transitions are counted along each path, states that
        no sample leaves are dropped, and each row is normalized to sum to one.
        """
        if self.int_flag == 1:
            # NOTE(review): round_to_int is not defined on this class as visible
            # here, so int_flag=1 raises AttributeError unless it is supplied
            # elsewhere (e.g. by a subclass) -- confirm the intended behaviour.
            self.round_to_int()
        labels = numpy.zeros([self.n_samples,self.T],dtype=int)
        # Keep only unique states at each step.
        for t in range(1,self.T):
            self.Markov_states[t] = numpy.unique(self.Markov_states[t],axis=0)
            self.n_Markov_states[t] = len(self.Markov_states[t])
        # Label every sample with the index of its nearest state.
        for t in range(1,self.T):
            # (n_samples, 1, dim) - (1, n_states, dim) -> (n_samples, n_states, dim)
            diff = self.samples[:,t,None,:] - self.Markov_states[t][None,:,:]
            labels[:,t] = numpy.argmin(numpy.sum(diff**2, axis=2),axis=1)
        self._initialize_matrix()
        # Count observed transitions; add.at accumulates repeated index pairs.
        for t in range(1,self.T):
            numpy.add.at(self.transition_matrix[t], (labels[:,t-1], labels[:,t]), 1)
        for t in range(1,self.T):
            counts = numpy.sum(self.transition_matrix[t], axis=1)
            unused = numpy.where(counts==0)[0]
            if len(unused) > 0:
                # A zero row means no sample was labelled with that state at
                # step t-1: drop the state, its incoming column and its row.
                self.Markov_states[t-1] = numpy.delete(self.Markov_states[t-1], obj=unused, axis=0)
                self.n_Markov_states[t-1] -= len(unused)
                self.transition_matrix[t-1] = numpy.delete(self.transition_matrix[t-1], obj=unused, axis=1)
                self.transition_matrix[t] = numpy.delete(self.transition_matrix[t], obj=unused, axis=0)
                counts = numpy.delete(counts, obj=unused)
            self.transition_matrix[t] /= counts.reshape(-1,1)

    def write(self, path):
        """Write the states and transition matrix of every step to CSV files.

        File names are formed by appending ``Markov_states_{t}.csv`` and
        ``transition_matrix_{t}.csv`` directly to ``path`` (no separator is
        inserted).
        """
        for t in range(self.T):
            pandas.DataFrame(self.Markov_states[t]).to_csv(path + "Markov_states_{}.csv".format(t))
            pandas.DataFrame(self.transition_matrix[t]).to_csv(path + "transition_matrix_{}.csv".format(t))

    def simulate(self, n_samples):
        """A utility function. Generate a three dimensional array
        (n_samples * T * n_states) representing n_samples number of sample paths.
        Can be used to generate fan plot to compare with the historical data.

        Path ``i`` is drawn with ``numpy.random.RandomState(i)``, so repeated
        calls return the same paths.
        """
        sim = numpy.empty([n_samples,self.T,self.dim_Markov_states])
        for i in range(n_samples):
            state = 0
            random_state = numpy.random.RandomState(i)
            for t in range(self.T):
                state = random_state.choice(range(self.n_Markov_states[t]),p=self.transition_matrix[t][state],)
                sim[i][t]=self.Markov_states[t][state]
        return sim

In [28]:
# Markovian process generator
def generator(random_state, size, T=25):
    """Simulate sample paths of an AR(1) return process with GARCH(1,1) variance.

    Parameters
    ----------
    random_state : numpy.random.RandomState
        Source of the standard-normal shocks (``random_state.normal`` is used).
    size : int
        Number of independent sample paths.
    T : int, optional
        Number of time steps per path. Defaults to 25, the value that was
        previously hard-coded, so existing two-argument calls are unchanged.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, T, 3)``. The last axis holds
        ``(r_Mt, epsilon_Mt, sigma^2_Mt)``.
    """
    omega = 3.0397692859811784e-05
    alpha_1 = 0.200052672189836
    alpha_2 = 0.699953910465642
    mu = 0.003469667713479039
    phi = -0.1392222068214604
    # One standard-normal shock per (time step, path).
    epsilon = random_state.normal(size=[T,size])
    process = numpy.zeros(shape=[size,T,3])
    # Initial condition: fixed return, zero innovation, and
    # variance omega / (1 - alpha_1 - alpha_2).
    process[:,0,0] = -0.006
    process[:,0,2] = omega/(1-alpha_1-alpha_2)
    for t in range(1,T):
        # The variance is written first because the innovation reads it back.
        process[:,t,2] = omega + alpha_1*process[:,t-1,1]**2 + alpha_2*process[:,t-1,2]
        process[:,t,1] = numpy.sqrt(process[:,t,2]) * epsilon[t]
        process[:,t,0] = mu + phi*process[:,t-1,0] + process[:,t,1]
    return process


In [31]:
T = 25
N = 5
# One root state, then 100 candidate states per later stage; 1000 sample paths.
markovian = Markovian(
    generator,
    n_Markov_states=[1] + [100] * (T - 1),
    n_sample_paths=1000,
)
a, b = markovian.SA()