# Causal hypothesis generation

Given several different observation networks, this method proposes a single causal graph.

In [1]:
import random

import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn

In [2]:
class Data:
    """Serve random fixed-length windows of simulated trajectories as batches.

    The pickled frame is expected to expose a ``trajectories`` column whose
    entries carry ``positions``, ``velocity``, ``total_energy`` and ``edges``
    sequences indexed by time step.
    """

    def __init__(self, batch_size=1, time_lag=4,
                 trajectory_length=10000, sample_freq=50):
        """Load the pickled simulations and split them 80/20 into train/test.

        Args:
            batch_size: Number of trajectories drawn per batch.
            time_lag: Number of consecutive time steps in each input window.
            trajectory_length: Total simulated time steps per trajectory.
            sample_freq: Simulation steps between two stored frames.

        The last two defaults mirror the ``total_time_steps=10000`` /
        ``sample_freq=50`` values used for sampling elsewhere in this
        notebook -- TODO confirm they match how ``dyari.pkl`` was generated.
        """
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        # An "unsupported pickle protocol: 5" error means the file was written
        # with Python >= 3.8 and is being read by an older interpreter.
        self.data = pd.read_pickle('dyari.pkl')
        self.train = self.data.sample(frac=0.8, random_state=200)
        self.test = self.data.drop(self.train.index)
        self.batch_size = batch_size
        self.time_lag = time_lag
        self.trajectory_length = trajectory_length
        self.sample_freq = sample_freq

    def _lag_batch(self, _positions, _velocity, _energy, _edges):
        """Cut one random window of ``time_lag`` steps out of a trajectory.

        Returns:
            A pair ``(x, y)``: ``x`` stacks the concatenated position,
            velocity and energy frames of every step in the window, ``y``
            holds only the edges of the window's final step.

        Raises:
            ValueError: If ``time_lag`` is smaller than 1 or larger than the
                number of stored frames.
        """
        # Integer division: random.randint needs an integer upper bound.
        stored_frames = self.trajectory_length // self.sample_freq
        if not 1 <= self.time_lag <= stored_frames:
            raise ValueError(
                f'time_lag={self.time_lag} must be between 1 and the number '
                f'of stored frames ({stored_frames})'
            )

        # window_end is exclusive, so the window is [window_end - lag, window_end).
        window_end = random.randint(self.time_lag, stored_frames)
        window = range(window_end - self.time_lag, window_end)

        _bx = [
            pd.concat([_positions[step], _velocity[step], _energy[step]])
            for step in window
        ]
        # Only the graph at the last observed step is used as the target.
        _by = [_edges[window_end - 1]]
        return np.asarray(_bx), np.asarray(_by)

    def get_batch(self, mode='train'):
        """Draw ``batch_size`` random trajectories and window each of them.

        Args:
            mode: ``'train'`` samples from the training split; any other
                value samples from the test split.

        Returns:
            A pair of arrays ``(batch_x, batch_y)`` with one entry per
            sampled trajectory.
        """
        source = self.train if mode == 'train' else self.test
        row_positions = random.sample(range(len(source)), self.batch_size)
        simulation_samples = source.iloc[row_positions]

        batch_x = []
        batch_y = []
        # Iterate over the values themselves: the sampled frame keeps its
        # original index labels, so label lookups like ``trajectories[0]``
        # would miss or hit the wrong row.
        for trajectory in simulation_samples.trajectories:
            _x, _y = self._lag_batch(trajectory.positions,
                                     trajectory.velocity,
                                     trajectory.total_energy,
                                     trajectory.edges)
            batch_x.append(_x)
            batch_y.append(_y)

        return np.asarray(batch_x), np.asarray(batch_y)

In [3]:
# Smoke-test the loader: build it and print one freshly drawn training batch.
# get_batch must be *called*; printing the attribute only shows the bound method.
d = Data()
print(d.get_batch())

ValueError: unsupported pickle protocol: 5

In [None]:
number_of_particles = 4
observation_length = 4

# Request one frame more than is observed; the last row becomes the target Y.
# presumably sample_trajectory returns one row per observed frame -- verify.
data_frame = sim.sample_trajectory(
    total_time_steps=10000,
    sample_freq=50,
    observation_length=observation_length + 1,
)


def _reshape(_x):
    """Flatten one sampled row into a (1, number_of_particles * 2) array."""
    return _x.to_numpy().reshape((1, number_of_particles * 2))


# Inputs: the first `observation_length` rows; target: the final row.
X = [_reshape(data_frame.iloc[row]) for row in range(observation_length)]
Y = [_reshape(data_frame.iloc[-1])]

print(X[0].shape, X, Y, sep='\n')



In [None]:
# Problem dimensions declared for the model cell.
# NOTE(review): these names are not referenced elsewhere in the visible code.
particles, features, time_slice = 4, 2, 4

class Net(torch.nn.Module):
    """Single-layer LSTM that reads a sequence with a batch size of one.

    The recurrent state is kept on ``self.hidden`` and carried over from one
    ``forward`` call to the next; callers that want independent sequences
    must reset it themselves.
    """

    def __init__(self, input_size=8, hidden_size=8):
        """Create the LSTM and a randomly initialised hidden state.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the LSTM state, which is also the output
                width.
        """
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size)
        # (h_0, c_0), each shaped (num_layers=1, batch=1, hidden_size).
        self.hidden = (torch.randn(1, 1, hidden_size),
                       torch.randn(1, 1, hidden_size))

    def forward(self, x):
        """Run the whole sequence through the LSTM and return its last output.

        Args:
            x: Sequence of time steps (tensor, array, or list of arrays);
                each step is flattened to ``input_size`` features.

        Returns:
            Tensor of shape ``(1, 1, hidden_size)``: the output at the final
            time step.

        Raises:
            ValueError: If ``x`` contains no time steps.
        """
        if torch.is_tensor(x):
            x = x.to(torch.float32)
        else:
            # Going through one ndarray avoids the slow element-wise
            # conversion of a Python list of arrays.
            x = torch.as_tensor(np.asarray(x), dtype=torch.float32)

        if x.dim() == 0 or x.shape[0] == 0:
            raise ValueError('forward() needs at least one time step')

        # (seq_len, batch=1, input_size): a single LSTM call is equivalent to
        # stepping through the sequence one element at a time.
        steps = x.reshape(x.shape[0], 1, -1)
        out, self.hidden = self.lstm(steps, self.hidden)
        return out[-1:]

In [None]:
class Model(Net):
    """``Net`` bundled with an MSE loss, an SGD optimizer and a training loop."""

    def __init__(self):
        super().__init__()
        self.criterion = torch.nn.MSELoss(reduction='sum')
        self.optimizer = torch.optim.SGD(self.parameters(),
                                         lr=0.001,
                                         momentum=0.9)

    def loss(self, y_prediction, y_real):
        """Return the summed squared error between prediction and target.

        Args:
            y_prediction: Tensor produced by the network.
            y_real: Target values (tensor, array, or list of arrays).
        """
        if not torch.is_tensor(y_real):
            y_real = torch.as_tensor(np.asarray(y_real), dtype=torch.float32)
        return self.criterion(y_prediction, y_real)

    def print_params(self, x=None):
        """Print every trainable parameter.

        ``x`` is unused; it is accepted only for backward compatibility.
        """
        for param in self.parameters():
            print(param)

    def predict_next_position(self, x):
        """Run the network on the observation sequence ``x``."""
        return self(x)

    def fit(self, steps=100):
        """Train on freshly sampled trajectories and plot the loss curve.

        Relies on the notebook-level names ``sim``, ``number_of_particles``
        and ``observation_length``.

        Args:
            steps: Number of optimisation steps; one trajectory is sampled
                per step.
        """
        def _reshape(_x):
            return np.reshape(_x.to_numpy(), (1, number_of_particles * 2))

        hidden_size = self.lstm.hidden_size
        entry = []
        for step in range(steps):
            data_frame = sim.sample_trajectory(
                total_time_steps=10000,
                sample_freq=50,
                observation_length=observation_length + 1,
            )
            X = [_reshape(data_frame.iloc[i]) for i in range(observation_length)]
            Y = [_reshape(data_frame.iloc[-1])]

            self.optimizer.zero_grad()
            # Start every trajectory from a clean recurrent state.
            self.hidden = (torch.zeros(1, 1, hidden_size),
                           torch.zeros(1, 1, hidden_size))

            # Exactly one forward pass per step: a second pass would start
            # from the hidden state left behind by the first one.
            y_pred = self.predict_next_position(X)
            train_loss = self.loss(y_pred, Y)
            train_loss.backward()
            self.optimizer.step()

            print(f'step {step}: {train_loss.item()}')
            entry.append({'time_step': step, 'loss': train_loss.item(), 'type': 'train'})

        sns.lineplot(data=pd.DataFrame(entry), x='time_step', y='loss', hue='type')

    def train(self, mode=None):
        """Run the training loop, or switch train/eval mode when ``mode`` is given.

        ``nn.Module.eval()`` calls ``self.train(False)``, so this override
        has to keep accepting ``mode``. Called without arguments it keeps the
        original behaviour of this class and runs ``fit()``.

        Args:
            mode: ``None`` to run the training loop; ``True``/``False`` to
                delegate to ``nn.Module.train(mode)``.

        Returns:
            ``None`` after running the training loop, otherwise ``self`` as
            returned by ``nn.Module.train``.
        """
        if mode is None:
            self.fit()
            return None
        return super().train(mode)

# Build the model and run the training loop defined on Model.
# NOTE: Model.train() called with no arguments trains the network; it is not
# the plain nn.Module.train() mode switch.
model = Model()
model.train()

In [None]:
# Show the trained model's prediction for the notebook-level observations X
# next to the target frame Y.
prediction = model.predict_next_position(X)
print(prediction)
print(Y)