# Data Generation

## Addition Problem
Source: https://github.com/batzner/indrnn/blob/master/examples/addition_rnn.py

Time-step (sequence length) settings are taken from the IndRNN paper: https://arxiv.org/abs/1803.04831

Batch-size settings are taken from the unitary evolution RNN paper: https://arxiv.org/pdf/1511.06464.pdf

In [56]:
import pandas as pd
import os, sys
import numpy as np

from random import randint
from numpy import array

# Batch sizes to sweep over; one CSV is written per (batch size, time steps) pair.
batch_size_arr = [80, 50, 100, 180, 200]
# Sequence lengths (time steps per sample) to sweep over.
time_steps_arr = [100, 500, 1000, 5000, 10000, 15000]

def generateAddingProblemData(batch_size, time_steps):
    """Generate one batch of samples for the adding problem.

    Every sample consists of two aligned sequences of length ``time_steps``:
    random values drawn uniformly from [0, 1), and a 0/1 marker sequence
    with exactly one 1 in the first half and one 1 in the second half.
    The target of a sample is the sum of the two marked values.

    Args:
        batch_size: Number of samples (rows) to generate.
        time_steps: Length of each sequence.

    Returns:
        A tuple ``(inputs, targets, data)`` where
        ``inputs`` has shape ``(batch_size, time_steps, 2)`` with the value
        in channel 0 and the marker in channel 1,
        ``targets`` has shape ``(batch_size,)``, and
        ``data`` has shape ``(batch_size, 2 * time_steps + 1)``: each row is
        the flattened (value, marker) pairs followed by the target.
    """
    # Random values that may be selected for the sum.
    values = np.random.rand(batch_size, time_steps)

    # Marker sequence: one position flagged in each half of every row.
    markers = np.zeros(values.shape, dtype=int)
    midpoint = time_steps // 2
    for row in markers:
        # Draw order (first half, then second half) is kept per row so a
        # seeded generator yields the same positions.
        lower_pos = np.random.randint(midpoint)
        upper_pos = np.random.randint(midpoint, time_steps)
        row[[lower_pos, upper_pos]] = 1

    # Pair up value and marker along a new last axis.
    inputs = np.stack((values, markers), axis=2)

    # Only the two marked values contribute to each row's target.
    targets = (values * markers).sum(axis=1)

    # Flat table: interleaved (value, marker) columns, then the target column.
    flat_inputs = inputs.reshape(batch_size, time_steps * 2)
    data = np.hstack((flat_inputs, targets[:, np.newaxis]))
    return inputs, targets, data

# Write one CSV file per (batch size, sequence length) combination.
for bs in batch_size_arr:
    for ts in time_steps_arr:
        # Each file holds twice `bs` samples; only the flat table is needed.
        addingproblemdata = generateAddingProblemData(bs*2, ts)[2]
        out_path = f"../../Datasets/2_addingproblem/addingProblem.bs={bs}.ts={ts}.csv"
        with open(out_path, 'w') as csvfile:
            # First line is the integer header row "2,1"
            # (presumably input/output widths -- confirm with the consumer).
            np.savetxt(csvfile, np.array([[2, 1]]), fmt='%d', delimiter=",")
            # Remaining lines: one sample per row, four decimal places.
            np.savetxt(csvfile, addingproblemdata, fmt='%.4f', delimiter=",")