## Sending data to Kafka server

This notebook uses the [Python client for the Apache Kafka distributed stream processing system](http://kafka-python.readthedocs.io/en/master/index.html) to send messages to a Kafka cluster. 

In this example, the messages are data generated from a linear model with $n$ input variables, i.e., 

$$
y =x^T \beta +w
$$
with $x, \beta \in \mathbb{R}^n$, and $y, w \in \mathbb{R}$. $w$ is Gaussian noise.

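A message is sent every `TIME_INTERVAL` seconds. Each message is a list of size $(n+2)$ when $y$ is a scalar (and of size $n+m+1$ when the generator is configured with $m$ outputs), where:
* The first element is the counter
* The next element is $y$ ($m$ elements when there are $m$ outputs)
* The remaining elements are the $x$ values (size $n$)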


In [1]:
from kafka import KafkaProducer
import time
import numpy as np
import matplotlib as plt

In [2]:
# Bootstrap server address handed to the Kafka producer.
KAFKA_BOOTSTRAP_SERVERS = 'localhost:9092'
producer = KafkaProducer(bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)

In [3]:
class NormalWishart:
    """Sampler loosely based on the Normal-Wishart distribution.

    Each call to :meth:`sample` returns a random mean vector and a random
    symmetric matrix, both built from Gaussian draws that use `W` as
    covariance.

    Note:
        `_lambda` is stored but not used by :meth:`sample`, and `_nu` is
        only used to validate the arguments: the mean is drawn from a
        Gaussian with mean `_mu0` and covariance `_W`, and the matrix is
        accumulated from `_n` zero-mean Gaussian draws.

    Attributes:
        _mu0 (:obj:`np.ndarray`): location vector.
        _D (int): Number of components (length of `_mu0`).
        _lambda (float): positive-only scalar.
        _W (:obj:`np.ndarray`): positive definite scale matrix,
            of shape (`_D`, `_D`).
        _nu (int): number of degrees of freedom.
        _n (int): sample size, i.e. number of Gaussian draws used to
            build the random matrix.
        _0 (:obj:`np.ndarray`): pre-allocated vector of `_D` zeros.
    """

    def __init__(self, mu0, _lambda, W, nu, n):
        """Stores and validates the parameters.

        Parameters:
            mu0 (array-like): location vector of length D.
            _lambda (float): positive-only scalar.
            W (array-like): scale matrix of shape (D, D).
            nu (int): number of degrees of freedom, must exceed D - 1.
            n (int): number of Gaussian draws used by :meth:`sample`.

        Raises:
            AssertionError: if `nu` is too small or if the shape of `W`
                does not match the length of `mu0`.
        """
        self._mu0 = np.asarray(mu0)
        self._D = self._mu0.shape[0]
        self._lambda = _lambda
        self._W = np.asarray(W)
        self._nu = nu
        self._n = n
        # The zero vector is the mean of the Gaussian draws made in
        # `sample`, so its length is the dimensionality `_D` and not
        # the number of degrees of freedom.
        self._0 = np.zeros(self._D)
        # Make sure that degrees of freedom are sufficient
        # and that the dimensionality of the scale matrix is correct.
        assert self._nu > self._D - 1, (
            "nu must be greater than D - 1 (nu=%s, D=%s)"
            % (self._nu, self._D))
        assert self._W.shape == (self._D, self._D), (
            "W must have shape (%d, %d), got %s"
            % (self._D, self._D, self._W.shape))

    def sample(self):
        """Randomly generates a mean vector and a precision matrix.

        Returns:
            :obj:`np.ndarray`: a random mean vector of length `self._D`.
            :obj:`np.ndarray`: a random precision matrix of shape
                (`self._D`, `self._D`), computed as the Gram matrix of
                `self._n` zero-mean Gaussian draws.
        """
        # Randomly samples the mean vector
        mu = np.random.multivariate_normal(self._mu0, self._W)

        # Randomly samples the precision matrix
        G = np.random.multivariate_normal(self._0, self._W, size=self._n)
        S = np.dot(G.T, G)
        return mu, S

In [4]:
class Generator:
    """Data generator for a linear model with additive Gaussian noise.

    Outputs are computed as ``y = X . beta^T + w``, where the weights
    ``beta`` are drawn once at construction time and ``w`` is Gaussian
    noise drawn independently for every sample and every output.

    Attributes:
        _counter (int): Identifier of the next sample to be generated.
        _n_inputs (int): Number of explanatory variables.
        _n_outputs (int): Number of explained variables.
        _latent_dim (int): Latent dimension, represented by the
            number of Gaussian observations to draw for
            estimating the precision matrix.
        _noise_std (float): Standard deviation of the additive noise.
        _beta (:obj:`np.ndarray`): Variable weights, of shape
            (`_n_outputs`, `_n_inputs`).
    """

    def __init__(self, n_inputs, n_outputs, latent_dim=15, noise_std=0.1):
        """Draws the random weights of the linear model.

        Parameters:
            n_inputs (int): Number of explanatory variables.
            n_outputs (int): Number of explained variables.
            latent_dim (int): Number of Gaussian observations used to
                build the random precision matrix. Must be at least
                `n_inputs`, otherwise that matrix is singular.
            noise_std (float): Standard deviation of the additive noise.

        Raises:
            ValueError: if `latent_dim` is smaller than `n_inputs`.
        """
        # The precision matrix is a sum of `latent_dim` rank-one terms;
        # with fewer terms than dimensions it cannot be inverted.
        if latent_dim < n_inputs:
            raise ValueError(
                "latent_dim (%s) must be >= n_inputs (%s) for the "
                "precision matrix to be invertible" % (latent_dim, n_inputs))

        self._counter = 0
        self._n_inputs = n_inputs
        self._n_outputs = n_outputs
        self._latent_dim = latent_dim
        self._noise_std = noise_std

        # Initializes NW distribution with the identity
        # as scale matrix and a zero vector as location vector.
        W = np.eye(self._n_inputs)
        mu0 = np.zeros(self._n_inputs)
        nu = self._n_inputs
        nw = NormalWishart(mu0, 1., W, nu, self._latent_dim)

        # Lambda is a random precision matrix,
        # and needs to be inverted in order to obtain
        # a covariance matrix.
        mu, Lambda = nw.sample()
        Sigma = np.linalg.inv(Lambda)

        # Randomly initializes the variable weights
        self._beta = np.random.multivariate_normal(mu, Sigma, self._n_outputs)

    def sample(self, sample_size=1):
        """Draw random samples from the linear model.

        Explanatory variables are drawn uniformly in [0, 1); outputs are
        their linear combination through `beta` plus Gaussian noise.

        Parameters:
            sample_size (int): Number of observations to draw.

        Returns:
            :obj:`np.ndarray`: Array of shape (`sample_size`, `self._n_inputs`)
                containing random values for the explanatory variables.
            :obj:`np.ndarray`: Array of shape (`sample_size`, `self._n_outputs`)
                containing random values for the explained variables.
            :obj:`np.ndarray`: Array of length `sample_size` containing the unique
                identifiers of generated samples.

            Dimensions of length one are squeezed out of the first two
            arrays, e.g. with `sample_size=1` they are one-dimensional.
        """
        # Generates unique identifiers
        counters = np.arange(self._counter, self._counter + sample_size)
        self._counter += sample_size

        # Samples explanatory variables
        X = np.random.rand(sample_size, self._n_inputs)

        # Samples random noise: one independent value per sample and per
        # output, so that rows of a batch do not share the same noise.
        w = np.random.normal(
            0, 1, size=(sample_size, self._n_outputs)) * self._noise_std

        # Computes outputs
        y = np.dot(X, self._beta.T) + w
        return np.squeeze(X), np.squeeze(y), counters

    @property
    def beta(self):
        """Returns the weights of the linear model.

        Returns:
            :obj:`np.ndarray`: Array of shape (`self._n_outputs`, `self._n_inputs`).
        """
        return self._beta

    @property
    def counter(self):
        """Returns current value for the sample counter.

        Returns:
            int: Identifier of the next sample to be generated.
        """
        return self._counter

In [5]:
# Dimensions of the linear model: number of inputs, number of outputs.
n, m = 10, 8

# NOTE(review): not referenced by the cells visible here -- confirm it is still needed.
n_models = 6

In [6]:
# Smoke test: build a generator from the configured dimensions (n inputs,
# m outputs) instead of repeating the literals, then display one sample
# as (inputs, outputs, identifiers).
gen = Generator(n, m)
gen.sample()

(array([0.63005893, 0.43323915, 0.41965043, 0.75887481, 0.84093609,
        0.25743898, 0.80655364, 0.29180842, 0.17620812, 0.3565675 ]),
 array([2.30136192, 1.42930032, 1.46511899, 1.51555191, 2.02273809,
        1.91264073, 1.3688976 , 1.07686505]),
 array([0]))

In [None]:
TIME_INTERVAL = 1  # Seconds to wait between two consecutive messages
TOPIC = 'dataLinearModel'  # Kafka topic the messages are published to

n = 10 # Number of inputs
m = 8 # Number of outputs

gen = Generator(n, m)

print('beta: %s' % str(gen.beta))

# Loop for sending messages to Kafka with the topic dataLinearModel.
# It runs until the kernel is interrupted.
try:
    while True:
        x, y, counter = gen.sample()
        # `sample` squeezes dimensions of length one, so `y` (or `x`) is
        # 0-dimensional when m (or n) equals 1; ravel makes every part
        # one-dimensional so that the concatenation always succeeds.
        arr = np.concatenate((np.ravel(counter), np.ravel(y), np.ravel(x)))
        # threshold=arr.size keeps numpy from abbreviating long arrays
        # with '...', which would silently drop values from the message.
        message = np.array2string(arr, separator=',', threshold=arr.size)
        producer.send(TOPIC, message.encode())
        time.sleep(TIME_INTERVAL)
except KeyboardInterrupt:
    print('Interrupted: stopped sending messages.')
finally:
    # Messages may still be buffered by the producer; flush them so the
    # last ones are delivered. The producer is left open so that the
    # cell can be run again.
    producer.flush()
    

beta: [[ 0.04999734  2.22922191  0.64776345 -1.35174686  1.45025298 -0.57287667
   0.1591668   0.9256341  -0.47154562  1.24734486]
 [-0.17381563  2.09452095 -0.0214811  -0.69715385  2.22256245 -0.22321851
   0.55433158  0.54801595  0.35102829  0.42000597]
 [-0.69010133  0.8858062   0.15654393 -1.33572736  0.75606331 -0.09945067
   0.02126802  1.60031293 -0.60808223  1.16340666]
 [-0.7651876   1.70914799  1.03180331 -1.12608898  0.75992093  0.33461455
   0.33132817  1.28949043 -0.40251148  2.47122849]
 [-0.28411374  1.80858853 -0.27723884 -0.72318038  1.83048786 -0.90524121
   0.89711907  1.07975923 -0.0181386  -0.37521333]
 [-0.43098733  1.68965667  0.58693057 -1.73791254  1.28425738  0.56428111
  -0.30134754  0.99810276 -0.52060576  1.30048181]
 [-1.19306451  2.00825084 -0.2666904  -0.59411742  1.74717222 -1.02882489
   0.31720957  1.55169703  0.29879952 -0.25403411]
 [ 0.21585186  1.7405864   1.17806856 -1.64294489  1.2250097  -0.33884547
   0.50272814  0.91959732 -0.41674601  1.5599