Developer: **Rakesh Nain**

### Process Event Producer

Here I am generating the event timestamp in UTC timezone for each data record in the producer, and then convert the timestamp to unix-timestamp format (keeping UTC timezone) to simulate the “ts” column. For example, if the current time is 2020-10-10 10:10:10 UTC, it should be converted to the value of 1602324610, and stored in the “ts” column

Basically, Writing a python program that loads all the data from “Streaming_Linux_process.csv” and sending X number of records from each machine following the sequence to the Kafka stream every 5 seconds.

Where,
- The number X is a random number between 10~50 (inclusive), which is regenerated for each machine in each cycle.
- Appending event time in unix-timestamp format (as mentioned above).
- If the data is exhausted, restarting from the first sequence again

In [1]:
# import statements
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import csv

In [13]:
def read_csv(fileName):
    data = []
    with open(fileName, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
    
        for row in reader:
            data.append({'sequence': int(row['sequence']), 'machine': int(row['machine']), 'PID': int(row['PID']), 'TRUN': int(row['TRUN']), 'TSLPI': int(row['TSLPI']), 'TSLPU': int(row['sequence']), 'POLI': row['POLI'], 'NICE': int(row['NICE']), 'PRI': int(row['PRI']), 'RTPR': int(row['RTPR']), 'CPUNR': int(row['CPUNR']), 'Status': row['Status'], 'EXC': int(row['EXC']), 'State': row['State'], 'CPU': int(float(row['CPU'])), 'CMD': row['CMD']})
        
    return data

def publish_message(producer_instance, topic_name, data):
    try:
        producer_instance.send(topic_name, data)
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))
        
def connect_kafka_producer():
    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                                  value_serializer=lambda x: dumps(x).encode('ascii'),
                                  api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
    finally:
        return _producer
    
if __name__ == '__main__':
   
    topic = 'Streaming_Linux_process7'
    cRows = read_csv('Streaming_Linux_process.csv')
    
    print('Publishing records..')
    producer = connect_kafka_producer()
    
    machines_set = set()
    for a in cRows:
        machines_set.add(a['machine'])   
        
    machine_list = list(machines_set)
          
    g = globals()
    for m_num in machine_list:
        g['machine_{0}'.format(m_num)] = []
        
    for m_num in machine_list:
        for row in cRows:
            if row['machine'] == m_num:
                g['machine_{0}'.format(m_num)].append(row)
                
    data_machine = []           
    for m_num in machine_list:       
        data_machine.append(g['machine_{0}'.format(m_num)])
        
    start_list = [0]*len(data_machine)
    final_produced_data = []
    i = 0
    while True:
        produced_data = []
        for i in range(len(data_machine)):
            rand_n = random.randint(10,50)
            if start_list[i]+rand_n < len(data_machine[i]):
                selected_data = data_machine[i][start_list[i]:start_list[i]+rand_n]
            else:
                start_list[i] = 0
                selected_data = data_machine[i][start_list[i]:start_list[i]+rand_n]
                
            for a in selected_data:
                a['ts'] = round(dt.datetime.utcnow().timestamp())
                produced_data.append(a)
                
            start_list[i] = start_list[i] + rand_n
            
        publish_message(producer, topic, produced_data)  
        sleep(5)

Publishing records..
Message published successfully. Data: [{'sequence': 1, 'machine': 4, 'PID': 1442, 'TRUN': 0, 'TSLPI': 1, 'TSLPU': 1, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 0, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'Xorg', 'ts': 1604192462}, {'sequence': 2, 'machine': 4, 'PID': 2797, 'TRUN': 0, 'TSLPI': 5, 'TSLPU': 2, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 1, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'nautilus', 'ts': 1604192462}, {'sequence': 3, 'machine': 4, 'PID': 2547, 'TRUN': 0, 'TSLPI': 3, 'TSLPU': 3, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 3, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'unity-panel-se', 'ts': 1604192462}, {'sequence': 4, 'machine': 4, 'PID': 2993, 'TRUN': 0, 'TSLPI': 3, 'TSLPU': 4, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 3, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'zeitgeist-fts', 'ts': 1604192462}, {'sequence': 5, 'machine'

Message published successfully. Data: [{'sequence': 20, 'machine': 4, 'PID': 4375, 'TRUN': 0, 'TSLPI': 1, 'TSLPU': 20, 'POLI': 'norm', 'NICE': 20, 'PRI': 100, 'RTPR': 0, 'CPUNR': 0, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'atop', 'ts': 1604192467}, {'sequence': 21, 'machine': 4, 'PID': 1442, 'TRUN': 0, 'TSLPI': 1, 'TSLPU': 21, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 0, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'Xorg', 'ts': 1604192467}, {'sequence': 22, 'machine': 4, 'PID': 3790, 'TRUN': 0, 'TSLPI': 3, 'TSLPU': 22, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 0, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'ostinato', 'ts': 1604192467}, {'sequence': 23, 'machine': 4, 'PID': 4372, 'TRUN': 1, 'TSLPI': 0, 'TSLPU': 23, 'POLI': 'norm', 'NICE': 20, 'PRI': 100, 'RTPR': 0, 'CPUNR': 3, 'Status': '-', 'EXC': 0, 'State': 'R', 'CPU': 0, 'CMD': 'atop', 'ts': 1604192467}, {'sequence': 24, 'machine': 4, 'PID': 2797, 'TRUN': 0, 

Message published successfully. Data: [{'sequence': 66, 'machine': 4, 'PID': 4375, 'TRUN': 0, 'TSLPI': 1, 'TSLPU': 66, 'POLI': 'norm', 'NICE': 20, 'PRI': 100, 'RTPR': 0, 'CPUNR': 0, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'atop', 'ts': 1604192472}, {'sequence': 67, 'machine': 4, 'PID': 4374, 'TRUN': 0, 'TSLPI': 1, 'TSLPU': 67, 'POLI': 'norm', 'NICE': 20, 'PRI': 100, 'RTPR': 0, 'CPUNR': 1, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'atop', 'ts': 1604192472}, {'sequence': 68, 'machine': 4, 'PID': 3790, 'TRUN': 0, 'TSLPI': 3, 'TSLPU': 68, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 1, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'ostinato', 'ts': 1604192472}, {'sequence': 69, 'machine': 4, 'PID': 1442, 'TRUN': 0, 'TSLPI': 1, 'TSLPU': 69, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 1, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'Xorg', 'ts': 1604192472}, {'sequence': 70, 'machine': 4, 'PID': 2797, 'TRUN': 0, 

Message published successfully. Data: [{'sequence': 77, 'machine': 4, 'PID': 1851, 'TRUN': 0, 'TSLPI': 2, 'TSLPU': 77, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 1, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'vmtoolsd', 'ts': 1604192477}, {'sequence': 78, 'machine': 4, 'PID': 1371, 'TRUN': 0, 'TSLPI': 1, 'TSLPU': 78, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 3, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'irqbalance', 'ts': 1604192477}, {'sequence': 79, 'machine': 4, 'PID': 2498, 'TRUN': 0, 'TSLPI': 1, 'TSLPU': 79, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 0, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 0, 'CMD': 'upstart-dbus-b', 'ts': 1604192477}, {'sequence': 80, 'machine': 4, 'PID': 3793, 'TRUN': 1, 'TSLPI': 11, 'TSLPU': 80, 'POLI': 'norm', 'NICE': 0, 'PRI': 120, 'RTPR': 0, 'CPUNR': 3, 'Status': '-', 'EXC': 0, 'State': 'S', 'CPU': 1, 'CMD': 'drone', 'ts': 1604192477}, {'sequence': 81, 'machine': 4, 'PID': 2

KeyboardInterrupt: 