In [2]:
import pulsar
import json
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import clear_output

In [3]:
# Address of the Pulsar broker this notebook talks to (a local instance).
PULSAR_SERVICE_URL = 'pulsar://localhost:6650'
client = pulsar.Client(PULSAR_SERVICE_URL)

In [4]:
# Producer for the 'request' topic; makeRequest publishes batch ranges through it.
producer = client.create_producer(topic='request')

2022-06-04 11:23:15.621 INFO  [139850953791296] ConnectionPool:85 | Created connection for pulsar://localhost:6650
2022-06-04 11:23:15.628 INFO  [139849988380416] ClientConnection:356 | [127.0.0.1:35930 -> 127.0.0.1:6650] Connected to broker
2022-06-04 11:23:15.637 INFO  [139849988380416] HandlerBase:54 | [persistent://public/default/request, ] Getting connection from pool
2022-06-04 11:23:15.655 INFO  [139849988380416] ProducerImpl:170 | [persistent://public/default/request, ] Created producer on broker [127.0.0.1:35930 -> 127.0.0.1:6650] 


In [5]:
# NOTE(review): not read by any cell visible in this notebook; presumably the
# size of the full data set -- confirm before relying on it.
total_data = 85_400

In [6]:
# Scratch list for capturing batch-range strings while debugging;
# nothing in the notebook currently appends to it.
test_stack = list()

# Divides the request into batches and sends it to the analysis nodes
def makeRequest(data_size, batch_size=264):
    """Split a request for ``data_size`` items into index ranges and publish them.

    One message of the form ``"<start> - <end>"`` (UTF-8 encoded) is sent per
    batch through the module-level ``producer``. Both bounds are inclusive, so a
    message covers ``end - start + 1`` indices and a full batch spans
    ``batch_size + 1`` indices. The ranges are contiguous, start at 0 and
    together cover exactly the indices ``0 .. data_size - 1``.

    Args:
        data_size: Number of items to request; must satisfy
            ``0 < data_size < 365000``.
        batch_size: Largest difference between the end and start index of a
            single message. Must not be negative.

    Raises:
        ValueError: If ``data_size`` is out of range or ``batch_size`` is
            negative (a negative value would make the loop run forever).
    """
    if not (0 < data_size < 365000):
        raise ValueError("Data Size not available!")
    if batch_size < 0:
        raise ValueError("Batch size must not be negative!")

    start_index = 0
    remaining = data_size
    while remaining > 0:
        # A message spanning `span` covers span + 1 indices, so the final
        # (partial) batch may span at most remaining - 1 to avoid requesting
        # one index past the end of the data.
        span = min(batch_size, remaining - 1)
        next_index = start_index + span
        producer.send(f"{start_index} - {next_index}".encode('utf-8'))
        start_index = next_index + 1
        remaining -= span + 1

In [7]:
def aggregate_dicts(dict_a, dict_b):
    """Add the counts in ``dict_b`` onto ``dict_a`` and return ``dict_a``.

    Each value of ``dict_b`` is converted with ``int`` before it is added.
    Keys missing from ``dict_a`` are created. ``dict_a`` is modified in place
    and is also the return value; ``dict_b`` is left untouched.
    """
    for name, amount in dict_b.items():
        dict_a[name] = dict_a.get(name, 0) + int(amount)
    return dict_a

def parse_dict(prog_dict):
    """Turn a ``{label: count}`` mapping into a DataFrame ordered by count.

    The labels become the index, the values go into a single ``"count"``
    column, and rows are sorted with the largest count first.
    """
    return (
        pd.DataFrame.from_dict(prog_dict, orient='index', columns=["count"])
        .sort_values(by="count", ascending=False)
    )

In [8]:
def _subscribe_response(topic):
    """Subscribe to ``topic`` under the shared 'client' subscription name."""
    return client.subscribe(topic, subscription_name='client')


# One consumer per response topic; the polling loop reads from each in turn.
consumer_one = _subscribe_response('response_one')
consumer_two = _subscribe_response('response_two')
consumer_three = _subscribe_response('response_three')
consumer_four = _subscribe_response('response_four')

2022-06-04 11:23:19.315 INFO  [139850953791296] Client:88 | Subscribing on Topic :response_one
2022-06-04 11:23:19.327 INFO  [139849988380416] HandlerBase:54 | [persistent://public/default/response_one, client, 0] Getting connection from pool
2022-06-04 11:23:19.330 INFO  [139849988380416] ConsumerImpl:216 | [persistent://public/default/response_one, client, 0] Created consumer on broker [127.0.0.1:35930 -> 127.0.0.1:6650] 
2022-06-04 11:23:19.331 INFO  [139850953791296] Client:88 | Subscribing on Topic :response_two
2022-06-04 11:23:19.333 INFO  [139849988380416] HandlerBase:54 | [persistent://public/default/response_two, client, 1] Getting connection from pool
2022-06-04 11:23:19.335 INFO  [139849988380416] ConsumerImpl:216 | [persistent://public/default/response_two, client, 1] Created consumer on broker [127.0.0.1:35930 -> 127.0.0.1:6650] 
2022-06-04 11:23:19.336 INFO  [139850953791296] Client:88 | Subscribing on Topic :response_three
2022-06-04 11:23:19.340 INFO  [139849988380416]

In [8]:
# Publish the batch ranges for a 1000-item request using the default batch size.
makeRequest(data_size=1000)

In [9]:
# Running aggregates filled in by the polling loop: three independent
# {label: count} dictionaries and the current list of top repositories.
prog_dict, test_prog_dict, devop_prog_dict = {}, {}, {}
top_repos = []

# Counters, all starting at zero.
total_num = total_test = total_devop_test = 0

In [None]:
def _receive_json(consumer):
    """Block until ``consumer`` delivers a message and return its JSON payload.

    The message is acknowledged only after its payload has been decoded, so a
    malformed payload is not silently consumed: it is negatively acknowledged
    (making it eligible for redelivery) and the decoding error is re-raised.
    """
    msg = consumer.receive()
    try:
        payload = json.loads(msg.data().decode("utf-8"))
    except ValueError:  # covers both UnicodeDecodeError and json.JSONDecodeError
        consumer.negative_acknowledge(msg)
        raise
    consumer.acknowledge(msg)
    return payload


# Poll the four response topics in lockstep (one message from each per
# iteration), fold the results into the running aggregates and redraw the
# summary. Each receive blocks, and the loop only ends when it is interrupted.
while True:
    # merge top ranking programming languages
    prog_dict = aggregate_dicts(prog_dict, _receive_json(consumer_one))

    # List top commits: keep the ten entries with the largest second element
    top_repos.extend(_receive_json(consumer_two))
    top_repos = sorted(top_repos, key=lambda tup: tup[1], reverse=True)[:10]

    # Show total test
    test_prog_dict = aggregate_dicts(test_prog_dict, _receive_json(consumer_three))

    # Show total test + devops
    devop_prog_dict = aggregate_dicts(devop_prog_dict, _receive_json(consumer_four))

    # wait=True defers clearing until the new output arrives, avoiding flicker
    clear_output(wait=True)

    print(top_repos)
    prog_dict_df = parse_dict(prog_dict)
    print(prog_dict_df.iloc[:10])
    print(parse_dict(test_prog_dict).iloc[:10])
    print(parse_dict(devop_prog_dict).iloc[:10])

[['https://github.com/GiorgioComitini/COVID-19', 107359], ['https://github.com/YusufSuleman/piskel2', 1833], ['https://github.com/miroslavpejic85/mirotalk', 753], ['https://github.com/oxalica/rust-overlay', 631], ['https://github.com/Deweh/CyberCAT-SimpleGUI', 278], ['https://github.com/qirolab/laravel-themer', 82], ['https://github.com/restuwahyu13/express-payment-gateway', 72], ['https://github.com/oberblastmeister/neuron.nvim', 52], ['https://github.com/canyie/Riru-MomoHider', 48], ['https://github.com/metonym/svelte-pincode', 36]]
                  count
Python               98
JavaScript           57
TypeScript           49
C++                  47
Java                 23
C#                   21
Jupyter Notebook     19
Go                   18
HTML                 17
C                    16
                  count
Python               15
PHP                   8
TypeScript            8
Dart                  6
Rust                  5
C++                   5
Go                    5
Jav

In [12]:
# Shut down the Pulsar client; per the log output below this also closes the
# producer and the consumers that were created from it.
client.close()

2022-05-31 15:41:08.629 INFO  [140485261166400] ClientImpl:483 | Closing Pulsar client
2022-05-31 15:41:08.630 INFO  [140485261166400] ProducerImpl:546 | [persistent://public/default/request, standalone-0-43] Closing producer for topic persistent://public/default/request
2022-05-31 15:41:08.630 INFO  [140485261166400] ConsumerImpl:884 | [persistent://public/default/response_one, client, 0] Closing consumer for topic persistent://public/default/response_one
2022-05-31 15:41:08.630 INFO  [140485261166400] ConsumerImpl:884 | [persistent://public/default/response_two, client, 1] Closing consumer for topic persistent://public/default/response_two
2022-05-31 15:41:08.630 INFO  [140485261166400] ConsumerImpl:884 | [persistent://public/default/response_three, client, 2] Closing consumer for topic persistent://public/default/response_three
2022-05-31 15:41:08.631 INFO  [140485261166400] ConsumerImpl:884 | [persistent://public/default/response_four, client, 3] Closing consumer for topic persiste

In [25]:
# Scratch check: extending a list of lists appends the inner lists themselves
# (no flattening).
a = [[1], [2]]
b = [[3]]
a += b
a

[[1], [2], [3]]

In [5]:
# Scratch check: sorting tuples by their second element (ascending).
a = [
    ("a", 12),
    ("a", 1),
    ("a", 3),
]
sorted(a, key=lambda pair: pair[1])

[('a', 1), ('a', 3), ('a', 12)]