In [53]:
# Install the dependencies

#import sys
#!conda install --yes --prefix {sys.prefix} pandas
#!conda install --yes --prefix {sys.prefix} matplotlib

In [113]:
# Import the dependencies

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import re
from matplotlib.pyplot import figure
from datetime import datetime

In [114]:
# Declare variables

# Paths of the two application logs that are passed to getDataframe, one per project.
springKafkaLogFile = 'logs/spring-kafka.log'
springReactiveKafkaLogFile = 'logs/spring-reactive-kafka.log'
# Column names of the parsed DataFrame; the order matches the rows built in getDataframe.
columns = ['project', 'date', 'difference', 'thread', 'animal', 'data']

In [121]:
# Functions

def getDataframe(filename, project):
    """Parse one application log into a DataFrame of record/response events.

    Only lines accepted by ``verifyLine`` are kept. For every kept line one row
    is produced with the columns listed in the module-level ``columns``.

    Parameters:
        filename: path of the log file to read.
        project: label stored in the ``project`` column of every row.

    Returns:
        pandas.DataFrame with one row per record/response line. ``difference``
        is the number of seconds between a response line and the most recent
        record line seen before it, and 0 for record lines.
    """
    data = []
    # Timestamp of the most recent record line; None until the first one is seen.
    lastDate = None

    # The context manager closes the file even when parsing a line raises,
    # and iterating the handle avoids loading the whole log into memory.
    with open(filename, 'r') as logFile:
        for line in logFile:
            if not verifyLine(line):
                continue

            date = getDate(line)
            thread = getThread(line)
            animal = getAnimal(line)
            content = getData(line)
            difference = 0

            if isResponseLine(line):
                # A response that precedes every record has no reference
                # timestamp; keep 0 instead of failing on the subtraction.
                # NOTE(review): the reference is the latest record line of any
                # animal, not necessarily the record of this response's animal
                # - confirm this is the intended measurement when records and
                # responses interleave.
                if lastDate is not None:
                    difference = (date - lastDate).total_seconds()
            else:
                lastDate = date

            data.append([project, date, difference, thread, animal, content])

    return pd.DataFrame(data, columns = columns)

        
def verifyLine(line):
    """Return True when the line is a record line or a response line, else False."""
    return bool(isRecordLine(line) or isResponseLine(line))

def isRecordLine(line):
    """Tell whether the line contains the 'Record arrived:' marker."""
    marker = 'Record arrived:'
    return marker in line

def isResponseLine(line):
    """Tell whether the line contains the 'Response arrived:' marker."""
    marker = 'Response arrived:'
    return marker in line

def getDate(line):
    """Parse the timestamp held in the first 23 characters of the line.

    The expected layout is day-month-year, then hours:minutes:seconds with a
    fractional part; ``datetime.strptime`` raises ValueError otherwise.
    """
    timestamp = line[:23]
    return datetime.strptime(timestamp, '%d-%m-%Y %H:%M:%S.%f')

def getThread(line):
    """Return the text inside the second square-bracketed token of the line.

    Raises IndexError when the line holds fewer than two bracketed tokens
    matching the pattern.
    """
    bracketed = [
        found.group(1)
        for found in re.finditer(r'(\[(\w+|\.+|\:+|\#|\-+)+\])', line)
    ]
    second = bracketed[1]
    # Drop the surrounding '[' and ']'.
    return second[1:-1]

def getData(line):
    """Return the response payload text of a log line.

    For a line containing 'Response arrived: ' the result is that marker plus
    everything after it up to the end of the line. Any other line yields an
    empty string.
    """
    found = re.search(r'Response arrived: (.*)', line)
    # Previously the non-response branch evaluated a bare '' expression and
    # therefore returned None; return the empty string explicitly.
    if found is None:
        return ''
    return found.group(0)

def getAnimal(line):
    """Extract the animal name (one or two words) from a record or response line.

    Record lines use the first ': <word>[ <word>]' occurrence; response lines
    use the words following 'animal '. Lines of neither kind give None.
    """
    if isRecordLine(line):
        pattern, field = r': (\w+( \w+|))', 0
    elif isResponseLine(line):
        pattern, field = r'(animal (\w+( \w+|)))', 1
    else:
        return None

    firstMatch = re.findall(pattern, line)[0]
    return firstMatch[field]

In [122]:
# Parse both logs and stack the per-project frames into one DataFrame.
df = pd.concat([
    getDataframe(logPath, projectName)
    for logPath, projectName in (
        (springKafkaLogFile, 'spring-kafka'),
        (springReactiveKafkaLogFile, 'spring-reactive-kafka'),
    )
])

In [123]:
# Preview the first three rows of the combined frame.
df.head(3)

Unnamed: 0,project,date,difference,thread,animal,data
0,spring-kafka,2021-10-05 19:05:29.173,0.0,org.springframework.kafka.KafkaListenerEndpoin...,Aardvark,
1,spring-kafka,2021-10-05 19:05:31.323,2.15,org.springframework.kafka.KafkaListenerEndpoin...,Aardvark,"Response arrived: {""_type"": ""SearchResponse"", ..."
2,spring-kafka,2021-10-05 19:05:31.329,0.0,org.springframework.kafka.KafkaListenerEndpoin...,Abyssinian,


In [124]:
# Preview the last three rows of the combined frame.
df.tail(3)

Unnamed: 0,project,date,difference,thread,animal,data
2275,spring-reactive-kafka,2021-10-05 19:05:42.343,0.772,reactor-http-nio-2,Zorse,"Response arrived: {""_type"": ""SearchResponse"", ..."
2276,spring-reactive-kafka,2021-10-05 19:05:42.353,0.782,reactor-http-nio-2,Yakutian Laika,"Response arrived: {""_type"": ""SearchResponse"", ..."
2277,spring-reactive-kafka,2021-10-05 19:05:42.394,0.823,reactor-http-nio-3,Yak,"Response arrived: {""_type"": ""SearchResponse"", ..."


In [213]:
# spring-kafka: median and standard deviation of the non-zero differences.
mask = (df['difference'] != 0) & (df['project'] == 'spring-kafka')
medianSpringKafka = df.loc[mask, 'difference'].median()
stdSpringKafka = df.loc[mask, 'difference'].std()
print('spring-kafka median =', medianSpringKafka)
print('spring-kafka standard deviation =', stdSpringKafka)

spring-kafka median = 0.702
spring-kafka standard deviation = 0.35238035280733065


In [214]:
# spring-reactive-kafka: median and standard deviation of the non-zero differences.
mask = (df['difference'] != 0) & (df['project'] == 'spring-reactive-kafka')
medianSpringReactiveKafka = df.loc[mask, 'difference'].median()
stdSpringReactiveKafka = df.loc[mask, 'difference'].std()
print('spring-reactive-kafka median =', medianSpringReactiveKafka)
print('spring-reactive-kafka standard deviation =', stdSpringReactiveKafka)

spring-reactive-kafka median = 0.036
spring-reactive-kafka standard deviation = 0.25061742982702656


In [215]:
# Percentage by which the spring-reactive-kafka median is lower than the
# spring-kafka median: 100 - (reactive * 100 / kafka).
print('Request time median drop comparisson between spring-kafka and spring-reactive-kafka:', (100-(medianSpringReactiveKafka*100)/medianSpringKafka))

# Same percentage computed for the standard deviations.
print('Request time standard deviation drop comparisson between spring-kafka and spring-reactive-kafka:', (100-(stdSpringReactiveKafka*100)/stdSpringKafka))

Request time median drop comparisson between spring-kafka and spring-reactive-kafka: 94.87179487179488
Request time standard deviation drop comparisson between spring-kafka and spring-reactive-kafka: 28.87871646917401


In [256]:
# Rows whose difference is 0 are the ones getDataframe did not time
# (it only fills 'difference' for response lines).
mask = (df['difference'] == 0)
newdf = df.loc[mask]
newdf = newdf.sort_values(by=['animal', 'date'], ascending=True)
# Per animal keep the first and last value of every column; after stack() each
# animal has one "first" and one "last" row, labelled in the 'level_1' column.
newdf = newdf.groupby("animal").agg(["first", "last"]).stack().reset_index();
newdf = newdf.sort_values(by=['animal', 'date'], ascending=True)

# Whole seconds between consecutive rows. total_seconds() is used instead of
# astype("timedelta64[s]") because the latter only produced floats on
# pandas < 2.0; on newer versions it keeps a timedelta dtype, which breaks the
# numeric handling below. The floor division keeps the whole-second resolution
# the old conversion had.
newdf["time-diff-s"] = newdf['date'].diff().dt.total_seconds() // 1
# A "first" row would otherwise carry the gap to the previous animal (or NaN
# for the very first row), so reset it to 0.
newdf.loc[newdf['level_1'] == "first", "time-diff-s"] = 0

mask = (newdf['time-diff-s'] != 0)
medianDifferenceInMessageReceiveTime = newdf.loc[mask, 'time-diff-s'].median()
stdDifferenceInMessageReceiveTime = newdf.loc[mask, 'time-diff-s'].std()

print('Median difference in message receive time =', medianDifferenceInMessageReceiveTime)
print('Standard deviation difference in message receive time =', stdDifferenceInMessageReceiveTime)

Median difference in message receive time = 406.0
Standard deviation difference in message receive time = 233.44410318031112


In [161]:
# Export the combined frame to test.csv (comma-separated, UTF-8).
df.to_csv("test.csv", sep=",", encoding="utf-8");

In [None]:
# Bar chart of how many parsed log rows each thread produced.
fig, ax = plt.subplots(figsize=(50,30))
ax.set_title('Threads', {'fontsize': 70})
# Draw on the axes created above instead of relying on pyplot's current-axes state.
df['thread'].value_counts().sort_index(ascending=True).plot(kind='bar', stacked=True, ax=ax)
# The x axis lists thread names; it was previously labelled as a response time.
ax.set_xlabel('Thread', fontsize=50)
ax.set_ylabel('Total de requisições', fontsize=50)
ax.tick_params(axis='both', labelsize=30)
ax.legend(loc=2, prop={'size': 30})
ax.grid(True);