In [28]:
import matplotlib.pyplot as plt
import pandas as pd
import csv
import seaborn as sns
plt.style.use('fivethirtyeight')
import numpy as np
import datetime
import re
import time

In [18]:
task_start_pattern = r"\[(.*)\] Started task: (.*) on worker: (.*)"
task_end_pattern = r"\[(.*)\] Completed task: (.*) on worker: (.*)"

job_start_pattern = r"\[(.*)\] Started job: (.*)"
job_end_pattern = r"\[(.*)\] Completed job: (.*)"

filenames = ["log_file_LL.txt", "log_file_RR.txt", "log_file_RANDOM.txt"]

In [29]:
for filename in filenames:

    jobs = dict()
    tasks = dict()
    task_workers = dict()
    num_of_tasks = dict()

    with open(filename, "r") as f:
        lines = f.readlines()
        lines.sort()
        for line in lines:
            # Process job start times
            job_start = re.match(job_start_pattern, line)
            if job_start:
                timestamp, job_id = job_start.groups()
                jobs[job_id] = timestamp

            # Process job end times and compute duration
            job_end = re.match(job_end_pattern, line)
            if job_end:
                end, job_id = job_end.groups()
                start = jobs[job_id]

                startTime = datetime.datetime.strptime(start, "%Y-%m-%d %H:%M:%S.%f")

                endTime = datetime.datetime.strptime(end, "%Y-%m-%d %H:%M:%S.%f")

                duration = endTime - startTime

                elapsed = (duration.days * 86400) + (duration.seconds * 1) + float(duration.microseconds / 1000)/1000

                jobs[job_id] = float(elapsed)

            # Process task start times
            task_start = re.match(task_start_pattern, line)
            if task_start:
                timestamp, task_id, worker_id = task_start.groups()
                tasks[task_id] = timestamp
                if worker_id in task_workers:
                    task_workers[worker_id].append(task_id)
                    t = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
                    t = time.mktime(t.timetuple())
                    num_of_tasks[worker_id].append((t, len(task_workers[worker_id])))
                else:
                    task_workers[worker_id] = [task_id]
                    t = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
                    t = time.mktime(t.timetuple())
                    num_of_tasks[worker_id] = [(t, len(task_workers[worker_id]))]



            # Process task end times and compute duration
            task_end = re.match(task_end_pattern, line)
            if task_end:
                end, task_id, worker_id = task_end.groups()
                start = tasks[task_id]

                startTime = datetime.datetime.strptime(start, "%Y-%m-%d %H:%M:%S.%f")
                endTime = datetime.datetime.strptime(end, "%Y-%m-%d %H:%M:%S.%f")

                duration = endTime - startTime

                elapsed = (duration.days * 86400) + (duration.seconds * 1) + float(duration.microseconds / 1000)/1000

                tasks[task_id] = float(elapsed)
                
                task_workers[worker_id].remove(task_id)
                t = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
                t = time.mktime(t.timetuple())
                num_of_tasks[worker_id].append((t, len(task_workers[worker_id])))


    jobs_df = pd.DataFrame(jobs.values(), columns=["jobs"])
    tasks_df = pd.DataFrame(tasks.values(), columns=["tasks"])

    mean_jobs = jobs_df["jobs"].mean()
    mean_tasks = tasks_df["tasks"].mean()

    median_jobs = jobs_df["jobs"].median()
    median_tasks = tasks_df["tasks"].median()
    
    algo = filename.split("_")[-1]
    algo = algo.split(".")[0]
    print("Algorithm = {algo}".format(algo=algo))
    print("Job: Mean = {mean}, Median = {median}".format(mean=mean_jobs, median=median_jobs))
    print("Task: Mean = {mean}, Median = {median}\n".format(mean=mean_tasks, median=median_tasks))
    
#     for key in num_of_tasks:
#         print(key, num_of_tasks[key])
#     print("\n")

Algorithm = LL
Job: Mean = 9.01924, Median = 9.023
Task: Mean = 3.67010333333, Median = 4.003

Algorithm = RR
Job: Mean = 9.0745, Median = 9.014
Task: Mean = 3.66972333333, Median = 4.003

Algorithm = RANDOM
Job: Mean = 9.03162, Median = 9.0205
Task: Mean = 3.67042666667, Median = 4.003



In [31]:
# Graph for worker, Y-axis num tasks, X-axis is timestamp
time_range = dict()
tasks_time_range = dict()
for worker_id in num_of_tasks:
    worker = num_of_tasks[worker_id]
    time_range[worker_id] = [worker[i+1][0] - worker[i][0] for i in range(len(worker) - 1)]
    tasks_time_range[worker_id] = [worker[i][1] for i in range(len(worker) - 1)]

In [32]:
print(time_range)

{'1': [0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0