In [29]:
from hashlib import sha256
from base64 import b85encode
import pandas as pd
from PIL import Image, ImageDraw, ImageOps

In [3]:
# Read the six QC spreadsheet exports in order and stack them into a single
# frame; ignore_index=True renumbers the rows 0..n-1 across all parts.
d1, d2, d3, d4, d5, d6 = (
    pd.read_excel(workbook)
    for workbook in (
        "QCdata_1.xlsx",
        "QCdata_2.xlsx",
        "QCdata_3.xlsx",
        "QCdata_4.xlsx",
        "QCdata_5.xlsx",
        "QCdata_6.xlsx",
    )
)
df = pd.concat([d1, d2, d3, d4, d5, d6], ignore_index=True)

In [4]:
# Remove columns that are not used downstream; the author's reason for
# discarding each one is kept next to its name.
df = df.drop(
    [
        "FAULT.CREATION.DATE",        # empty
        "AUTOMATION.LEVEL",           # empty
        "DETAILED.AUTOMATION.LEVEL",  # almost empty
        "DOMAIN",                     # no variance
        "PROJECT",                    # no variance
        "FAULT.REPORT.NB",            # equivalent to "TEST.STATUS"
        "TEST.AUTOMATION.LEVEL",      # useless
        "AUTOMATION.LEVEL.FINAL",     # useless
        "FAULT.REPORT.ID",            # broken
    ],
    axis="columns",
)

In [5]:
# Order matters: duplicates are removed while TEST.RUN.ID is still part of the
# row comparison, and only afterwards is that column discarded.
df = df.drop_duplicates().drop(columns=["TEST.RUN.ID"])

In [6]:
# Keep every row whose TEST.OBJECT differs from "Benchmark", then drop the
# column once the filter has been applied.
df = (
    df.loc[lambda frame: frame["TEST.OBJECT"] != "Benchmark"]
    .drop(columns=["TEST.OBJECT"])
)

In [7]:
# Recode the status labels to PASS/FAIL and store the column as a categorical.
# Any label that is not a key of the mapping comes out of .map() as NaN.
df["TEST.STATUS"] = (
    df["TEST.STATUS"]
    .map({"Passed": "PASS", "Failed": "FAIL"})
    .astype("category")
)

In [8]:
# Keep only rows that have a TEST.ENTITY value and where it is not "Manual".
# Both conditions are applied in a single boolean mask; .notna() replaces the
# `isna() == False` comparison. The explicit .copy() makes the result an
# independent frame, so the column assignments performed in later cells do not
# raise SettingWithCopyWarning on a filtered slice.
df = df[df["TEST.ENTITY"].notna() & (df["TEST.ENTITY"] != "Manual")].copy()


In [9]:
# Replace missing values in three of the columns that identify a run with the
# literal placeholder "NONE". These columns are string-joined when the run id
# is built, and a NaN cell would make that join fail.
# A single DataFrame.fillna with a per-column mapping returns a new frame, so
# nothing is assigned into the filtered slice produced by the previous cell
# (which would raise SettingWithCopyWarning).
df = df.fillna({
    "PROGRAM.PHASE": "NONE",
    "RELEASE": "NONE",
    "ORGANIZATION": "NONE",
})

In [10]:
def get_test_run_id(row):
    """Build a deterministic run identifier for one result row.

    The identifier is ``"<YYYY-MM-DD>$<digest>"``, where the digest is the
    base85-encoded SHA-256 of the execution date and the PROGRAM.PHASE,
    RELEASE, TEST.ENTITY and ORGANIZATION values joined with ``"$"``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            "EXECUTION.DATE" (an object with ``strftime``), "PROGRAM.PHASE",
            "RELEASE", "TEST.ENTITY" and "ORGANIZATION".

    Returns:
        str: The run identifier; rows with equal key values get equal ids.
    """
    date = row["EXECUTION.DATE"].strftime("%Y-%m-%d")
    key_columns = ("PROGRAM.PHASE", "RELEASE", "TEST.ENTITY", "ORGANIZATION")
    # str() leaves string cells unchanged but keeps the join from raising
    # TypeError when a cell holds a non-string value (e.g. a numeric RELEASE).
    fields = "$".join([date, *(str(row[column]) for column in key_columns)])
    # Named `digest` rather than `hash` so the builtin is not shadowed.
    digest = b85encode(sha256(fields.encode()).digest()).decode()
    return f"{date}${digest}"

# Attach a run identifier to every row, then reduce the frame to the three
# output columns (run_id, case_id, status) with a fresh 0..n-1 index.
df["run_id"] = df.apply(get_test_run_id, axis=1).astype("string")
df = (
    df.drop(columns=["EXECUTION.DATE", "PROGRAM.PHASE", "RELEASE", "TEST.ENTITY", "ORGANIZATION"])
    .rename(columns={"TEST.STATUS": "status", "TEST.INSTANCE.ID": "case_id"})
    .loc[:, ["run_id", "case_id", "status"]]
    .reset_index(drop=True)
)

In [11]:
# Optional export, intentionally left disabled: uncomment to write df to output.csv.
# df.to_csv("output.csv", index=False)

In [32]:
# Bar chart of the number of test cases per run, saved as nokia.png:
# one 1-px-wide red column per sampled run, `scale` pixels of height per case.
width, scale = 1000, 10

groups = df.groupby("run_id")["case_id"].count()
if groups.empty:
    raise ValueError("no runs to plot")

# Pick `width` evenly spaced runs in groupby order and convert their counts
# to plain-int pixel heights (numpy scalars are not passed to Pillow).
heights = [int(groups.iloc[len(groups) * i // width]) * scale for i in range(width)]
m = max(heights)

# The canvas is sized to the tallest *sampled* bar directly, instead of being
# allocated at the global maximum and cropped afterwards.
img = Image.new("RGB", (width, m), "white")
draw = ImageDraw.Draw(img)
for i, height in enumerate(heights):
    if height > 0:
        # ImageDraw.line includes both end pixels, so a bar of `height`
        # pixels spans rows 0 .. height - 1.
        draw.line((i, 0, i, height - 1), fill="red")

# Bars were drawn downward from the top edge; flip so they rise from the bottom.
img = ImageOps.flip(img)
img.save("nokia.png")


In [31]:
# Bar chart of the number of tests per Travis job in the LittleProxy dataset,
# saved as travis.png: one 1-px-wide red column per sampled job, `scale`
# pixels of height per test.
width, scale = 1000, 10

tidy = pd.read_csv("../../tcp-framework/datasets/LittleProxy.csv")
groups = tidy.groupby("travisJobId")["testName"].count()
if groups.empty:
    raise ValueError("no jobs to plot")

# Pick `width` evenly spaced jobs in groupby order and convert their counts
# to plain-int pixel heights (numpy scalars are not passed to Pillow).
heights = [int(groups.iloc[len(groups) * i // width]) * scale for i in range(width)]
m = max(heights)

# The canvas is sized to the tallest *sampled* bar directly, instead of being
# allocated at the global maximum and cropped afterwards.
img = Image.new("RGB", (width, m), "white")
draw = ImageDraw.Draw(img)
for i, height in enumerate(heights):
    if height > 0:
        # ImageDraw.line includes both end pixels, so a bar of `height`
        # pixels spans rows 0 .. height - 1.
        draw.line((i, 0, i, height - 1), fill="red")

# Bars were drawn downward from the top edge; flip so they rise from the bottom.
img = ImageOps.flip(img)
img.save("travis.png")