Skip to content

Commit

Permalink
Merge pull request elastic#2 from HiDAl/palcantar/add-index-summary
Browse files Browse the repository at this point in the history
  • Loading branch information
joegallo committed Jan 13, 2023
2 parents 05c9d60 + 181ce3a commit 5864aff
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 21 deletions.
72 changes: 51 additions & 21 deletions ingest_report.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/usr/bin/env python3

import click
import json
import math
import numpy as np
Expand All @@ -23,7 +25,7 @@ def _ingests(data):
def _processors(pipeline):
return [next(iter(processor.keys())) for processor in pipeline["processors"]]

def pipelines(data):
def pipelines_summary(data):
ingests = _ingests(data)

pipelines = set()
Expand Down Expand Up @@ -52,19 +54,35 @@ def pipelines(data):

def total(data):
ingests = _ingests(data)

arr = np.zeros((1, 2), dtype=np.int64)

t_count = 0
t_time_in_millis = 0
t_ingest_time_in_millis = 0
for ingest in ingests:
t_count += ingest["total"]["count"]
t_time_in_millis += ingest["total"]["time_in_millis"]
t_ingest_time_in_millis += ingest["total"]["time_in_millis"]

t_index_time_in_millis = 0
nodes = data["nodes"]
for node in nodes:
if "indices" not in nodes[node]:
break

t_index_time_in_millis = t_index_time_in_millis + nodes[node]["indices"]["indexing"]["index_time_in_millis"]

arr[0, 0] = t_count
arr[0, 1] = t_time_in_millis
arr[0, 1] = t_ingest_time_in_millis

df = pd.DataFrame(arr, index=["total"], columns=["count", "time_in_millis"])
df["time_in_nanos"] = ((df["time_in_millis"] * 1000000) / (df["count"] + 1)).apply(np.ceil).astype(np.int64)

if t_index_time_in_millis == 0:
df["ingest_time_%"] = "N/A"
else:
df["ingest_time_%"] = 100 * t_ingest_time_in_millis / (t_ingest_time_in_millis + t_index_time_in_millis)
df["index_time_in_millis"] = t_index_time_in_millis

return df

def processor(data, pipeline):
Expand Down Expand Up @@ -94,29 +112,51 @@ def processor(data, pipeline):
df["percent"] = (df["time_in_millis"] * 100 / df["time_in_millis"].sum()).astype(np.float32)
return df

def main(diagnostic):
def validate(diagnostic_directory):
version = os.path.join(diagnostic_directory, "version.json")
if not os.path.exists(version):
print("Missing the file `version.json` in the directory")
sys.exit(1)

nodes_stats = os.path.join(diagnostic_directory, "nodes_stats.json")
if not os.path.exists(nodes_stats):
print("Missing the file `nodes_stats.json` in the directory")
sys.exit(1)

@click.command()
@click.argument("diagnostic_directory")
@click.option('--full', is_flag=True, default=False)
def command(full, diagnostic_directory):
validate(diagnostic_directory)

pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.float_format', '{:,.1f}%'.format)

with open(os.path.join(diagnostic, "nodes_stats.json")) as f:
with open(os.path.join(diagnostic_directory, "nodes_stats.json")) as f:
data = json.load(f)

p = pipelines(data)
p = pipelines_summary(data)
p = p.sort_values("time_in_millis", ascending=False)
pt = p.sum().drop(["time_in_nanos","percent"])
t = total(data)

print(title("Ingest Summary:"))
print(title("Ingest & Index Summary:"))
print(t)
print()

print(title("Pipeline Summary:"))
print(p.head(5))
if full:
print(p)
else:
print(p.head(5))

print("{0:.0%}".format((pt / t.loc["total"])["time_in_millis"]))
print()

for pipeline in p.index[0:5]:
pipelines = sorted(p.index) if full else p.index[0:5]

for pipeline in pipelines:
pr = processor(data, pipeline)
pr = pr.sort_values("time_in_millis", ascending=False)
prt = pr.sum().drop(["index","time_in_nanos","percent"])
Expand All @@ -128,14 +168,4 @@ def main(diagnostic):


if __name__ == "__main__":
if len(sys.argv) == 1:
print("Run as ingest_report.py <path to some diagnostic>")
sys.exit(1)

diagnostic = sys.argv[1]
version = os.path.join(diagnostic, "version.json")
if not os.path.exists(version):
print("Run as ingest_report.py <path to some diagnostic>")
sys.exit(1)

main(diagnostic)
command()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
click==8.1.3
numpy==1.23.4
pandas==1.5.1
Pygments==2.13.0
Expand Down

0 comments on commit 5864aff

Please sign in to comment.