Skip to content
18 changes: 2 additions & 16 deletions src/madengine/tools/run_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,8 @@ def print_perf(self):
print(f"{self.model} performance is {self.performance} {self.metric}")

# Exports all info in json format to json_name
# multiple_results excludes the info provided on csv
# "model,performance,metric" additionally status
# to handle results more generically regardless of what is passed in
# multiple_results excludes the "model,performance,metric,status" keys
# to handle results more generically regardless of the multiple_results csv being passed in
def generate_json(self, json_name: str, multiple_results: bool = False) -> None:
"""Generate JSON file for performance results of a model.

Expand Down Expand Up @@ -1033,19 +1032,6 @@ def run_model(self, model_info: typing.Dict) -> bool:
if multiple_results:
run_details.performance = multiple_results

# check the file of multiple results, check the columns of 'model,performance,metric'
with open(multiple_results, 'r') as f:
header = f.readline().strip().split(',')
# if len(header) != 3:
# raise Exception("Header of multiple results file is not valid.")
for line in f:
row = line.strip().split(',')
# iterate through each column of row to check if it is empty or not
for col in row:
if col == '':
run_details.performance = None
print("Error: Performance metric is empty in multiple results file.")
break
else:
perf_regex = ".*performance:\\s*\\([+|-]\?[0-9]*[.]\\?[0-9]*\(e[+|-]\?[0-9]\+\)\?\\)\\s*.*\\s*"
run_details.performance = self.console.sh("cat " + log_file_path +
Expand Down
25 changes: 13 additions & 12 deletions src/madengine/tools/update_perf_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
"""

# built-in imports
import os
import json
import argparse
import typing
Expand Down Expand Up @@ -98,16 +97,15 @@ def handle_multiple_results(
Raises:
AssertionError: If the number of columns in the performance csv DataFrame is not equal to the length of the row.
"""
# Check that the multiple results CSV has three columns and has the following format:
# model, performance, metric
multiple_results_df = df_strip_columns(pd.read_csv(multiple_results))
multiple_results_header = multiple_results_df.columns.tolist()
# if (len(multiple_results_header) != 3):
# raise RuntimeError("Multiple Results CSV file must have three columns: model, performance, metric")

# Check that the multiple results CSV has the following required columns:
# model, performance, metric
headings = ['model', 'performance', 'metric']
Comment thread
Rohan138 marked this conversation as resolved.
for heading in headings:
if not(heading in multiple_results_header):
raise RuntimeError("Multiple Results CSV file is missing the " + heading + " column")
raise RuntimeError(multiple_results + " file is missing the " + heading + " column")

common_info_json = read_json(common_info)
flatten_tags(common_info_json)
Expand All @@ -116,21 +114,24 @@ def handle_multiple_results(
# add results to perf.csv
for r in multiple_results_df.to_dict(orient="records"):
row = common_info_json.copy()
row["model"] = model_name + "_" + str(r["model"])
row["performance"] = r["performance"]
row["metric"] = r["metric"]
model = r.pop("model")
row["model"] = model_name + "_" + str(model)
row.update(r)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This allows us to add additional columns to the perf csv; this is really the major change from this PR, the other changes just support this e.g. reordering the columns in the final_multiple_results_df to put any additional columns at the end.


if r["performance"] is not None and pd.notna(r["performance"]):
if row["performance"] is not None and pd.notna(row["performance"]):
row["status"] = "SUCCESS"
else:
row["status"] = "FAILURE"

assert perf_csv_df.columns.size == len(row), f"Column count mismatch: CSV has {perf_csv_df.columns.size} columns but row has {len(row)} keys. CSV columns: {list(perf_csv_df.columns)}, Row keys: {list(row.keys())}"
final_multiple_results_df = pd.concat(
[final_multiple_results_df, pd.DataFrame(row, index=[0])], ignore_index=True
)
# Reorder columns according to existing perf csv
columns = perf_csv_df.columns.tolist()
# Add any additional columns to the end
columns = columns + [col for col in final_multiple_results_df.columns if col not in columns]
final_multiple_results_df = final_multiple_results_df[columns]

final_multiple_results_df = final_multiple_results_df[perf_csv_df.columns]
perf_entry_df_to_csv(final_multiple_results_df)
if perf_csv_df.empty:
perf_csv_df = final_multiple_results_df
Expand Down
10 changes: 5 additions & 5 deletions tests/fixtures/dummy/scripts/dummy/run_multi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
# All rights reserved.
#

echo "model,performance,latency,metric,temperature
1,$RANDOM,$RANDOM,samples_per_sec,$RANDOM
2,$RANDOM,$RANDOM,samples_per_sec,$RANDOM
3,$RANDOM,$RANDOM,samples_per_sec,$RANDOM
4,$RANDOM,$RANDOM,samples_per_sec,$RANDOM" >>perf_dummy.csv
echo "model,temperature,performance,metric
1,$RANDOM,$RANDOM,samples_per_sec
2,$RANDOM,$RANDOM,samples_per_sec
3,$RANDOM,$RANDOM,samples_per_sec
4,$RANDOM,$RANDOM,samples_per_sec" >>perf_dummy.csv

cp perf_dummy.csv ../
22 changes: 18 additions & 4 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,26 @@ def test_output_multi_results(self, global_data, clean_test_temp_files):
test output multiple results
"""
output = global_data['console'].sh("cd " + BASE_DIR + "; " + "MODEL_DIR=" + MODEL_DIR + " " + "python3 src/madengine/mad.py run --tags dummy_multi")
# Check if multiple results are written to perf_test.csv
# Check if multiple results are written to perf_dummy.csv
success = False
# Read the csv file to a dataframe using pandas
df = pd.read_csv(os.path.join(BASE_DIR, 'perf_dummy.csv'))
# Check the number of rows in the dataframe is 4, and columns is 5
if df.shape == (4, 5):
multi_df = pd.read_csv(os.path.join(BASE_DIR, 'perf_dummy.csv'))
    # Check that the dataframe has 4 rows and 4 columns
if multi_df.shape == (4, 4):
success = True
if not success:
pytest.fail("The generated multi results is not correct.")
# Check if multiple results from perf_dummy.csv get copied over to perf.csv
perf_df = pd.read_csv(os.path.join(BASE_DIR, 'perf.csv'))
# Get the corresponding rows and columns from perf.csv
perf_df = perf_df[multi_df.columns]
perf_df = perf_df.iloc[-4:, :]
    # Drop the model column from both dataframes; these will not match:
# if multiple results csv has {model}, then perf csv has {tag_name}_{model}
multi_df = multi_df.drop('model', axis=1)
perf_df = perf_df.drop('model', axis=1)
if all(perf_df.columns == multi_df.columns):
success = True
if not success:
pytest.fail("The columns of the generated multi results do not match perf.csv.")