Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bin/produce_report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
python -m openshift_metrics.merge /data/*.json \
--invoice-file /tmp/invoice.csv \
--pod-report-file /tmp/pod-report.csv \
--upload-to-s3
--upload-to-s3 \
--use-nerc-rates
38 changes: 33 additions & 5 deletions openshift_metrics/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from datetime import datetime, UTC
import json
from typing import Tuple
from decimal import Decimal
import nerc_rates

from openshift_metrics import utils
from openshift_metrics import utils, invoice
from openshift_metrics.metrics_processor import MetricsProcessor

def compare_dates(date_str1, date_str2):
Expand Down Expand Up @@ -53,6 +55,15 @@ def main():
nargs="*",
help="List of timestamp ranges in UTC to ignore in the format 'YYYY-MM-DDTHH:MM:SS,YYYY-MM-DDTHH:MM:SS'"
)
parser.add_argument(
"--use-nerc-rates",
action="store_true",
help="Use rates from the nerc-rates repo",
)
parser.add_argument("--rate-cpu-su", type=Decimal)
parser.add_argument("--rate-gpu-v100-su", type=Decimal)
parser.add_argument("--rate-gpu-a100sxm4-su", type=Decimal)
parser.add_argument("--rate-gpu-a100-su", type=Decimal)
Comment on lines +63 to +66
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way things are currently structured, it's legal to run with, say, --use-nerc-rates --rate-cpu-su=1, even though the second argument would be ignored. This would be a good use case for a mutually exclusive group. Alternatively, should it be possible to override individual rates from nerc-rates with command-line options?

Since none of the rate options have defaults, they will be 0 if unspecified. Practically, that means that when not using --use-nerc-rates, all the rate options need to be set explicitly. Should we raise an error if only some of them are set explicitly?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@larsks right, I did look into using mutually exclusive group but couldn't get it to work with my use case. I'll take another look at it.

Since none of the rate options have defaults, they will be 0 if unspecified.

I tested this behavior: the value is actually None if unspecified, and when I then cast it to Decimal it throws an error:

  File "/Users/naved/work/openshift-usage-scripts/openshift_metrics/merge.py", line 116, in main
    gpu_a100=Decimal(args.rate_gpu_a100_su),
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: conversion from NoneType to Decimal is not supported

So it raises an ugly error if not all of the rates are set, but it doesn't silently set them to 0. And now that I think about it, I actually don't need to manually cast to Decimal, since the type is already specified on the argument. So, if one of the rates is not specified, the program will fail at a later stage when it tries to use this None rate (which would be ugly).

I did think of these scenarios when I was creating this PR, and I had a function to manually validate all these conditions, but I felt it's getting too complicated for a tool that will mostly be run automatically. But I'd be happy to add the validation back. I am going to first see if I can use mutually exclusive groups; if not, I'll add some custom argument validation for these.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I felt it's getting too complicated for a tool that will mostly be run automatically

Yeah, there's a reason I marked the PR as "approved" :).

I did look into using mutually exclusive group but couldn't get it to work with my use case

On second thought, it might be tricky to make this behave the way I was thinking.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, there's a reason I marked the PR as "approved" :).

In that case, I am going to go ahead and merge this.


args = parser.parse_args()
files = args.files
Expand Down Expand Up @@ -91,6 +102,22 @@ def main():

report_month = datetime.strftime(report_start_date, "%Y-%m")

if args.use_nerc_rates:
nerc_data = nerc_rates.load_from_url()
rates = invoice.Rates(
cpu=Decimal(nerc_data.get_value_at("CPU SU Rate", report_month)),
gpu_a100=Decimal(nerc_data.get_value_at("GPUA100 SU Rate", report_month)),
gpu_a100sxm4=Decimal(nerc_data.get_value_at("GPUA100SXM4 SU Rate", report_month)),
gpu_v100=Decimal(nerc_data.get_value_at("GPUV100 SU Rate", report_month)),
)
else:
rates = invoice.Rates(
cpu=Decimal(args.rate_cpu_su),
gpu_a100=Decimal(args.rate_gpu_a100_su),
gpu_a100sxm4=Decimal(args.rate_gpu_a100sxm4_su),
gpu_v100=Decimal(args.rate_gpu_v100_su)
)

if args.invoice_file:
invoice_file = args.invoice_file
else:
Expand All @@ -109,10 +136,11 @@ def main():
["cpu_request", "memory_request", "gpu_request", "gpu_type"]
)
utils.write_metrics_by_namespace(
condensed_metrics_dict,
invoice_file,
report_month,
ignore_hours,
condensed_metrics_dict=condensed_metrics_dict,
file_name=invoice_file,
report_month=report_month,
rates=rates,
ignore_hours=ignore_hours,
)
utils.write_metrics_by_pod(condensed_metrics_dict, pod_report_file, ignore_hours)

Expand Down
31 changes: 28 additions & 3 deletions openshift_metrics/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,19 @@
from requests.exceptions import ConnectionError
import tempfile
from unittest import TestCase, mock
from decimal import Decimal

from openshift_metrics import utils, invoice
import os
from datetime import datetime, UTC

RATES = invoice.Rates(
cpu = Decimal("0.013"),
gpu_a100sxm4 = Decimal("2.078"),
gpu_a100 = Decimal("1.803"),
gpu_v100 = Decimal("1.214")
)

class TestGetNamespaceAnnotations(TestCase):

@mock.patch('openshift_metrics.utils.requests.post')
Expand Down Expand Up @@ -241,7 +249,12 @@ def test_write_metrics_log(self, mock_gna):
"2023-01,namespace2,namespace2,PI2,,,,,48,OpenShift GPUA100SXM4,2.078,99.74\n")

with tempfile.NamedTemporaryFile(mode="w+") as tmp:
utils.write_metrics_by_namespace(test_metrics_dict, tmp.name, "2023-01")
utils.write_metrics_by_namespace(
condensed_metrics_dict=test_metrics_dict,
file_name=tmp.name,
report_month="2023-01",
rates=RATES
)
self.assertEqual(tmp.read(), expected_output)


Expand Down Expand Up @@ -286,14 +299,22 @@ def test_write_metrics_by_namespace_decimal(self, mock_gna):
"2023-01,namespace1,namespace1,PI1,,,,76,35,OpenShift CPU,0.013,0.46\n")

with tempfile.NamedTemporaryFile(mode="w+") as tmp:
utils.write_metrics_by_namespace(test_metrics_dict, tmp.name, "2023-01")
utils.write_metrics_by_namespace(
condensed_metrics_dict=test_metrics_dict,
file_name=tmp.name,
report_month="2023-01",
rates=RATES
)
self.assertEqual(tmp.read(), expected_output)


class TestWriteMetricsWithIgnoreHours(TestCase):
def setUp(self):
"""Creates a test dictionary with condensed data that can be used to test WriteMetricsByPod and WriteMetricsByNamespace"""
start_dt = int(datetime.fromisoformat("2024-04-10T11:00:00Z").timestamp())



self.ignore_times = [
(
datetime(2024, 4, 9, 11, 0, 0, tzinfo=UTC),
Expand Down Expand Up @@ -371,7 +392,11 @@ def test_write_metrics_by_namespace_with_ignore_hours(self, mock_gna):

with tempfile.NamedTemporaryFile(mode="w+") as tmp:
utils.write_metrics_by_namespace(
self.test_metrics_dict, tmp.name, "2023-01", self.ignore_times
condensed_metrics_dict=self.test_metrics_dict,
file_name=tmp.name,
report_month="2023-01",
rates=RATES,
ignore_hours=self.ignore_times
)
self.assertEqual(tmp.read(), expected_output)

Expand Down
10 changes: 1 addition & 9 deletions openshift_metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def csv_writer(rows, file_name):
csvwriter.writerows(rows)


def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month, ignore_hours=None):
def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month, rates, ignore_hours=None):
"""
Process metrics dictionary to aggregate usage by namespace and then write that to a file
"""
Expand All @@ -134,14 +134,6 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month,

rows.append(headers)

# TODO: the caller will pass in the rates as an argument
rates = invoice.Rates(
cpu = Decimal("0.013"),
gpu_a100 = Decimal("1.803"),
gpu_a100sxm4 = Decimal("2.078"),
gpu_v100 = Decimal("1.214")
)

for namespace, pods in condensed_metrics_dict.items():
namespace_annotation_dict = namespace_annotations.get(namespace, {})
cf_pi = namespace_annotation_dict.get("cf_pi")
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
requests>=2.18.4
boto3>=1.34.40
https://github.com/CCI-MOC/nerc-rates/archive/main.zip
Loading