Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ml flow integration #683

Merged
merged 15 commits into from
Aug 2, 2023
Merged
114 changes: 114 additions & 0 deletions langtest/langtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ def report(
self,
format: str = "dataframe",
save_dir: str = None,
mlflow_tracking: bool = False,
) -> pd.DataFrame:
"""Generate a report of the test results.

Expand Down Expand Up @@ -488,6 +489,70 @@ def report(

self.df_report = df_report.fillna("-")

if mlflow_tracking:
    # Optionally mirror the report into MLflow: one run per call to report(),
    # with one set of metrics per row of the report dataframe.
    try:
        import mlflow
    except ModuleNotFoundError:
        # Tracking is best-effort. Without mlflow we only tell the user and
        # skip the logging below; falling through would hit a NameError on
        # the first `mlflow.` call.
        print("mlflow package not found. Install mlflow first")
    else:
        import datetime

        # The experiment is named after the model: the model string itself
        # when one was given, otherwise the module of the model object's class.
        experiment_name = (
            self._actual_model
            if isinstance(self._actual_model, str)
            else self._actual_model.__class__.__module__
        )

        # Reuse the experiment if it already exists, otherwise create it.
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            experiment_id = mlflow.create_experiment(experiment_name)
        else:
            experiment_id = experiment.experiment_id

        current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

        # The context manager ends the run even if logging a metric raises,
        # so a failed report never leaves an active run behind.
        with mlflow.start_run(
            run_name=self.task + "_testing_" + current_datetime,
            experiment_id=experiment_id,
        ):
            for _, row in df_report.iterrows():
                test_type = row["test_type"]
                # Rates are stored as strings such as "85%"; log them as
                # fractions in [0, 1].
                mlflow.log_metric(
                    test_type + "_pass_rate",
                    float(row["pass_rate"].rstrip("%")) / 100,
                )
                mlflow.log_metric(
                    test_type + "_min_pass_rate",
                    float(row["minimum_pass_rate"].rstrip("%")) / 100,
                )
                mlflow.log_metric(
                    test_type + "_pass_status", 1 if row["pass"] else 0
                )
                mlflow.log_metric(test_type + "_pass_count", row["pass_count"])
                mlflow.log_metric(test_type + "_fail_count", row["fail_count"])

if format == "dataframe":
return self.df_report
elif format == "dict":
Expand Down Expand Up @@ -562,6 +627,55 @@ def report(

df_report = df_report.reset_index(drop=True)
df_report = df_report.fillna("-")
if mlflow_tracking:
    # Optionally mirror this report into MLflow as its own run.
    try:
        import mlflow
    except ModuleNotFoundError:
        # Tracking is best-effort. Without mlflow we only tell the user and
        # skip the logging below; falling through would hit a NameError on
        # the first `mlflow.` call.
        print("mlflow package not found. Install mlflow first")
    else:
        import datetime

        # NOTE(review): `k` is bound outside this fragment; presumably the
        # key identifying the model this report belongs to — confirm.
        experiment_name = k

        # Reuse the experiment if it already exists, otherwise create it.
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            experiment_id = mlflow.create_experiment(experiment_name)
        else:
            experiment_id = experiment.experiment_id

        current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

        # The context manager ends the run even if logging a metric raises,
        # so a failed report never leaves an active run behind.
        with mlflow.start_run(
            run_name=self.task + "_testing_" + current_datetime,
            experiment_id=experiment_id,
        ):
            for _, row in df_report.iterrows():
                test_type = row["test_type"]
                # Rates are stored as strings such as "85%"; log them as
                # fractions in [0, 1].
                mlflow.log_metric(
                    test_type + "_pass_rate",
                    float(row["pass_rate"].rstrip("%")) / 100,
                )
                mlflow.log_metric(
                    test_type + "_min_pass_rate",
                    float(row["minimum_pass_rate"].rstrip("%")) / 100,
                )
                mlflow.log_metric(
                    test_type + "_pass_status", 1 if row["pass"] else 0
                )

df_final_report = pd.concat([df_final_report, df_report])

Expand Down
Loading