In [1]:
import mlflow
from mlflow import MlflowClient
import mlflow.sklearn
import pandas as pd
from dotenv import load_dotenv
import os
import boto3
from io import StringIO
from datetime import datetime

# Load variables from a .env file into the process environment. The next cell
# reads its MLFLOW_* and AWS_* settings with os.getenv(), presumably from here.
# As the last expression of the cell, the call's return value is displayed.
load_dotenv()



True

In [2]:

# Point MLflow at the tracking server and experiment named in the environment.
tracking_uri = os.getenv("MLFLOW_TRACKING_URI")
mlflow.set_tracking_uri(tracking_uri)

EXPERIMENT_NAME = os.getenv("MLFLOW_EXPERIMENT_NAME")
mlflow.set_experiment(EXPERIMENT_NAME)

# Look the experiment up through the client API so its id can be searched.
client = MlflowClient()
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)

# Ask the server for runs sorted by f1_score (highest first) and keep only the
# top one, so `runs` holds at most a single entry.
best_first = ["metrics.f1_score DESC"]
runs = client.search_runs(
    [experiment.experiment_id],
    order_by=best_first,
    max_results=1,
)

# Find best run
# `runs` was requested sorted by f1_score descending with max_results=1, so its
# first element (when present) is the best run. An experiment without any run
# yields an empty list; stop with a readable error instead of an IndexError.
if not runs:
    raise RuntimeError(
        f"No runs found in MLflow experiment {EXPERIMENT_NAME!r}; "
        "cannot select a best model."
    )

best_run = runs[0]
best_run_id = best_run.info.run_id

# The metric may be absent from the run (hence .get); formatting None with
# ':.4f' would raise TypeError, so report the missing value explicitly.
best_f1 = best_run.data.metrics.get("f1_score", None)
if best_f1 is None:
    print("Best f1-score : n/a (f1_score was not logged for this run)")
else:
    print(f"Best f1-score : {best_f1:.4f}")

# Load the scikit-learn model stored under the "fraud_pipeline" artifact path
# of the best run, addressed with a "runs:/<run_id>/<artifact_path>" URI.
model_uri = "runs:/" + best_run_id + "/fraud_pipeline"
model = mlflow.sklearn.load_model(model_uri=model_uri)


# simulate real-time data by sampling from test file
# The CSV location can be overridden with the REALTIME_DATA_PATH environment
# variable so the notebook is not tied to one machine; the previously
# hard-coded path is kept as the fallback, so existing setups are unaffected.
REALTIME_DATA_PATH = os.getenv(
    "REALTIME_DATA_PATH",
    "/Users/martinper/Downloads/fraudTest_real_time.csv",
)
df_raw = pd.read_csv(REALTIME_DATA_PATH)
if df_raw.empty:
    # sample(1) on an empty frame fails with a less helpful ValueError.
    raise ValueError(f"No rows to sample from in {REALTIME_DATA_PATH!r}")

# Draw one random row and renumber it to label 0 so .at[0, ...] can address it.
# NOTE(review): reset_index() without drop=True keeps the old row label as an
# extra "index" column, which is then passed to model.predict() and written to
# S3. Confirm the logged pipeline does not rely on it before using drop=True.
sampled_row = df_raw.sample(1).reset_index()

# Keep the true label aside for comparison and remove it from the model input.
expected_result = sampled_row.at[0, 'is_fraud']
new_data = sampled_row.drop(columns=['is_fraud'])

print(new_data)
prediction = model.predict(new_data)
print("Prediction:", prediction)
print("Expected:", expected_result)


# Destination settings for the S3 upload, read from the environment.
# Each name is None when its variable is not set.
AWS_BUCKET_NAME, AWS_ARTIFACT_PATH, AWS_DATA_SAVE_PATH = map(
    os.getenv,
    ("AWS_BUCKET_NAME", "AWS_ARTIFACT_PATH", "AWS_DATA_SAVE_PATH"),
)

# save to s3
# Fail fast with a readable message when a required setting is missing.
# Otherwise an unset variable surfaces later as an opaque TypeError from the
# string concatenation below, or as a parameter error from the S3 client.
missing_settings = [
    name
    for name, value in (
        ("AWS_BUCKET_NAME", AWS_BUCKET_NAME),
        ("AWS_ARTIFACT_PATH", AWS_ARTIFACT_PATH),
        ("AWS_DATA_SAVE_PATH", AWS_DATA_SAVE_PATH),
    )
    if value is None
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

s3 = boto3.client("s3")

# The object key is built by plain concatenation, so the two path settings must
# already carry their own "/" separators. The timestamp comes from
# datetime.now() without a timezone, i.e. it is naive local time.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
key = AWS_ARTIFACT_PATH + AWS_DATA_SAVE_PATH + f"{best_run_id}_{timestamp}.csv"

# Store the model input together with the prediction it produced.
output_df = new_data.copy()
output_df["prediction"] = prediction

# Serialize to CSV in memory and upload the text as the object body.
csv_buffer = StringIO()
output_df.to_csv(csv_buffer, index=False)
s3.put_object(Bucket=AWS_BUCKET_NAME, Key=key, Body=csv_buffer.getvalue())

print(f"Saved on S3 : s3://{AWS_BUCKET_NAME}/{key}")

Best f1-score : 0.5714
   index  Unnamed: 0 trans_date_trans_time           cc_num  \
0   1360      344381   2020-11-01 10:01:50  374497717543058   

                            merchant  category    amt  first   last gender  \
0  fraud_Bernier, Volkman and Hoeger  misc_net  13.64  Linda  Hurst      F   

   ...    zip      lat      long  city_pop                         job  \
0  ...  58579  47.1709 -100.7944      1190  Designer, ceramics/pottery   

          dob                         trans_num   unix_time  merch_lat  \
0  1948-06-30  f224fa0a687b95832d8d351a0e1664bd  1383300110  46.753429   

   merch_long  
0 -101.267053  

[1 rows x 23 columns]
Prediction: [0]
Expected: 0
Saved on S3 : s3://jedhaparis/fraud_detection_artifacts/input_output/cda0b4fe334241d48228d2d59ae00af2_20251017_192516.csv
