In [2]:
import pandas as pd

url = "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2024-03.parquet"
df = pd.read_parquet(url)

df.shape


(57457, 20)

In [3]:
# Step 1: Imports for Evidently metrics
from evidently.report import Report
from evidently.metrics import ColumnQuantileMetric, DatasetMissingValuesMetric

# Split dataset into reference and current
reference_data = df.sample(frac=0.5, random_state=42)
current_data = df.drop(reference_data.index)

# Define Evidently metrics
metrics = [
    DatasetMissingValuesMetric(),  # track missing values for all columns
    ColumnQuantileMetric(column_name="fare_amount", quantile=0.5)  # median fare
]

# Create and run the report
report = Report(metrics=metrics)
report.run(reference_data=reference_data, current_data=current_data)

# Extract metrics programmatically
results = report.as_dict()

# Missing values for 'fare_amount'
missing_values_result = results["metrics"][0]["result"]["current"]
missing_values_fare_amount = missing_values_result.get("fare_amount", None)

# Median fare (0.5 quantile)
median_fare = results["metrics"][1]["result"]["current"]["value"]

print(f"Missing values in fare_amount: {missing_values_fare_amount}")
print(f"Median fare_amount (0.5 quantile): {median_fare}")


Missing values in fare_amount: None
Median fare_amount (0.5 quantile): 13.5
