In [None]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchinfo import summary
import time
import numpy as np

In [None]:
import joblib
import torch

# Location of the serialized estimator. Despite the ".pth" suffix the file is
# read back with joblib, not torch.
model_path = "models/rf_model_100.pth"

# NOTE(review): joblib.load unpickles the file, so only point it at artefacts
# that come from a trusted source.
model = joblib.load(model_path)

# Inspect what was restored, one view per print call.
print(model)               # estimator repr
print(model.estimators_)   # fitted sub-estimators
print(model.get_params())  # hyperparameter dictionary

In [None]:
 import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader
import glob

# Glob pattern for the evaluation feature shards.
EVAL_CSV_PATTERN = "object/nyc_taxi_split/eval/final_features_*.csv"

# Sort the matches so rows are concatenated in a stable order on every run.
csv_files = sorted(glob.glob(EVAL_CSV_PATTERN))

# pd.concat() on an empty list fails with an opaque "No objects to concatenate";
# fail early with a message that names the pattern that matched nothing
# (typically a wrong working directory or a missing data download).
if not csv_files:
    raise FileNotFoundError(
        f"No evaluation CSV files matched {EVAL_CSV_PATTERN!r}"
    )

# Load every shard and stack them into one frame with a fresh 0..n-1 index.
dfs = [pd.read_csv(f) for f in csv_files]
test_df = pd.concat(dfs, ignore_index=True)

# Everything except the two count columns is treated as a feature.
target_cols = ["pickup_count", "dropoff_count"]
X = test_df.drop(columns=target_cols).values
y = test_df[target_cols].values

# Convert to float32 tensors so they can be served through a DataLoader.
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

# shuffle=False keeps predictions aligned with the row order of test_df.
test_dataset = TensorDataset(X_tensor, y_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Size in bytes of the serialized model file; reported below in MB (bytes / 1e6).
model_size = os.stat(model_path).st_size
print(f"Model Size on Disk: {model_size / 1e6:.2f} MB")

In [None]:
import torch
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Per-batch outputs are gathered here and stacked once the loop is done.
pred_batches, label_batches = [], []

# The no_grad context is kept from the original cell; the prediction itself is
# a scikit-learn .predict() call on NumPy input.
with torch.no_grad():
    for features, labels in test_loader:
        preds = model.predict(features.numpy())
        pred_batches.append(preds)
        label_batches.append(labels.numpy())

# Stack the per-batch arrays row-wise into one array each.
all_preds = np.vstack(pred_batches)
all_labels = np.vstack(label_batches)

# Regression metrics over the whole evaluation set:
# mean absolute error, root of the mean squared error, and R².
mae = mean_absolute_error(all_labels, all_preds)
rmse = np.sqrt(mean_squared_error(all_labels, all_preds))
r2 = r2_score(all_labels, all_preds)

print(
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.3f}",
    sep="\n",
)

In [None]:
# Re-display the metrics computed earlier (MAE, RMSE, R²), one per line.
print(
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.3f}",
    sep="\n",
)

In [None]:
import time
import numpy as np

num_trials = 100  # Number of timed single-sample predictions

# Take the first row of the first batch and reshape it to (1, n_features),
# i.e. a 2-D array holding exactly one sample.
first_batch, _ = next(iter(test_loader))
single_sample = first_batch[0].numpy().reshape(1, -1)

# Warm-up call so one-time costs are not charged to the first timed trial.
model.predict(single_sample)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations. time.time() is a wall clock whose resolution can
# be coarse and which may jump when the system clock is adjusted, which
# distorts millisecond-scale measurements.
latencies = []
for _ in range(num_trials):
    start_time = time.perf_counter()
    model.predict(single_sample)
    latencies.append(time.perf_counter() - start_time)

# Latencies are recorded in seconds; report them in milliseconds.
print(f"Average latency: {np.mean(latencies) * 1000:.2f} ms")
print(f"Min latency: {np.min(latencies) * 1000:.2f} ms")
print(f"Max latency: {np.max(latencies) * 1000:.2f} ms")

In [None]:
import time
import numpy as np

# Seconds spent inside model.predict() for each batch, and the number of
# samples those batches contained.
batch_times = []
total_samples = 0

for batch in test_loader:
    X_batch, _ = batch
    # Tensor -> NumPy conversion happens outside the timed region so only the
    # model's predict() call is measured.
    X_np = X_batch.numpy()

    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can be too coarse for per-batch timings.
    start_time = time.perf_counter()
    model.predict(X_np)
    end_time = time.perf_counter()

    batch_times.append(end_time - start_time)
    total_samples += X_np.shape[0]

# Throughput = samples predicted / total time spent predicting.
batch_fps = total_samples / np.sum(batch_times)
print(f"Batch Throughput: {batch_fps:.2f} samples/sec")

In [None]:
# Consolidated report of everything measured in the cells above.

# Footprint and accuracy metrics.
print(f"Model Size on Disk: {model_size / 1e6:.2f} MB")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.3f}")

# Single-sample latency percentiles; latencies are in seconds, shown in ms.
for label, q in (("median", 50), ("95th percentile", 95), ("99th percentile", 99)):
    print(f"Inference Latency (single sample, {label}): {np.percentile(latencies, q) * 1000:.2f} ms")

# Throughput: timed trials per second of predict() time, then batched samples/sec.
print(f"Inference Throughput (single sample): {num_trials / np.sum(latencies):.2f} samples/sec")
print(f"Batch Throughput: {batch_fps:.2f} samples/sec")