255.36s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


[31mERROR: Could not find a version that satisfies the requirement script (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for script[0m[31m
[0m

In [9]:
import unittest
import mlflow
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pickle
import warnings


In [11]:

class TestModelLoading(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        """Set up MLflow tracking and load the latest model."""
        dagshub_token = os.getenv("DAGSHUB_PAT")
        if not dagshub_token:
            raise EnvironmentError("DAGSHUB_PAT environment variable is not set")

        os.environ["MLFLOW_TRACKING_USERNAME"] = dagshub_token
        os.environ["MLFLOW_TRACKING_PASSWORD"] = dagshub_token

        dagshub_url = "https://dagshub.com"
        repo_owner = "InsightfulSantosh"
        repo_name = "mlops-mini_project"
        mlflow.set_tracking_uri(f'{dagshub_url}/{repo_owner}/{repo_name}.mlflow')

        # Load the latest model version
        cls.new_model_name = "uma"  # Ensure this matches the registered model name
        cls.new_model_version = cls.get_latest_model_version(cls.new_model_name)

        if not cls.new_model_version:
            raise ValueError(f"No valid model version found for {cls.new_model_name}")

        cls.new_model_uri = f'models:/{cls.new_model_name}/{cls.new_model_version}'

        # Handle Python version mismatch warning gracefully
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            cls.new_model = mlflow.pyfunc.load_model(cls.new_model_uri)

        # Load vectorizer safely
        try:
            with open('models/vectorizer.pkl', 'rb') as f:
                cls.vectorizer = pickle.load(f)
        except Exception as e:
            raise FileNotFoundError(f"Error loading vectorizer: {e}")

        # Load test data safely
        try:
            cls.holdout_data = pd.read_csv('data/processed/test_bow.csv')
        except Exception as e:
            raise FileNotFoundError(f"Error loading test data: {e}")

    @staticmethod
    def get_latest_model_version(model_name):
        """Retrieve the latest model version from MLflow using search_model_versions()."""
        client = mlflow.MlflowClient()
        latest_versions = client.search_model_versions(f"name='{model_name}'")
        return latest_versions[0].version if latest_versions else None

    def test_model_loaded_properly(self):
        """Check if the model loaded correctly."""
        self.assertIsNotNone(self.new_model)

    def test_model_signature(self):
        """Test if the model signature matches expected input and output shapes."""
        input_text = "hi how are you"
        input_data = self.vectorizer.transform([input_text])
        input_df = pd.DataFrame(input_data.toarray(), columns=[str(i) for i in range(input_data.shape[1])])

        # Predict using the model
        prediction = self.new_model.predict(input_df)

        # Verify input shape
        self.assertEqual(input_df.shape[1], len(self.vectorizer.get_feature_names_out()))

        # Verify output shape (assuming binary classification with a single output)
        self.assertEqual(len(prediction), input_df.shape[0])
        self.assertEqual(len(prediction.shape), 1)

    def test_model_performance(self):
        """Evaluate model performance on holdout test data."""
        X_holdout = self.holdout_data.iloc[:, :-1]
        y_holdout = self.holdout_data.iloc[:, -1]

        # Predict using the model
        y_pred_new = self.new_model.predict(X_holdout)

        # Calculate performance metrics
        accuracy_new = accuracy_score(y_holdout, y_pred_new)
        precision_new = precision_score(y_holdout, y_pred_new)
        recall_new = recall_score(y_holdout, y_pred_new)
        f1_new = f1_score(y_holdout, y_pred_new)

        # Expected thresholds
        expected_thresholds = {
            "accuracy": 0.90,
            "precision": 0.40,
            "recall": 0.40,
            "f1_score": 0.40,
        }

        # Check if performance meets expectations
        self.assertGreaterEqual(accuracy_new, expected_thresholds["accuracy"], "Accuracy below threshold")
        self.assertGreaterEqual(precision_new, expected_thresholds["precision"], "Precision below threshold")
        self.assertGreaterEqual(recall_new, expected_thresholds["recall"], "Recall below threshold")
        self.assertGreaterEqual(f1_new, expected_thresholds["f1_score"], "F1 score below threshold")

if __name__ == "__main__":
    unittest.main()


usage: ipykernel_launcher.py [-h] [-v] [-q] [--locals] [--durations N] [-f]
                             [-c] [-b] [-k TESTNAMEPATTERNS]
                             [tests ...]
ipykernel_launcher.py: error: argument -f/--failfast: ignored explicit argument '/Users/santoshkumar/Library/Jupyter/runtime/kernel-v3a78b61811442cc7679b44e945043c84246eef01d.json'


AttributeError: 'tuple' object has no attribute 'tb_frame'

In [16]:
!python -m unittest test.ipynb


480.12s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


E
ERROR: ipynb (unittest.loader._FailedTest.ipynb)
----------------------------------------------------------------------
ImportError: Failed to import test module: ipynb
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/unittest/loader.py", line 137, in loadTestsFromName
    module = __import__(module_name)
             ^^^^^^^^^^^^^^^^^^^^^^^
ModuleNotFoundError: No module named 'test.ipynb'


----------------------------------------------------------------------
Ran 1 test in 0.000s

FAILED (errors=1)
