
# Testing ML

This notebook contains the free-form code for the [testing ML lesson](https://madewithml.com/courses/mlops/testing/), where we learn how to test code, data, and models in order to build reliable ML systems.

<div align="left">
<a target="_blank" href="https://madewithml.com/courses/mlops/testing/"><img src="https://img.shields.io/badge/📖 Read-lesson-9cf"></a>&nbsp;
<a href="https://github.com/GokuMohandas/testing-ml/blob/main/testing.ipynb" role="button"><img src="https://img.shields.io/static/v1?label=&amp;message=View%20On%20GitHub&amp;color=586069&amp;logo=github&amp;labelColor=2f363d"></a>&nbsp;
<a href="https://colab.research.google.com/github/GokuMohandas/testing-ml/blob/main/testing.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
</div>

# Great Expectations

In [None]:
!pip install great-expectations==0.15.7 -q

In [None]:
import great_expectations as ge
import json
import pandas as pd
from urllib.request import urlopen

In [None]:
# Load projects
url = "https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/projects.json"
# Context manager closes the HTTP response as soon as the payload has been read,
# instead of leaving the connection open until garbage collection.
with urlopen(url) as response:
    projects = json.loads(response.read())
# Wrap the loaded records in a Great Expectations dataset so the expect_* checks
# in the cells below can be called directly on `df`.
df = ge.dataset.PandasDataset(projects)
print(f"{len(df)} projects")
df.head(5)

955 projects


Unnamed: 0,id,created_on,title,description,tag
0,6,2020-02-20 06:43:18,Comparison between YOLO and RCNN on real world...,Bringing theory to experiment is cool. We can ...,computer-vision
1,7,2020-02-20 06:47:21,"Show, Infer & Tell: Contextual Inference for C...",The beauty of the work lies in the way it arch...,computer-vision
2,9,2020-02-24 16:24:45,Awesome Graph Classification,"A collection of important graph embedding, cla...",graph-learning
3,15,2020-02-28 23:55:26,Awesome Monte Carlo Tree Search,A curated list of Monte Carlo tree search pape...,reinforcement-learning
4,19,2020-03-03 13:54:31,Diffusion to Vector,Reference implementation of Diffusion2Vec (Com...,graph-learning


### Table Expectation(s)

In [None]:
# Table-level check: the dataset's columns are expected to be exactly these
# names, in this order.
df.expect_table_columns_to_match_ordered_list(
    column_list=["id", "created_on", "title", "description", "tag"]
)

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column_list": [
        "id",
        "created_on",
        "title",
        "description",
        "tag"
      ],
      "result_format": "BASIC"
    },
    "expectation_type": "expect_table_columns_to_match_ordered_list"
  },
  "meta": {},
  "result": {
    "observed_value": [
      "id",
      "created_on",
      "title",
      "description",
      "tag"
    ]
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Table-level check: the (title, description) combination is expected to be
# unique across rows.
df.expect_compound_columns_to_be_unique(column_list=["title", "description"])

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column_list": [
        "title",
        "description"
      ],
      "result_format": "BASIC"
    },
    "expectation_type": "expect_compound_columns_to_be_unique"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### Column Expectation(s)

`id`

In [None]:
# `id` values are expected to be unique (no duplicates).
df.expect_column_values_to_be_unique(column="id")

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "id",
      "result_format": "BASIC"
    },
    "expectation_type": "expect_column_values_to_be_unique"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

`created_on`

In [None]:
# `created_on` is expected to contain no null values.
df.expect_column_values_to_not_be_null(column="created_on")

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "created_on",
      "result_format": "BASIC"
    },
    "expectation_type": "expect_column_values_to_not_be_null"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Every `created_on` value is expected to match this strftime format,
# e.g. "2020-02-20 06:43:18".
df.expect_column_values_to_match_strftime_format(
    column="created_on", strftime_format="%Y-%m-%d %H:%M:%S"
)

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "created_on",
      "strftime_format": "%Y-%m-%d %H:%M:%S",
      "result_format": "BASIC"
    },
    "expectation_type": "expect_column_values_to_match_strftime_format"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

`title`

In [None]:
# `title` is expected to contain no null values.
df.expect_column_values_to_not_be_null(column="title")

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "title",
      "result_format": "BASIC"
    },
    "expectation_type": "expect_column_values_to_not_be_null"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# `title` values are expected to be of type `str`.
df.expect_column_values_to_be_of_type(column="title", type_="str")

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "title",
      "type_": "str",
      "result_format": "BASIC"
    },
    "expectation_type": "_expect_column_values_to_be_of_type__map"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

`description`

In [None]:
# `description` is expected to contain no null values.
df.expect_column_values_to_not_be_null(column="description")

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "description",
      "result_format": "BASIC"
    },
    "expectation_type": "expect_column_values_to_not_be_null"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# `description` values are expected to be of type `str`.
df.expect_column_values_to_be_of_type(column="description", type_="str")

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "description",
      "type_": "str",
      "result_format": "BASIC"
    },
    "expectation_type": "_expect_column_values_to_be_of_type__map"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

`tag`

In [None]:
# `tag` is expected to contain no null values.
df.expect_column_values_to_not_be_null(column="tag")

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "tag",
      "result_format": "BASIC"
    },
    "expectation_type": "expect_column_values_to_not_be_null"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# `tag` values are expected to be of type `str`.
df.expect_column_values_to_be_of_type(column="tag", type_="str")

{
  "success": true,
  "expectation_config": {
    "meta": {},
    "kwargs": {
      "column": "tag",
      "type_": "str",
      "result_format": "BASIC"
    },
    "expectation_type": "_expect_column_values_to_be_of_type__map"
  },
  "meta": {},
  "result": {
    "element_count": 955,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### Suite

In [None]:
# Expectation suite
# Gather the expectations recorded on `df` by the expect_* calls above into a
# single suite; discard_failed_expectations=False asks for failed ones to be kept too.
expectation_suite = df.get_expectation_suite(discard_failed_expectations=False)
# Validate `df` against the whole suite. With only_return_failures=True the
# "results" list holds only failing expectations, so it is empty when all pass.
print(df.validate(expectation_suite=expectation_suite, only_return_failures=True))


{
  "success": true,
  "results": [],
  "evaluation_parameters": {},
  "statistics": {
    "evaluated_expectations": 11,
    "successful_expectations": 11,
    "unsuccessful_expectations": 0,
    "success_percent": 100.0
  },
  "meta": {
    "great_expectations_version": "0.15.7",
    "expectation_suite_name": "default",
    "run_id": {
      "run_name": null,
      "run_time": "2022-06-22T17:31:03.478180+00:00"
    },
    "batch_kwargs": {
      "ge_batch_id": "08cd311c-f251-11ec-88b6-0242ac1c0002"
    },
    "batch_markers": {},
    "batch_parameters": {},
    "validation_time": "20220622T173103.478092Z",
    "expectation_suite_meta": {
      "great_expectations_version": "0.15.7"
    }
  }
}
