## Informal Interfaces

### Introduction

In [1]:
# Example of an *informal* interface

# Interface:
class InformalParserInterface:
    """Informal parser interface.

    Lists the methods a parser is expected to provide, but enforces nothing:
    both methods are empty stubs that return None unless overridden.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Load in the file for extracting text (stub, to be overridden)."""

    def extract_text(self, full_file_name: str) -> dict:
        """Extract text from the currently loaded file (stub, to be overridden)."""


# Concrete implementations:
class PdfParser(InformalParserInterface):
    """Extract text from a PDF.

    Overrides both methods of InformalParserInterface. The bodies are
    placeholders that return fixed marker values; no file is read.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Overrides InformalParserInterface.load_data_source().

        Returns a fixed marker string.
        """
        return "PdfParser.load_data_source"

    def extract_text(self, full_file_path: str) -> dict:
        """Overrides InformalParserInterface.extract_text().

        Returns a fixed marker dict.
        """
        return {"PdfParser.extract_text": 0}


class EmlParser(InformalParserInterface):
    """Extract text from an email.

    Inherits from InformalParserInterface but does not override
    extract_text(), so it does not actually honour the interface.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Overrides InformalParserInterface.load_data_source().

        Returns a fixed marker string.
        """
        return "EmlParser.load_data_source"

    def extract_text_from_email(self, full_file_path: str) -> dict:
        """A method defined only in EmlParser.

        Does not override InformalParserInterface.extract_text().
        Returns a fixed marker dict.
        """
        return {"EmlParser.extract_text_from_email": 0}

    # NOTE: doesn't implement `extract_text`, violates interface!

In [2]:
# Both checks print True because issubclass() only follows inheritance here.
# That is a problem: EmlParser violates the interface (no extract_text()).
# Hence informal interfaces are usually a bad idea.
print(
    *(issubclass(parser_cls, InformalParserInterface) for parser_cls in (PdfParser, EmlParser)),
    sep="\n",
)

True
True


In [3]:
# With real inheritance the interface shows up in each parser's MRO.
print(*(parser_cls.__mro__ for parser_cls in (PdfParser, EmlParser)), sep="\n")

(<class '__main__.PdfParser'>, <class '__main__.InformalParserInterface'>, <class 'object'>)
(<class '__main__.EmlParser'>, <class '__main__.InformalParserInterface'>, <class 'object'>)


### Using Metaclasses

* Helps understand the idea behind ABC

In [4]:
# Metaclass that defines __instancecheck__ and __subclasscheck__.
class ParserMeta(type):
    """Metaclass that makes issubclass()/isinstance() a structural check.

    A class passes when it exposes callable ``load_data_source`` and
    ``extract_text`` attributes; inheritance is not consulted.
    """

    def __instancecheck__(cls, instance):
        """Return whether the type of ``instance`` passes the subclass check."""
        instance_type = type(instance)
        return cls.__subclasscheck__(instance_type)

    def __subclasscheck__(cls, subclass):
        """Return True if ``subclass`` has every required method as a callable."""
        required_methods = ("load_data_source", "extract_text")
        return all(
            callable(getattr(subclass, method_name, None))
            for method_name in required_methods
        )


# Interface class that uses our metaclass.
class UpdatedInformalParserInterface(metaclass=ParserMeta):
    """Interface whose membership is decided by the ParserMeta metaclass.

    The class body is intentionally empty. issubclass() and isinstance()
    checks against this interface are answered by
    ParserMeta.__subclasscheck__() and ParserMeta.__instancecheck__(),
    so concrete parsers do not have to inherit from it.
    """


# Concrete implementations:
class PdfParserNew:
    """Extract text from a PDF.

    Does not inherit from any interface; it simply defines the two methods
    that ParserMeta.__subclasscheck__() looks for.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Provides the ``load_data_source`` method required by the interface check.

        Returns a fixed marker string.
        """
        return "PdfParserNew.load_data_source"

    def extract_text(self, full_file_path: str) -> dict:
        """Provides the ``extract_text`` method required by the interface check.

        Returns a fixed marker dict.
        """
        return {"PdfParserNew.extract_text": 0}


class EmlParserNew:
    """Extract text from an email.

    Defines ``load_data_source`` but no ``extract_text``, so it does not
    satisfy the check in ParserMeta.__subclasscheck__().
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Provides the ``load_data_source`` method required by the interface check.

        Returns a fixed marker string.
        """
        return "EmlParserNew.load_data_source"

    def extract_text_from_email(self, full_file_path: str) -> dict:
        """A method defined only in EmlParserNew.

        It is not named ``extract_text``, so it does not count towards the
        interface check. Returns a fixed marker dict.
        """
        return {"EmlParserNew.extract_text_from_email": 0}

By using a metaclass, you **don’t need to explicitly define the subclasses**.

Instead, the subclass must define the required methods. If it doesn’t, then `issubclass(EmlParserNew, UpdatedInformalParserInterface)` will return `False`.

In [5]:
# Membership is now structural: True for PdfParserNew (both methods present),
# False for EmlParserNew (no extract_text()).
print(
    *(
        issubclass(parser_cls, UpdatedInformalParserInterface)
        for parser_cls in (PdfParserNew, EmlParserNew)
    ),
    sep="\n",
)

True


False

In [6]:
# Neither parser inherits from the interface, so it is absent from both MROs.
print(*(parser_cls.__mro__ for parser_cls in (PdfParserNew, EmlParserNew)), sep="\n")

(<class '__main__.PdfParserNew'>, <class 'object'>)
(<class '__main__.EmlParserNew'>, <class 'object'>)


`UpdatedInformalParserInterface` is a superclass of `PdfParserNew`, but it doesn’t appear in the MRO.

This unusual behavior is caused by the fact that `UpdatedInformalParserInterface` is a **virtual base class** of `PdfParserNew`.

### Using Virtual Base Classes

#### Standard base class vs virtual base class:
The key difference between virtual and standard subclasses is that virtual base classes use the `.__subclasscheck__()` dunder method to implicitly check whether a class is a virtual subclass of the superclass. Additionally, virtual base classes don’t appear in the subclass’s MRO.

In [8]:
# Setup for creating your own virtual base classes.

# 1. Metaclass.
class PersonMeta(type):
    """A person metaclass.

    Classes created with it answer issubclass()/isinstance() structurally:
    a candidate passes when it has callable ``name`` and ``age`` attributes.
    """

    def __instancecheck__(cls, instance):
        """Return whether the type of ``instance`` passes the subclass check."""
        instance_type = type(instance)
        return cls.__subclasscheck__(instance_type)

    def __subclasscheck__(cls, subclass):
        """Return True if ``subclass`` has callable ``name`` and ``age`` attributes."""
        required_methods = ("name", "age")
        return all(
            callable(getattr(subclass, method_name, None))
            for method_name in required_methods
        )


# 2. Superclass.
class PersonSuper:
    """A person superclass.

    Ordinary base class with stub ``name()`` and ``age()`` methods; both
    return None unless a subclass overrides them.
    """

    def name(self) -> str:
        """Stub for the person's name."""

    def age(self) -> int:
        """Stub for the person's age."""


# 3. Interface (which uses our metaclass).
class Person(metaclass=PersonMeta):
    """Person interface built from the PersonMeta metaclass.

    Declares no members of its own; issubclass()/isinstance() checks
    against it are answered by PersonMeta.
    """

In [10]:
# Inheriting subclasses
class Employee(PersonSuper):
    """Standard subclass: inherits from PersonSuper.

    PersonSuper will appear in Employee.__mro__, and name()/age() are
    inherited from it unchanged.
    """


class Friend:
    """Built implicitly from Person.

    Friend is a virtual subclass of Person since both required methods
    (name and age) exist and are callable. Person is not in Friend.__mro__.
    """

    def name(self):
        """Placeholder implementation; returns None."""

    def age(self):
        """Placeholder implementation; returns None."""

Although `Friend` does not explicitly inherit from `Person`, it implements `.name()` and `.age()`, so `Person` becomes a **virtual base class** of `Friend`. When you run `issubclass(Friend, Person)` it should return `True`, meaning that `Friend` is a subclass of `Person`.

The following UML diagram shows what happens when you call `issubclass()` on the `Friend` class:

![img](./virtual-base-class.webp)

In [13]:
# Use of __instancecheck__ shown below.
# isinstance() against Person is answered by PersonMeta.__instancecheck__().
# Employee passes because it inherits callable name()/age() from PersonSuper.

employee = Employee()
isinstance(employee, Person)

True

In [12]:
# Friend passes as well: it defines callable name() and age() itself,
# without inheriting from Person or PersonSuper.
friend = Friend()
isinstance(friend, Person)

True

## Formal Interfaces

### Using abc.ABCMeta

#### Approach 1: Using `.__subclasshook__()`

Compared to the informal interface approach above:
* Rather than create your own metaclass, you’ll use `abc.ABCMeta` as the metaclass.
* Then, you’ll override `.__subclasshook__()` in place of `.__instancecheck__()` and `.__subclasscheck__()`, as it creates a more reliable implementation of these dunder methods.

In [14]:
import abc


class FormalParserInterface(metaclass=abc.ABCMeta):  # 1. abc.ABCMeta is the metaclass.
    """Formal parser interface defined purely through ``__subclasshook__``.

    A class counts as a subclass when it has callable ``load_data_source``
    and ``extract_text`` attributes.
    """

    @classmethod  # The hook must be a classmethod.
    def __subclasshook__(cls, subclass):  # 2. The hook is the interface "definition".
        """Return True if ``subclass`` has both required methods, else False.

        This version never returns NotImplemented.
        """
        required_methods = ("load_data_source", "extract_text")
        return all(
            callable(getattr(subclass, method_name, None))
            for method_name in required_methods
        )


class PdfParserNew:
    """Extract text from a PDF.

    Does not inherit from FormalParserInterface; it defines the two methods
    that FormalParserInterface.__subclasshook__() looks for.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Provides the ``load_data_source`` method required by the interface hook.

        Returns a fixed marker string.
        """
        return "PdfParserNew.load_data_source"

    def extract_text(self, full_file_path: str) -> dict:
        """Provides the ``extract_text`` method required by the interface hook.

        Returns a fixed marker dict.
        """
        return {"PdfParserNew.extract_text": 0}


class EmlParserNew:
    """Extract text from an email.

    Defines ``load_data_source`` but no ``extract_text``, so it does not
    satisfy FormalParserInterface.__subclasshook__().
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Provides the ``load_data_source`` method required by the interface hook.

        Returns a fixed marker string.
        """
        return "EmlParserNew.load_data_source"

    def extract_text_from_email(self, full_file_path: str) -> dict:
        """A method defined only in EmlParserNew.

        It is not named ``extract_text``, so it does not count towards the
        interface hook. Returns a fixed marker dict.
        """
        return {"EmlParserNew.extract_text_from_email": 0}

In [16]:
# Works as expected: True for PdfParserNew (both methods present),
# False for EmlParserNew (no extract_text()).
print(
    *(issubclass(parser_cls, FormalParserInterface) for parser_cls in (PdfParserNew, EmlParserNew)),
    sep="\n",
)

True
False


#### Using `abc` to Register a Virtual Subclass

Once you’ve imported the `abc` module, you can directly **register a virtual subclass** by using the `.register()` metamethod.

In the next example, you register the interface `Double` as a virtual base class of the built-in `float` class:

In [20]:
class Double(metaclass=abc.ABCMeta):  # 1. abc.ABCMeta is the metaclass.
    """Double precision floating point number."""


Double.register(float);  # 2. Register float directly as a virtual subclass of Double.

# NOTE:
# So here, `Double` becomes a virtual base class of `float`.
# `Double` --[virtual base class of]--> `float`
# ⚠️ Don't be confused, it's NOT the other way round! ⚠️

In [21]:
# float was registered on Double, so it counts as a (virtual) subclass.
issubclass(float, Double)

True

In [22]:
# Instances of the registered class pass isinstance() as well.
isinstance(1.2345, Double)

True

In [23]:
# But not the other way round: registration does not make Double a subclass of float.
issubclass(Double, float)

False

You can also **use it as class decorator** to set the decorated class as a virtual subclass:

In [24]:
@Double.register  # Decorator form of .register(): marks Double64 as a virtual subclass of Double.
class Double64:
    """A 64-bit double-precision floating-point number."""


# Double64 was registered through the @Double.register decorator.
print(issubclass(Double64, Double))  # True

True


#### Using Subclass Detection With Registration

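⚠️ You must be careful when you’re combining `.__subclasshook__()` with `.register()`, as `.__subclasshook__()` takes precedence over virtual subclass registration.

To ensure that the registered virtual subclasses are taken into consideration, **`.__subclasshook__()` must return `NotImplemented` (rather than `False`) when its own check does not match**. Returning `NotImplemented` tells `abc.ABCMeta` to fall back to its usual mechanism, which includes the registered virtual subclasses.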

The `FormalParserInterface` would be updated to the following:

In [27]:
class FormalParserInterface(metaclass=abc.ABCMeta):
    """Formal parser interface combining a subclass hook with ``.register()``.

    A class counts as a subclass when it has callable ``load_data_source``
    and ``extract_text`` attributes, or when it has been registered.
    """

    @classmethod
    def __subclasshook__(cls, subclass):
        """Return True on a structural match, otherwise NotImplemented.

        Returning NotImplemented instead of False is what lets the
        ``.register()`` approach work.
        """
        required_methods = ("load_data_source", "extract_text")
        has_required_methods = all(
            callable(getattr(subclass, method_name, None))
            for method_name in required_methods
        )
        return has_required_methods or NotImplemented


class PdfParserNew:
    """Extract text from a PDF.

    Not registered and not inheriting; it is recognised by
    FormalParserInterface.__subclasshook__() because it defines both
    required methods.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Provides the ``load_data_source`` method required by the interface hook.

        Returns a fixed marker string.
        """
        return "PdfParserNew.load_data_source"

    def extract_text(self, full_file_path: str) -> dict:
        """Provides the ``extract_text`` method required by the interface hook.

        Returns a fixed marker dict.
        """
        return {"PdfParserNew.extract_text": 0}


@FormalParserInterface.register  # Using the `.register()` approach here.
class EmlParserNew:
    """Extract text from an email.

    Registered explicitly as a virtual subclass of FormalParserInterface,
    even though it defines no ``extract_text`` method.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Provides the ``load_data_source`` method required by the interface hook.

        Returns a fixed marker string.
        """
        return "EmlParserNew.load_data_source"

    def extract_text_from_email(self, full_file_path: str) -> dict:
        """A method defined only in EmlParserNew.

        It is not named ``extract_text``, so it does not count towards the
        interface hook. Returns a fixed marker dict.
        """
        return {"EmlParserNew.extract_text_from_email": 0}


# Prints True (PdfParserNew: structural match) and True (EmlParserNew: registered).
# NOTE:
# The second True is a problem! EmlParserNew now supposedly conforms to
# FormalParserInterface even though it doesn't implement the necessary method.
# This is because we used the `.register()` decorator on it, which "forced it".
# Hence, this approach can be dangerous and is best avoided!
print(
    *(issubclass(parser_cls, FormalParserInterface) for parser_cls in (PdfParserNew, EmlParserNew)),
    sep="\n",
)

True
True


#### Using Abstract Method Declaration

In [34]:
class FormalParserInterface(metaclass=abc.ABCMeta):
    """Formal parser interface with abstract methods and a subclass hook.

    Subclasses that inherit from this class must override both abstract
    methods before they can be instantiated. Unrelated classes that define
    callable ``load_data_source`` and ``extract_text`` attributes are still
    recognised by issubclass()/isinstance() through ``__subclasshook__``.
    """

    @classmethod
    def __subclasshook__(cls, subclass):
        """Return True on a structural match, otherwise NotImplemented.

        The structural check is applied only when the hook is invoked on
        FormalParserInterface itself. Concrete parsers inherit this
        classmethod; without the guard, any class with the two methods would
        be reported as a subclass of every concrete parser as well.
        """
        if cls is not FormalParserInterface:
            return NotImplemented
        required_methods = ("load_data_source", "extract_text")
        has_required_methods = all(
            callable(getattr(subclass, method_name, None))
            for method_name in required_methods
        )
        # NotImplemented (not False) lets ABCMeta fall back to its usual checks.
        return has_required_methods or NotImplemented

    @abc.abstractmethod
    def load_data_source(self, path: str, file_name: str):
        """Load in the data set"""
        raise NotImplementedError

    @abc.abstractmethod
    def extract_text(self, full_file_path: str):
        """Extract text from the data set"""
        raise NotImplementedError

class PdfParserNew(FormalParserInterface):  # NOTE: Here, using inheritance, unlike other cases!
    """Extract text from a PDF.

    Inherits from FormalParserInterface and overrides both of its abstract
    methods, so it can be instantiated.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Overrides FormalParserInterface.load_data_source().

        Returns a fixed marker string.
        """
        return "PdfParserNew.load_data_source"

    def extract_text(self, full_file_path: str) -> dict:
        """Overrides FormalParserInterface.extract_text().

        Returns a fixed marker dict.
        """
        return {"PdfParserNew.extract_text": 0}

class EmlParserNew(FormalParserInterface):  # NOTE: Here, using inheritance, unlike other cases!
    """Extract text from an email.

    Inherits from FormalParserInterface but leaves the abstract method
    extract_text() unimplemented.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Overrides FormalParserInterface.load_data_source().

        Returns a fixed marker string.
        """
        return "EmlParserNew.load_data_source"

    def extract_text_from_email(self, full_file_path: str) -> dict:
        """A method defined only in EmlParserNew.

        Does not override FormalParserInterface.extract_text().
        Returns a fixed marker dict.
        """
        return {"EmlParserNew.extract_text_from_email": 0}


In [35]:
# Instantiation succeeds: PdfParserNew overrides both abstract methods.
pdf_parser = PdfParserNew()

In [36]:
# EmlParserNew leaves the abstract method extract_text() unimplemented, so
# instantiating it raises TypeError. The error is the point of this demo:
# catch and print it so that "Restart & Run All" is not halted by this cell.
try:
    eml_parser = EmlParserNew()
except TypeError as exc:
    print("TypeError:", exc)

TypeError: Can't instantiate abstract class EmlParserNew with abstract methods extract_text

In [43]:
# Q: Do you still need `__subclasshook__()`?
# A: No, you don't need it in this approach. 

class FormalParserInterface(metaclass=abc.ABCMeta):
    """Formal parser interface declared only through abstract methods.

    Subclasses must override load_data_source() and extract_text() before
    they can be instantiated.
    """

    # No `__subclasshook__()` in this approach.

    @abc.abstractmethod
    def load_data_source(self, path: str, file_name: str):
        """Load in the data set"""
        # BTW a docstring-only body is enough; no need to raise NotImplementedError.

    @abc.abstractmethod
    def extract_text(self, full_file_path: str):
        """Extract text from the data set"""
        # BTW a docstring-only body is enough; no need to raise NotImplementedError.

class PdfParserNew(FormalParserInterface):  # NOTE: Here, using inheritance, unlike other cases!
    """Extract text from a PDF.

    Inherits from FormalParserInterface and overrides both of its abstract
    methods, so it can be instantiated.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Overrides FormalParserInterface.load_data_source().

        Returns a fixed marker string.
        """
        return "PdfParserNew.load_data_source"

    def extract_text(self, full_file_path: str) -> dict:
        """Overrides FormalParserInterface.extract_text().

        Returns a fixed marker dict.
        """
        return {"PdfParserNew.extract_text": 0}

class EmlParserNew(FormalParserInterface):  # NOTE: Here, using inheritance, unlike other cases!
    """Extract text from an email.

    Inherits from FormalParserInterface but leaves the abstract method
    extract_text() unimplemented.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Overrides FormalParserInterface.load_data_source().

        Returns a fixed marker string.
        """
        return "EmlParserNew.load_data_source"

    def extract_text_from_email(self, full_file_path: str) -> dict:
        """A method defined only in EmlParserNew.

        Does not override FormalParserInterface.extract_text().
        Returns a fixed marker dict.
        """
        return {"EmlParserNew.extract_text_from_email": 0}

In [44]:
# Instantiation succeeds: PdfParserNew overrides both abstract methods.
pdf_parser = PdfParserNew()

In [45]:
# EmlParserNew leaves the abstract method extract_text() unimplemented, so
# instantiating it raises TypeError. The error is the point of this demo:
# catch and print it so that "Restart & Run All" is not halted by this cell.
try:
    eml_parser = EmlParserNew()
except TypeError as exc:
    print("TypeError:", exc)

TypeError: Can't instantiate abstract class EmlParserNew with abstract methods extract_text

In [40]:
# NOTE: Also, now you can inherit directly from ABC: https://docs.python.org/3.8/library/abc.html
from abc import ABC

class FormalParserInterface(ABC):  # <-- Inheriting from ABC; no need to specify the metaclass.
    """Formal parser interface declared only through abstract methods.

    Subclasses must override load_data_source() and extract_text() before
    they can be instantiated.
    """

    @abc.abstractmethod
    def load_data_source(self, path: str, file_name: str):
        """Load in the data set"""

    @abc.abstractmethod
    def extract_text(self, full_file_path: str):
        """Extract text from the data set"""

class PdfParserNew(FormalParserInterface):  # NOTE: Here, using inheritance, unlike other cases!
    """Extract text from a PDF.

    Inherits from FormalParserInterface and overrides both of its abstract
    methods, so it can be instantiated.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Overrides FormalParserInterface.load_data_source().

        Returns a fixed marker string.
        """
        return "PdfParserNew.load_data_source"

    def extract_text(self, full_file_path: str) -> dict:
        """Overrides FormalParserInterface.extract_text().

        Returns a fixed marker dict.
        """
        return {"PdfParserNew.extract_text": 0}

class EmlParserNew(FormalParserInterface):  # NOTE: Here, using inheritance, unlike other cases!
    """Extract text from an email.

    Inherits from FormalParserInterface but leaves the abstract method
    extract_text() unimplemented.
    """

    def load_data_source(self, path: str, file_name: str) -> str:
        """Overrides FormalParserInterface.load_data_source().

        Returns a fixed marker string.
        """
        return "EmlParserNew.load_data_source"

    def extract_text_from_email(self, full_file_path: str) -> dict:
        """A method defined only in EmlParserNew.

        Does not override FormalParserInterface.extract_text().
        Returns a fixed marker dict.
        """
        return {"EmlParserNew.extract_text_from_email": 0}


In [41]:
# Instantiation succeeds: PdfParserNew overrides both abstract methods.
pdf_parser = PdfParserNew()

In [42]:
# EmlParserNew leaves the abstract method extract_text() unimplemented, so
# instantiating it raises TypeError. The error is the point of this demo:
# catch and print it so that "Restart & Run All" is not halted by this cell.
try:
    eml_parser = EmlParserNew()
except TypeError as exc:
    print("TypeError:", exc)

TypeError: Can't instantiate abstract class EmlParserNew with abstract methods extract_text