# Class Inheritance

Imagine a project with data from 2 sensors. The data for both sensors is logged daily into delimited text files, like so:


```
data/
    |
    +-- sensor_a/
    |   |
    |   +-- 20200101.dat
    |   +-- 20200102.dat
    |   +-- ...
    |
    +-- sensor_b/
        |
        +-- 20200101.dat
        +-- 20200102.dat
        +-- ...
```

While the file structure is the same, the format of the files is different. Sensor A logs *every second* in a format that looks like this:

Sample sensor A:

```
TIME;TEMP;HUM
2020-01-01 14:00:00;20.1;40.0
2020-01-01 14:00:01;20.2;39.8
2020-01-01 14:00:02;20.3;40.0
2020-01-01 14:00:03;20.3;40.2
2020-01-01 14:00:04;20.3;40.0
```

Sensor B logs *every minute* in a format that looks like this:

Sample sensor B:

```
TS|CO2|NO2
1577887200|602.200|1.973
1577887260|599.917|2.270
1577887320|598.083|2.842
1577887380|596.600|2.590
1577887440|599.083|2.692
```

Note the difference in time notation and use of a different delimiter character.

## The Base Reader Class

In [None]:
import pandas as pd


class DataReader:
    """Base class for reading delimited text files into a DataFrame.

    Subclasses configure the reader by overriding the class attributes
    ``_csv_params``, ``_required`` and ``_rename`` and, optionally, the
    ``_process`` hook.
    """

    # Keyword arguments forwarded to pd.read_csv
    # Note: see pd.read_csv for available options
    _csv_params = {}

    # Names of the columns that must be present in the loaded file.
    # (Named ``_required`` because that is the attribute ``_check_required``
    # reads and the one subclasses override.)
    _required = []

    # Renaming options: a dict, a callable, or None for no renaming.
    # Note: a plain function stored on the class is bound as a method when
    # read through ``self``; wrap it in ``staticmethod(...)`` to avoid that.
    _rename = None

    def _load_file(self, path):
        """Load the delimited file at ``path`` using ``_csv_params``."""

        return pd.read_csv(path, **self._csv_params)

    def _check_required(self, df):
        """Check that every column listed in ``_required`` is present.

        Raises:
            RuntimeError: if one or more required columns are missing.
        """

        missing = set(self._required) - set(df.columns)
        if missing:
            # Sort so the message does not depend on set iteration order.
            raise RuntimeError(
                f"Missing columns in the data: {', '.join(sorted(missing))}"
            )

    def _rename_columns(self, df):
        """Return ``df`` with columns renamed according to ``_rename``.

        The frame is returned unchanged when ``_rename`` is neither a dict
        nor a callable, so the processing pipeline always gets a DataFrame.
        """

        rename = self._rename
        # ``callable`` is a function, not a type, so it cannot be passed to
        # isinstance(); test for it separately.
        if isinstance(rename, dict) or callable(rename):
            return df.rename(columns=rename)
        return df

    def _process(self, df):
        """Hook for post-processing; the base implementation returns ``df``."""

        return df

    def load(self, path):
        """Load, validate, rename and process the data at ``path``.

        Required columns are checked against the names as they appear in
        the file, i.e. before renaming is applied.
        """

        df = self._load_file(path)
        self._check_required(df)

        # Process the data
        return (
            df
            .pipe(self._rename_columns)
            .pipe(self._process)
        )

## Processing Sensor A

In [None]:
class SensorAReader(DataReader):
    """Reader for the semicolon-delimited Sensor A data files."""

    # Split on ";" and parse the TIME column as dates while reading
    _csv_params = dict(sep=";", parse_dates=["TIME"])

    # Columns that must be present in the raw file
    _required = ["TIME", "TEMP", "HUM"]

    # Raw column name -> descriptive column name
    _rename = dict(TIME="datetime", TEMP="temperature", HUM="humidity_pct")
    

In [None]:
# Read one daily Sensor A file; columns come back as datetime, temperature, humidity_pct
SensorAReader().load("data/sensor_a/20200101.dat")

## Processing Sensor B

In [None]:
class SensorBReader(DataReader):
    """Reader for the pipe-delimited Sensor B data files."""

    # Split on "|" while reading
    _csv_params = dict(sep="|")

    # Columns that must be present in the raw file
    _required = ["TS", "CO2", "NO2"]

    # Only the timestamp column gets a new name
    _rename = dict(TS="datetime")

    def _process(self, df):
        """Replace the epoch-second ``datetime`` column with datetimes."""

        timestamps = pd.to_datetime(df["datetime"], unit="s")
        return df.assign(datetime=timestamps)

In [None]:
# Read one daily Sensor B file; TS is renamed to datetime and converted from epoch seconds
SensorBReader().load("data/sensor_b/20200101.dat")

## Inheritance and Object Types

In [None]:
# Instance of the Sensor A subclass, used for the type checks that follow
a_reader = SensorAReader()

In [None]:
# type() returns the instance's own class (SensorAReader), not the base class
type(a_reader)

In [None]:
# isinstance() is True for the class the instance was created from
isinstance(a_reader, SensorAReader)

In [None]:
# False: SensorBReader is a sibling subclass, not a base class of SensorAReader
isinstance(a_reader, SensorBReader)

In [None]:
# True as well: isinstance() also matches any base class of the instance's class
isinstance(a_reader, DataReader)

In [None]:
# issubclass() compares the classes themselves; no instance is needed
issubclass(SensorAReader, DataReader)

## Accessing the Parent Class

In [None]:
class DataReader:
    """Minimal base class used to demonstrate calling a parent initializer."""

    def __init__(self) -> None:
        """Print a message showing that the base-class initializer ran."""
        print("Hi, I'm the base class!")


class ReaderA(DataReader):
    """Subclass whose initializer runs the parent initializer first."""
    
    def __init__(self) -> None:
        """Run the parent initializer, then print the subclass message."""
        
        # super() gives access to the parent class, so its __init__ runs
        # before the subclass prints its own message
        super().__init__()
        
        print("Hi, I'm the subclass!")


In [None]:
# Prints the base-class message first, then the subclass message
reader_a = ReaderA()

## Accessing the subclasses

Classes can access their direct subclasses through their `__subclasses__()` method.

In [None]:
class DataReader:
    """Base class that can list the reader classes deriving from it."""

    @classmethod
    def available_readers(cls):
        """Return the class names of the direct subclasses of ``cls``."""
        subclasses = cls.__subclasses__()
        return [subclass.__name__ for subclass in subclasses]

class ReaderA(DataReader):
    """Empty subclass A of DataReader; it adds no behaviour of its own."""

class ReaderB(DataReader):
    """Empty subclass B of DataReader; it adds no behaviour of its own."""


In [None]:
# Lists the names of DataReader's direct subclasses, here ReaderA and ReaderB
DataReader.available_readers()