In [14]:
import pytest
import logging
import numpy as np
import pandas as pd

from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression
from sklearn.metrics import r2_score
from collections import Counter

# 0. Initial configuration

In [3]:
class MLLinearModel:

  def __init__(
      self,
      alpha:float,
      l1_ratio:float,
      model_name=""
      ):

    # Store params
    self.alpha = alpha
    self.l1_ratio = l1_ratio
    self.model_name = model_name

    # Initializate parameters
    self.model_reg = None

  def train_model(self, X_train:np.ndarray, y_train:np.ndarray):
    self.model_reg = ElasticNet(random_state=0, alpha=self.alpha, l1_ratio=self.l1_ratio)
    self.model_reg.fit(X_train, y_train)
    print(self.model_reg.coef_)
    print(self.model_reg.intercept_)

  def predict_model(self, X_test:np.ndarray):
    y_pred = self.model_reg.predict(X_test)
    return y_pred

  def evaluate_model(self, X_test:np.ndarray, y_test:np.ndarray):
    y_pred = self.predict_model(X_test)
    return {'r2_score': r2_score(y_test, y_pred)}

In [4]:
# Generate synthetic data
n_features = 4
X_train, y_train = make_regression(n_features=n_features, random_state=0)
X_test, y_test = make_regression(n_features=n_features, random_state=0)

In [5]:
# Try class
model_reg =  MLLinearModel(alpha=1.0, l1_ratio=0.5,model_name="prueba")
model_reg.train_model(X_train=X_train, y_train=y_train)
model_reg.evaluate_model(X_test=X_test, y_test=y_test)

[17.01756265 25.92033077 42.98727128 57.7739761 ]
-0.7022844637724632


{'r2_score': 0.8905284935984247}

### 0.1. Logging

Source:
* https://realpython.com/python-logging/

In [36]:
# logging_example.py

import logging

# Create a custom logger
logger = logging.getLogger(__name__)

# Create handlers
c_handler = logging.StreamHandler()
f_handler = logging.FileHandler('file.log')
c_handler.setLevel(logging.WARNING)
f_handler.setLevel(logging.ERROR)


# Create formatters and add it to handlers
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
#f_format = logging.Formatter('[%(asctime)s] p%(process)s {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s','%m-%d %H:%M:%S')

c_handler.setFormatter(c_format)
f_handler.setFormatter(f_format)

# Add handlers to the logger
logger.addHandler(c_handler)
logger.addHandler(f_handler)

logger.warning('This is a warning')
logger.error('This is an error')
logger.debug('This is a debug message')
logger.info('This is an info message')
logger.warning('This is a warning message')
logger.error('This is an error message')
logger.critical('This is a critical message')

__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
ERROR:__main__:This is an error
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
ERROR:__main__:This is an error message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
CRITICAL:__main__:This is

In [38]:
logger.critical('Prueba')

__main__ - CRITICAL - Prueba
__main__ - CRITICAL - Prueba
__main__ - CRITICAL - Prueba
__main__ - CRITICAL - Prueba
__main__ - CRITICAL - Prueba
__main__ - CRITICAL - Prueba
__main__ - CRITICAL - Prueba
CRITICAL:__main__:Prueba


In [45]:
# logging_example.py

import logging

# Create a custom logger
logger = logging.getLogger(__name__)

# Create handlers
f_handler = logging.FileHandler('file_2.log')
f_handler.setLevel(logging.ERROR)
f_handler.setLevel(logging.DEBUG)

# Create formatters and add it to handlers
f_format = logging.Formatter('[%(asctime)s] p%(process)s {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s','%m-%d %H:%M:%S')

f_handler.setFormatter(f_format)

# Add handlers to the logger
logger.addHandler(f_handler)

logger.warning('This is a warning')
logger.error('This is an error')
logger.debug('This is a debug message')
logger.info('This is an info message')
logger.warning('This is a warning message')
logger.error('This is an error message')
logger.critical('This is a critical message')
logger.critical('Prueba')

__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
__main__ - ERROR - This is an error
ERROR:__main__:This is an error
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
__main__ - ERROR - This is an error message
ERROR:__main__:This is an error message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
__main__ - CRITICAL - This is a critical message
CRITICAL:__main__:This is

## 1. Unit Tests

### 1.1. Initial example

In [None]:
class MLModel:

  def __init__(
      self,
      param1:float,
      param2:float,
      model_name:str
      ):

    # Store params
    self.param1 = param1
    self.param2 = param2
    self.model_name = model_name

  def train_model(self):
    pass

  def predict_model(self):
    pass

  def evaluate_model(self):
    pass


In [None]:
# Define several tests within a function, and evaluate them over different
# configurations
def test_answer(result_test):
    assert result_test.param1 >= 0
    assert result_test.param2 >= 0
    assert type(result_test.param1) == float
    assert type(result_test.param2) == float

def test1():
    return MLModel(param1=1.1, param2=1.2, model_name="prueba")

def test2():
    return MLModel(param1=1.1, param2=-1, model_name="prueba")

# Test 1
test_answer(result_test = test1())

# Test 2
test_answer(result_test = test2())

AssertionError: 

In [None]:
#@pytest.fixture()
def test1():
    return MLModel(param1=1.1, param2=1.2, model_name="prueba")

def test_unit_create_model(test1):
    #assert test1.model_name == "prueba"
    assert test1.param1 >= 0
    assert test1.param2 >= 0
    assert test1.param1 == float
    assert test1.param2 == float

In [None]:
test_unit_create_model(test1)

In [None]:
MLModel(param1=1.1, param2=1.2, model_name="prueba").model_name

'prueba'

### 1.2. Another example
Source:
* https://github.com/miguelgfierro/pybase/blob/main/test/pytest_fixtures.py
* https://docs.pytest.org/en/6.2.x/fixture.html

In [15]:
# Define data structures
@pytest.fixture()
def basic_structures():
    data = {
        "int": 5,
        "yes": True,
        "no": False,
        "float": 0.5,
        "pi": 3.141592653589793238462643383279,
        "string": "Name",
        "none": None,
    }
    return data

@pytest.fixture()
def complex_structures():
    my_list = [1, 2, 3]
    my_dict = {"a": 1, "b": 2}
    return my_list, my_dict

@pytest.fixture()
def numeric_libs(complex_structures):
    l, d = complex_structures
    np_array = np.array(l)
    df = pd.DataFrame(d, index=[0])
    series = pd.Series(l)
    return np_array, df, series

In [16]:
# Define tests
def test_basic_structures(basic_structures):
    assert basic_structures["int"] == 5
    assert basic_structures["yes"] is True
    assert basic_structures["no"] is False
    assert basic_structures["float"] == 0.5
    assert basic_structures["string"] == "Name"
    assert basic_structures["none"] is None


def test_comparing_numbers(basic_structures):
    assert basic_structures["pi"] == pytest.approx(3.1415926, 0.0000001)
    assert basic_structures["pi"] != pytest.approx(3.1415926, 0.00000001)
    assert basic_structures["int"] > 3
    assert basic_structures["int"] >= 5
    assert basic_structures["int"] < 10
    assert basic_structures["int"] <= 5


def test_lists(complex_structures):
    l = complex_structures[0]
    assert l == [1, 2, 3]
    assert Counter(l) == Counter([2, 1, 3])  # list have same elements
    assert 1 in l
    assert 5 not in l
    assert all(x in l for x in [2, 3])  # sublist in list


def test_dictionaries(complex_structures):
    d = complex_structures[1]
    assert d == {"a": 1, "b": 2}
    assert "a" in d
    assert d.items() <= {"a": 1, "b": 2, "c": 3}.items()  # subdict in dict
    with pytest.raises(KeyError):
        value = d["c"]


def test_pandas(numeric_libs):
    _, df, series = numeric_libs
    df_target = pd.DataFrame({"a": 1, "b": 2}, index=[0])
    series_target = pd.Series([1, 2, 3])
    pd.testing.assert_frame_equal(df, df_target)
    pd.testing.assert_series_equal(series, series_target)


def test_numpy(numeric_libs):
    np_array = numeric_libs[0]
    np_target = np.array([1, 2, 3])
    np_target2 = np.array([0.9999, 2, 3])
    assert np.all(np_array == np_target)
    np.testing.assert_array_equal(np_array, np_target)  # same as before
    np.testing.assert_array_almost_equal(np_array, np_target2, decimal=4)

## 2. Smoke Tests

## 3. Integration/Functional Tests

## 4. Utility Test