# Initial Setup

In [1]:
# This is needed only for the purpose of the notebook
!pip install ipytest




[notice] A new release of pip is available: 24.2 -> 24.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Importing required libraries

import pandas as pd
import numpy as np

from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

import pytest
import ipytest
ipytest.autoconfig()

In [3]:
# Load the iris dataset through sklearn.datasets
iris = datasets.load_iris()

In [4]:
# Put the feature matrix in a DataFrame and attach the labels as a 'target' column
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

### Setting up the original class for the SimplePipeline

In [5]:
class SimplePipeline:
    """Small iris pipeline: load the data, split it, fit a classifier and score it."""

    def __init__(self):
        """Declare the pipeline attributes and load/split the dataset right away."""
        # Every attribute starts as None; load_dataset() (called below) fills in the
        # data attributes and train() sets the model.
        self.frame = None
        self.feature_names = None
        # Use the same lowercase `y_test` name that load_dataset() and get_accuracy()
        # use, so no stray `Y_test` attribute is left behind.
        self.X_train, self.X_test, self.y_train, self.y_test = None, None, None, None
        self.model = None
        self.load_dataset()

    def load_dataset(self):
        """Load the iris dataset and make the train/test split.

        Sets ``feature_names``, ``frame``, ``X_train``, ``X_test``, ``y_train``
        and ``y_test`` on the instance.
        """
        dataset = datasets.load_iris()

        # Drop the last 5 characters of each header to remove the units, i.e. " (cm)".
        self.feature_names = [fn[:-5] for fn in dataset.feature_names]
        self.frame = pd.DataFrame(dataset.data, columns=self.feature_names)
        self.frame['target'] = dataset.target

        # test_size=0.65 holds out 65% of the rows for testing; the fixed
        # random_state makes the split reproducible.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.frame[self.feature_names], self.frame.target, test_size=0.65, random_state=42)

    def train(self, algorithm=LogisticRegression):
        """Instantiate ``algorithm`` and fit it on the training split.

        Args:
            algorithm: estimator class; it is called with ``solver='lbfgs'`` and
                ``multi_class='auto'``. Defaults to ``LogisticRegression``.
        """
        # NOTE(review): recent scikit-learn releases appear to deprecate `multi_class`;
        # confirm against the installed version before upgrading.
        self.model = algorithm(solver='lbfgs', multi_class='auto')
        self.model.fit(self.X_train, self.y_train)

    def predict(self, input_data):
        """Return the model's predictions for ``input_data`` (requires train() first)."""
        return self.model.predict(input_data)

    def get_accuracy(self):
        """Return the model's score on the held-out test split (requires train() first)."""
        return self.model.score(X=self.X_test, y=self.y_test)

    def run_pipeline(self):
        """Execution method for running the pipeline several times.

        Reloads and re-splits the dataset, then trains the model.
        """
        self.load_dataset()
        self.train()

### Let's modify the settings of the pipeline

In [6]:
class PipelineWithConfig(SimplePipeline):
    """SimplePipeline whose classifier settings are read from an injected config."""

    def __init__(self, config):
        """Initialise the base pipeline, then store the injected config.

        Args:
            config: object read with ``.get('solver')`` and ``.get('multi_class')``
                when ``train`` is called.
        """
        # Run the inherited SimplePipeline.__init__ first.
        super().__init__()
        # Dependency Injection: the caller hands in the settings used for training.
        self.config = config

    def train(self, algorithm=LogisticRegression):
        """Instantiate ``algorithm`` with the configured settings and fit it.

        Args:
            algorithm: estimator class, called with the ``solver`` and
                ``multi_class`` values taken from the config.
        """
        # Collect the classifier parameters from the pipeline's config.
        estimator_kwargs = {
            'solver': self.config.get('solver'),
            'multi_class': self.config.get('multi_class'),
        }
        self.model = algorithm(**estimator_kwargs)
        self.model.fit(self.X_train, self.y_train)

# Testing

In [7]:
@pytest.fixture
def pipeline():
    """Provide a PipelineWithConfig on which run_pipeline() has already been called."""
    trained_pipeline = PipelineWithConfig(config=dict(solver='lbfgs', multi_class='auto'))
    trained_pipeline.run_pipeline()
    return trained_pipeline

In [10]:
%%ipytest


ENABLED_MODEL_SOLVERS = {'newton-cg'}

def test_pipeline_config(pipeline):
    # Getting the config parameters for the model.
    model_params = pipeline.model.get_params()

    # Comparing
    assert model_params['solver'] in ENABLED_MODEL_SOLVERS
    # assert False, repr(model_params)

[31mF[0m[31m                                                                                            [100%][0m
[31m[1m______________________________________ test_pipeline_config _______________________________________[0m

pipeline = <__main__.PipelineWithConfig object at 0x0000020FE8EAC0D0>

    [0m[94mdef[39;49;00m [92mtest_pipeline_config[39;49;00m(pipeline):[90m[39;49;00m
        [90m# Getting the config parameters for the model.[39;49;00m[90m[39;49;00m
        model_params = pipeline.model.get_params()[90m[39;49;00m
    [90m[39;49;00m
        [90m# Comparing[39;49;00m[90m[39;49;00m
>       [94massert[39;49;00m model_params[[33m'[39;49;00m[33msolver[39;49;00m[33m'[39;49;00m] [95min[39;49;00m ENABLED_MODEL_SOLVERS[90m[39;49;00m
[1m[31mE       AssertionError: assert 'lbfgs' in {'newton-cg'}[0m

[1m[31mC:\Users\richv\AppData\Local\Temp\ipykernel_18688\3623714792.py[0m:8: AssertionError
notebooks/Testing/t_8ccbc8b1b64546d7b616490fc3ae4baa.p