# Step-by-Step Explanation

## 1. Import Libraries:

In [16]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

## 2. Load Data:

In [3]:
iris = load_iris()
X, y = iris.data, iris.target

In [5]:
X.shape

(150, 4)

In [7]:
type(X)

numpy.ndarray

In [8]:
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.2],
       ...,
       [5.9, 3. , 5.1, 1.8]])

In [9]:
y.shape

(150,)

In [10]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
X_train.shape

(120, 4)

## 3. Create a Pipeline:

In [13]:
pipeline = Pipeline(steps=[
    # Step 1: standardize the data
    ('scaler', StandardScaler()),
    # Step 2: classify with an SVM using its default settings
    ('svc', SVC()),
])

## 4. Fit the Pipeline:

In [14]:
pipeline.fit(X_train, y_train)

## 5. Make Predictions:

In [15]:
y_pred = pipeline.predict(X_test)

## 6. Evaluate the Model:

In [17]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00


# Example with Grid Search

## 1. Import Libraries:

In [18]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

## 2. Load Data:

In [19]:
iris = load_iris()
X, y = iris.data, iris.target

## 3. Create a Pipeline:

In [20]:
# Scaler -> SVC pipeline. The step names matter here: they are the prefixes
# used by the '<step>__<param>' keys of the parameter grid.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC()),
])

## 4. Define Parameter Grid:

In [21]:
# One sub-grid per kernel. `gamma` is a coefficient of the RBF kernel and is
# ignored by the linear kernel, so crossing the two in a single dict would
# refit the same linear model once per gamma value (32 candidates instead of 20).
# NOTE(review): the RBF sub-grid is listed first so that RBF candidates are
# still evaluated before later linear ones when scores tie — confirm if the
# reported best parameters must stay unchanged.
param_grid = [
    {
        'svc__kernel': ['rbf'],
        'svc__C': [0.1, 1, 10, 100],
        'svc__gamma': [1, 0.1, 0.01, 0.001],
    },
    {
        'svc__kernel': ['linear'],
        'svc__C': [0.1, 1, 10, 100],
    },
]

## 5. Perform Grid Search:

In [22]:
# Exhaustive search over param_grid, scoring every candidate by
# cross-validated accuracy (cv=5).
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X, y)

## 6. Best Parameters and Score:

In [23]:
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best Score: {grid_search.best_score_:.2f}')

Best Parameters: {'svc__C': 1, 'svc__gamma': 0.1, 'svc__kernel': 'rbf'}
Best Score: 0.97


# Example with Multiple Preprocessing Steps

## 1. Import Libraries:

In [24]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

## 2. Load Data:

In [25]:
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 3. Create a Pipeline:

In [26]:
pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),  # Step 1: Handle missing values (mean imputation)
    ('scaler', StandardScaler()),  # Step 2: Standardize the data
    # Step 3: Train Random Forest. Forest construction is randomized, so the
    # seed is pinned (same value as the train/test split) to make the fitted
    # model and the reported accuracy reproducible across runs.
    ('rf', RandomForestClassifier(random_state=42))
])

## 4. Fit the Pipeline:

In [39]:
pipeline.fit(X_train, y_train)

## 5. Make Predictions: 

In [40]:
y_pred = pipeline.predict(X_test)

## 6. Evaluate the Model:

In [41]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00


# Example with Custom Function

## 1. Import Libraries:

In [42]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np

## 2. Create a Custom Function:

In [43]:
# Let's create a custom function to perform a logarithmic transformation and include it in a pipeline.
def log_transform(X):
    """Return log(1 + X), computed element-wise with ``np.log1p``."""
    # log1p rather than log: a zero entry maps to 0 instead of -inf
    log_values = np.log1p(X)
    return log_values

## 3. Wrap the Function Using `FunctionTransformer`:

In [44]:
log_transformer = FunctionTransformer(log_transform)

## 4. Load Data:

In [46]:
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 5. Create a Pipeline:

In [47]:
pipeline = Pipeline(steps=[
    # Step 1: apply the log transformation
    ('log', log_transformer),
    # Step 2: standardize the transformed data
    ('scaler', StandardScaler()),
    # Step 3: apply SVM
    ('svc', SVC()),
])

## 6. Fit the Pipeline:

In [61]:
pipeline.fit(X_train, y_train)

## 7. Make Predictions:

In [62]:
y_pred = pipeline.predict(X_test)

## 8. Evaluate the Model:

In [63]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00


# Example with Custom Function - 2

* If your custom function requires parameters, you can use the `kw_args` parameter of `FunctionTransformer` to pass them.

## 1. Import Libraries:

In [66]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np

## 2. Create a Custom Function:

In [67]:
def custom_scale(X, factor=1):
    """Return ``X`` multiplied by ``factor`` (``factor`` defaults to 1)."""
    scaled = X * factor
    return scaled

## 3. Wrap the Function Using `FunctionTransformer`:

In [68]:
custom_scale_transformer = FunctionTransformer(custom_scale, kw_args={'factor': 2})

## 4. Load Data:

In [69]:
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 5. Create a Pipeline:

In [70]:
pipeline = Pipeline(steps=[
    # Step 1: custom scaling via the wrapped function
    ('custom_scale', custom_scale_transformer),
    # Step 2: standardize the data
    ('scaler', StandardScaler()),
    # Step 3: apply SVM
    ('svc', SVC()),
])

## 6. Fit the Pipeline:

In [71]:
pipeline.fit(X_train, y_train)

## 7. Make Predictions:

In [72]:
y_pred = pipeline.predict(X_test)

## 8. Evaluate the Model:

In [73]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00


# Example with Custom Function - 3
* To apply a custom function to **certain columns** in a Pipeline, you can use `ColumnTransformer` along with `FunctionTransformer`. `ColumnTransformer` allows you to apply different transformations to different subsets of features in your dataset.

## 1. Import Libraries:

In [74]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
from sklearn.metrics import accuracy_score

## 2. Create a Custom Function:

In [75]:
def log_transform(X):
    """Apply ``np.log1p`` to ``X``, i.e. compute log(1 + X) element-wise."""
    # Using log1p means a zero entry yields 0 rather than -inf
    transformed = np.log1p(X)
    return transformed

## 3. Wrap the Function Using `FunctionTransformer`:

In [76]:
log_transformer = FunctionTransformer(log_transform)

## 4. Load Data:

In [77]:
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 5. Define Columns for Transformation:

- Suppose you want to apply the log transformation to the first two columns and standard scaling to the remaining columns.

## 6. Create a `ColumnTransformer`:

In [78]:
# Each entry is (name, transformer, column indices it is applied to).
preprocessor = ColumnTransformer([
    # Columns 0 and 1: log transformation
    ('log', log_transformer, [0, 1]),
    # Columns 2 and 3: standard scaling
    ('scaler', StandardScaler(), [2, 3]),
])

## 7. Create a Pipeline:

In [79]:
pipeline = Pipeline(steps=[
    # Step 1: apply the per-column transformations
    ('preprocessor', preprocessor),
    # Step 2: apply SVM
    ('svc', SVC()),
])

## 8. Fit the Pipeline:

In [81]:
pipeline.fit(X_train, y_train)

## 9. Make Predictions: 

In [82]:
y_pred = pipeline.predict(X_test)

## 10. Evaluate the Model:

In [83]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00


# Example with Custom Class

## 1. Import Libraries:

In [10]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
from sklearn.metrics import accuracy_score

## 2. Create a Custom Transformer Class:

In [2]:
class LogTransformer(BaseEstimator, TransformerMixin):
    """Transformer that applies ``np.log1p`` to its input; it learns nothing in ``fit``."""

    def __init__(self):
        """Takes no parameters."""

    def fit(self, X, y=None):
        """Return the transformer itself without inspecting ``X`` or ``y``."""
        return self

    def transform(self, X, y=None):
        """Return log(1 + X); ``y`` is accepted but not used."""
        # log1p rather than log: a zero entry maps to 0 instead of -inf
        log_values = np.log1p(X)
        return log_values

## 3. Load Data:

In [3]:
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 4. Define Columns for Transformation:

- Suppose you want to apply the **log transformation** to the first two columns and **standard scaling** to the remaining columns.

## 5. Create a `ColumnTransformer`:

In [4]:
# Each entry is (name, transformer, column indices it is applied to).
preprocessor = ColumnTransformer([
    # Columns 0 and 1: custom log transformation
    ('log', LogTransformer(), [0, 1]),
    # Columns 2 and 3: standard scaling
    ('scaler', StandardScaler(), [2, 3]),
])

## 6. Create a Pipeline:

In [5]:
pipeline = Pipeline(steps=[
    # Step 1: apply the per-column transformations
    ('preprocessor', preprocessor),
    # Step 2: apply SVM
    ('svc', SVC()),
])

## 7. Fit the Pipeline:

In [6]:
pipeline.fit(X_train, y_train)

## 8. Make Predictions:

In [7]:
y_pred = pipeline.predict(X_test)

## 9. Evaluate the Model:

In [11]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00


# Example with Custom Class - 2

Let's create an example pipeline that uses three different custom transformer classes. In this example, we'll use the following custom transformers:

> **1 - LogTransformer**: Applies a logarithmic transformation to specified columns.

> **2 - SquareTransformer**: Squares the specified columns.

> **3 - CubeTransformer**: Cubes the specified columns.

## 1. Import Libraries:

In [12]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
from sklearn.metrics import accuracy_score

## 2. Create Custom Transformer Classes:

In [13]:
class LogTransformer(BaseEstimator, TransformerMixin):
    """Transformer that applies ``np.log1p`` to its input; it learns nothing in ``fit``."""

    def __init__(self):
        """Takes no parameters."""

    def fit(self, X, y=None):
        """Return the transformer itself without inspecting ``X`` or ``y``."""
        return self

    def transform(self, X, y=None):
        """Return log(1 + X); ``y`` is accepted but not used."""
        # log1p rather than log: a zero entry maps to 0 instead of -inf
        log_values = np.log1p(X)
        return log_values

class SquareTransformer(BaseEstimator, TransformerMixin):
    """Transformer that squares its input element-wise; it learns nothing in ``fit``."""

    def __init__(self):
        """Takes no parameters."""

    def fit(self, X, y=None):
        """Return the transformer itself without inspecting ``X`` or ``y``."""
        return self

    def transform(self, X, y=None):
        """Return ``np.square(X)``; ``y`` is accepted but not used."""
        squared = np.square(X)
        return squared

class CubeTransformer(BaseEstimator, TransformerMixin):
    """Transformer that cubes its input element-wise; it learns nothing in ``fit``."""

    def __init__(self):
        """Takes no parameters."""

    def fit(self, X, y=None):
        """Return the transformer itself without inspecting ``X`` or ``y``."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` raised to the third power; ``y`` is accepted but not used."""
        cubed = np.power(X, 3)
        return cubed

## 3. Load Data:

In [14]:
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 4. Define Columns for Each Transformation:

Suppose we apply:

- **Log transformation** to columns 0 and 1.
- **Square transformation** to column 2.
- **Cube transformation** to column 3.

## 5. Create a `ColumnTransformer`:

In [15]:
# Each entry is (name, transformer, column indices it is applied to).
preprocessor = ColumnTransformer([
    # Columns 0 and 1: log transformation
    ('log', LogTransformer(), [0, 1]),
    # Column 2: square transformation
    ('square', SquareTransformer(), [2]),
    # Column 3: cube transformation
    ('cube', CubeTransformer(), [3]),
])

## 6. Create a Pipeline:

In [16]:
pipeline = Pipeline(steps=[
    # Step 1: apply the per-column transformations
    ('preprocessor', preprocessor),
    # Step 2: standardize the transformed data
    ('scaler', StandardScaler()),
    # Step 3: apply SVM
    ('svc', SVC()),
])

## 7. Fit the Pipeline:

In [17]:
pipeline.fit(X_train, y_train)

## 8. Make Predictions:

In [18]:
y_pred = pipeline.predict(X_test)

## 9. Evaluate the Model:

In [19]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00
