# Pipeline

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC


from sklearn.pipeline import Pipeline

In [None]:
# Load the iris dataset and separate the feature matrix from the class labels.
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Chain preprocessing and the classifier so they are always fitted and applied
# together, in this order.
pipeline = Pipeline(
    steps=[
        # Step 1: Scaling
        ('scaler', StandardScaler()),
        # Step 2: PCA for dimensionality reduction
        ('pca', PCA(n_components=2)),
        # Step 3: Classifier (SVM in this case)
        ('svm', SVC(kernel='rbf')),
    ]
)

In [None]:
# The fit method of the pipeline applies each step to the training data in
# order: it fits the scaler and scales the data, fits PCA on the scaled data
# and projects it, and finally fits the SVM classifier on the projected data.
pipeline.fit(X_train, y_train)

In [None]:
# The score method passes the test data through every fitted transformer in
# the pipeline (scaler, then PCA) and then scores the final SVM classifier on
# the result. For a classifier this score is the mean accuracy.
accuracy = pipeline.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.9


In [None]:
# Predict class labels for the test set; the pipeline applies the fitted
# scaler and PCA to X_test before calling the SVM's predict.
pipeline.predict(X_test)

array([2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
       2, 0, 0, 0, 2, 0, 2, 2])

In [None]:
import pickle

# Baseline for comparison: an RBF-kernel SVM trained directly on the raw
# features (no scaling, no PCA). fit() returns the estimator itself, so the
# construction and the fit can be chained.
model = SVC(kernel='rbf').fit(X_train, y_train)

# Accuracy of the baseline model on the held-out test set.
baseline_accuracy = model.score(X_test, y_test)
print(baseline_accuracy)

1.0


In [None]:
# Serialize the fitted baseline model to disk. Opening with 'wb' creates
# file.pkl if it does not exist and overwrites it otherwise.
with open('file.pkl', 'wb') as fh:
    pickle.dump(model, fh)

In [None]:
# Restore the baseline model from disk.
# NOTE: only unpickle files you trust; pickle.load can execute arbitrary code
# embedded in the file.
with open('file.pkl', 'rb') as fh:
    model_plk = pickle.load(fh)

# Display the restored estimator to confirm the round trip.
model_plk

In [None]:
# Serialize the whole fitted pipeline (scaler, PCA and SVM together) so the
# preprocessing travels with the classifier. Opening with 'wb' creates
# pipeline.pkl if it does not exist and overwrites it otherwise.
with open('pipeline.pkl', 'wb') as fh:
    pickle.dump(pipeline, fh)

In [None]:
# Restore the fitted pipeline from disk.
# NOTE: only unpickle files you trust; pickle.load can execute arbitrary code
# embedded in the file.
with open('pipeline.pkl', 'rb') as fh:
    pipeline_plk = pickle.load(fh)

# Display the restored pipeline to confirm the round trip.
pipeline_plk

## Accessing steps

In [None]:
# Show the pipeline's steps as a mapping from step name to estimator.
# Individual steps can then be looked up by the name given at construction.
pipeline.named_steps

{'scaler': StandardScaler(), 'pca': PCA(n_components=2), 'svm': SVC()}

In [None]:
# Accessing and exploring pipeline steps: look up each fitted step by name.
scaler_step = pipeline.named_steps['scaler']
pca_step = pipeline.named_steps['pca']
svm_step = pipeline.named_steps['svm']

# Print information about each step.
# mean_ and scale_ are the statistics the scaler learned from the raw training
# data during fit; they describe the features *before* scaling (after scaling
# the training features have mean 0 and unit standard deviation).
print("Scaler Step:")
print("Mean of each feature learned from the training data:", scaler_step.mean_)
print("Standard deviation of each feature learned from the training data:", scaler_step.scale_)

print("\nPCA Step:")
print("Explained variance ratio of each principal component:", pca_step.explained_variance_ratio_)

# Note: For SVM, specific attributes might depend on the SVM implementation used (e.g., sklearn.svm.SVC or sklearn.svm.LinearSVC)

# Reproduce the pipeline's preprocessing by hand with the already-fitted steps:
# scale first, then project with PCA. Only transform() is used here. Calling
# fit_transform() on a step would refit it (on unscaled data, in the case of
# PCA), silently modify the fitted pipeline, and feed the SVM features from a
# different space than the one it was trained on.
X_train_transformed = pca_step.transform(scaler_step.transform(X_train))
X_test_transformed = pca_step.transform(scaler_step.transform(X_test))

# Predict with the final SVM step directly on the preprocessed test data.
# This is what pipeline.predict(X_test) does internally.
y_pred = svm_step.predict(X_test_transformed)

# Print the transformed data shape, the predicted labels and the accuracy.
print("\nTransformed data shape:", X_train_transformed.shape)
print("Predicted labels:", y_pred)
print("Score:", svm_step.score(X_test_transformed, y_test))

Scaler Step:
Mean of each feature after scaling: [5.98       3.04       3.88333333 1.26333333]
Standard deviation of each feature after scaling: [0.83082289 0.37824155 1.81035601 0.7972801 ]

PCA Step:
Explained variance ratio of each principal component: [0.74658917 0.22284909]

Transformed data shape: (120, 2)
Predicted labels: [1 0 2 1 2 0 1 2 2 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
Score: 0.9333333333333333
