In [0]:
# Import required libraries
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [0]:
# Build a synthetic 3-class classification problem:
# 1000 samples, 10 features, 5 of which are informative.
# The seed is pinned so the same data is generated on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_classes=3,
    random_state=42,
)


In [0]:
# Sanity check: report the dimensions of the feature matrix and the label vector
print(f"Shape of X: {X.shape}", f"Shape of y: {y.shape}", sep="\n")

Shape of X: (1000, 10)
Shape of y: (1000,)


In [0]:

# Hold out 33% of the samples for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Each pipeline stage is a (name, estimator) pair. They run in list order:
# standardise the features -> keep 3 principal components -> SVM classifier.
scaling_step = ("scaling", StandardScaler())
reduction_step = ("PCA", PCA(n_components=3))
classifier_step = ("SVC", SVC())
steps = [scaling_step, reduction_step, classifier_step]


In [0]:
# Assemble the pipeline from the configured steps
pipe = Pipeline(steps=steps)

# Fit on the training split, then label the held-out samples.
# fit() returns the fitted pipeline itself, so predict() can be chained onto it.
predictions = pipe.fit(X_train, y_train).predict(X_test)

In [0]:
# Fraction of test samples whose predicted label matches y_test
accuracy = accuracy_score(y_test, predictions)

# Show the raw predicted labels followed by the overall accuracy
print("Predictions:", predictions)
print("Accuracy:", accuracy)

Predictions: [2 2 1 1 1 2 0 2 1 2 1 1 1 1 1 0 2 2 1 0 0 1 1 1 0 2 0 2 2 2 0 2 2 1 1 0 2
 0 0 1 2 2 0 1 2 2 2 1 1 2 2 1 1 2 2 2 1 0 2 2 2 0 2 0 1 2 0 2 0 1 0 0 2 1
 2 0 1 2 2 2 0 0 0 2 1 0 1 0 1 2 0 1 1 2 1 0 1 2 2 1 1 2 1 1 1 1 1 1 2 2 1
 1 2 2 1 1 1 2 1 2 0 2 2 2 2 2 1 0 2 1 1 2 2 2 0 2 1 1 2 2 1 2 0 2 1 0 1 0
 1 2 2 1 0 1 2 1 2 0 1 0 0 1 2 1 2 1 2 2 2 1 1 1 1 2 0 2 2 2 0 1 2 2 1 1 2
 0 1 1 1 0 0 0 0 1 2 2 0 1 2 0 0 2 2 0 2 2 2 0 1 0 2 2 1 1 0 2 1 0 2 2 2 0
 1 2 0 0 1 1 2 1 2 1 1 2 1 0 0 0 0 1 1 2 0 1 2 1 0 2 1 1 2 2 0 1 1 2 0 1 0
 1 1 1 2 0 1 2 0 2 0 0 2 1 0 0 2 2 0 0 0 2 0 0 2 1 1 2 0 1 1 2 0 1 1 2 2 0
 1 1 2 0 0 2 2 2 2 1 1 1 2 0 1 0 2 1 2 2 0 1 1 2 1 0 0 2 0 0 1 1 2 0]
Accuracy: 0.7454545454545455


In [0]:
# Per-class breakdown of the test-set predictions:
# the confusion matrix first, then the precision/recall/f1 report.
confusion = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)

print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", report)


Confusion Matrix:
 [[68 21 10]
 [ 5 82 19]
 [14 15 96]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.69      0.73        99
           1       0.69      0.77      0.73       106
           2       0.77      0.77      0.77       125

    accuracy                           0.75       330
   macro avg       0.75      0.74      0.74       330
weighted avg       0.75      0.75      0.75       330



In [0]:
from sklearn.impute import SimpleImputer
import numpy as np

In [0]:
# Preprocessing for numeric columns: replace NaN entries with the column mean,
# then standardise.
mean_imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
numeric_processor = Pipeline(
    steps=[
        ("imputation_mean", mean_imputer),
        ("scaler", StandardScaler()),
    ]
)

# Bare trailing expression so the notebook renders the pipeline
numeric_processor

In [0]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer


In [0]:
# Preprocessing for categorical columns: fill gaps with the literal "missing",
# then one-hot encode. handle_unknown="ignore" is passed so categories not seen
# during fit do not raise at transform time.
constant_imputer = SimpleImputer(strategy="constant", fill_value="missing")
onehot_encoder = OneHotEncoder(handle_unknown="ignore")

categorical_processor = Pipeline(
    steps=[
        ("imputation_constant", constant_imputer),
        ("onehot", onehot_encoder),
    ]
)


In [0]:
# Placeholder names: swap in the real categorical column names of your dataset.
categorical_columns = ["column1", "column2", "column3"]

# Route the listed columns through the categorical pipeline.
# NOTE(review): only the "cat" branch is registered here; numeric_processor is
# not part of this transformer. Confirm that is intended.
categorical_branch = ("cat", categorical_processor, categorical_columns)
preprocessor = ColumnTransformer(transformers=[categorical_branch])

In [0]:
# Render the categorical preprocessing pipeline as the cell output
categorical_processor


In [0]:
# Render the column transformer as the cell output
preprocessor


In [0]:
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

# Chain the column preprocessor with a logistic-regression classifier.
# make_pipeline generates the step names automatically.
# NOTE(review): this rebinds `pipe`, replacing the SVC pipeline fitted earlier.
pipe = make_pipeline(
    preprocessor,
    LogisticRegression(),
)

# Render the assembled (unfitted) pipeline as the cell output
pipe