In [24]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# ---------------------------------------------------------------------------
# Data: iris feature matrix (X) and class labels (y).
# ---------------------------------------------------------------------------
iris = load_iris()
X = iris['data']
y = iris['target']

# Blank out a handful of cells (in place) so the imputer has work to do.
# Keys are column indices; values are the rows set to NaN in that column.
# Row 135 ends up missing in columns 0, 1 and 3.
missing_rows_by_column = {
    0: [1, 20, 50, 100, 135],
    1: [2, 5, 88, 135],
    2: [4, 15],
    3: [40, 135],
}
for column, rows in missing_rows_by_column.items():
    X[rows, column] = np.nan

# ---------------------------------------------------------------------------
# Hold out 33% of the samples for testing; the seed keeps the split fixed.
# ---------------------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=43,
)

# ---------------------------------------------------------------------------
# Model: median imputation -> standardisation -> logistic regression.
# ---------------------------------------------------------------------------
pipeline_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression(max_iter=200)),
]
pipeline = Pipeline(steps=pipeline_steps)

# Fit every step on the training split, then label the held-out samples.
# `fit` returns the pipeline itself, so the two calls can be chained.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)
print(y_pred)

# Share of held-out samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of the model on the test set: {accuracy:.2f}')


[0 0 2 1 2 0 2 1 1 1 0 1 2 0 1 1 0 0 2 2 0 0 0 2 2 2 0 1 0 0 1 0 1 1 2 2 1
 2 1 1 1 2 1 2 0 1 1 1 1 1]
Accuracy of the model on the test set: 0.98
