In [13]:
### Q1 
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.feature_selection import SelectFromModel

# Q1: preprocessing -> model-based feature selection -> classifier.
#
# numerical_features, categorical_features, X_train, y_train, X_test and
# y_test are not defined in this cell; they are expected to come from an
# earlier cell -- TODO confirm they are in scope on a fresh kernel.

# Seed shared by the stochastic estimators below so that the reported
# accuracy is reproducible under Restart & Run All.
RANDOM_STATE = 42

# Numerical pipeline: fill missing values with the column mean, then
# standardize each column.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical pipeline: fill missing values with the most frequent category,
# then one-hot encode.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    # handle_unknown='ignore' keeps scoring from raising when the test data
    # contains a category that was never seen during fit.
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Combine numerical and categorical pipelines, each applied to its own columns.
preprocessing_pipeline = ColumnTransformer([
    ('numerical', numerical_pipeline, numerical_features),
    ('categorical', categorical_pipeline, categorical_features)
])

# Automated feature selection driven by random-forest feature importances.
feature_selector = SelectFromModel(
    RandomForestClassifier(random_state=RANDOM_STATE)
)

# Final pipeline.
# Preprocessing must run BEFORE feature selection: the selector's forest needs
# imputed, numerically encoded input, and the ColumnTransformer needs the
# original columns to still be addressable. Step names are unchanged so
# lookups such as pipeline.named_steps['classifier'] keep working.
pipeline = Pipeline([
    ('preprocessing', preprocessing_pipeline),
    ('feature_selection', feature_selector),
    ('classifier', RandomForestClassifier(random_state=RANDOM_STATE))
])

# Fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# Evaluate accuracy on the test dataset
accuracy = pipeline.score(X_test, y_test)

# Last expression of the cell, so the notebook displays the score.
accuracy


In [None]:
### Q2 
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression

# Q2: hard-voting ensemble of a random forest and a logistic regression.
#
# X_train, y_train, X_test and y_test are not defined in this cell; they are
# expected to come from an earlier cell -- TODO confirm.
# NOTE(review): no imputation/encoding/scaling is applied here, so this cell
# presumably expects X_train to already be numeric and free of missing
# values -- verify, or prepend a preprocessing step.

# Seed for the stochastic estimators so the reported accuracy is reproducible
# under Restart & Run All.
RANDOM_STATE = 42

# Create individual classifiers
random_forest = RandomForestClassifier(random_state=RANDOM_STATE)
logistic_regression = LogisticRegression(random_state=RANDOM_STATE)

# Create the voting classifier; voting='hard' predicts by majority vote over
# the estimators' predicted class labels.
voting_classifier = VotingClassifier(
    estimators=[('rf', random_forest), ('lr', logistic_regression)],
    voting='hard'
)

# Create the pipeline.
# NOTE(review): this rebinds `pipeline` (and `accuracy` below), replacing any
# earlier objects bound to those names in the kernel.
pipeline = Pipeline([
    ('classifier', voting_classifier)
])

# Fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# Evaluate accuracy on the test dataset
accuracy = pipeline.score(X_test, y_test)

# Last expression of the cell, so the notebook displays the score.
accuracy
