In [1]:
from sklearn.utils import all_estimators
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing

def get_estimators(estimator_type):
    """
    Look up every scikit-learn estimator that belongs to one category.

    Parameters:
    - estimator_type (str): Category to list; must be 'classifier' or 'regressor'.

    Returns:
    - list: (name, class) tuples as produced by sklearn.utils.all_estimators
      for the requested category.

    Raises:
    - ValueError: If estimator_type is neither 'classifier' nor 'regressor'.
    """
    supported_types = ['classifier', 'regressor']

    # Happy path first: a recognised category is forwarded as the type filter.
    if estimator_type in supported_types:
        return list(all_estimators(type_filter=estimator_type))

    raise ValueError("Invalid estimator type. Must be 'classifier' or 'regressor'.")

def perform_train_test_split(X, y, test_size=0.2, random_state=None):
    """
    Split features and targets into training and test portions.

    This is a thin wrapper that forwards its arguments to
    sklearn.model_selection.train_test_split.

    Parameters:
    - X (array-like): Feature dataset.
    - y (array-like): Target values.
    - test_size (float or int): Size of the test set.
    - random_state (int or None): Random seed for reproducibility.

    Returns:
    - The value returned by train_test_split(X, y, ...), which callers in
      this file unpack as (X_train, X_test, y_train, y_test).
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    # Return the library result untouched so callers see exactly what
    # train_test_split produced.
    return train_test_split(X, y, **split_options)

def perform_feature_selection(X, y, k=10):
    """
    Keep the k highest-scoring features according to f_regression.

    A new SelectKBest is created and fitted on (X, y) on every call, so two
    separate calls (for example one per data split) are scored independently
    of each other.

    Parameters:
    - X (array-like): Feature dataset.
    - y (array-like): Target values.
    - k (int): Number of top features to select.

    Returns:
    - array-like: Transformed feature dataset with selected features.
    """
    # Build, fit and apply the selector in a single expression.
    return SelectKBest(score_func=f_regression, k=k).fit_transform(X, y)
if __name__ == "__main__":
    # Load the California housing data and hold out 20% for evaluation.
    housing = fetch_california_housing()
    X, y = housing.data, housing.target
    X_train, X_test, y_train, y_test = perform_train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the feature selector on the training split ONLY and reuse that same
    # fitted selector for the test split. This guarantees both matrices hold
    # the same columns and keeps the test targets out of preprocessing.
    selector = SelectKBest(score_func=f_regression, k=5)
    X_train_selected = selector.fit_transform(X_train, y_train)
    X_test_selected = selector.transform(X_test)

    type_of_estimator = input("Enter type of estimator ('classifier' or 'regressor'): ")
    estimators = get_estimators(type_of_estimator)

    # First list what was found, then evaluate each estimator in turn.
    print(f"Found {len(estimators)} {type_of_estimator}s:")
    for name, _ in estimators:
        print(f"- {name}")

    for name, estimator_class in estimators:
        try:
            # Default hyper-parameters; classes that cannot be constructed,
            # fitted or scored this way are reported and skipped.
            estimator = estimator_class()
            estimator.fit(X_train_selected, y_train)
            y_pred = estimator.predict(X_test_selected)
            mse = mean_squared_error(y_test, y_pred)
            print(f'{name} Mean Squared Error: {mse}')
        except Exception as e:
            print(f'{name} failed: {e}')

ImportError: 
`load_boston` has been removed from scikit-learn since version 1.2.

The Boston housing prices dataset has an ethical problem: as
investigated in [1], the authors of this dataset engineered a
non-invertible variable "B" assuming that racial self-segregation had a
positive impact on house prices [2]. Furthermore the goal of the
research that led to the creation of this dataset was to study the
impact of air quality but it did not give adequate demonstration of the
validity of this assumption.

The scikit-learn maintainers therefore strongly discourage the use of
this dataset unless the purpose of the code is to study and educate
about ethical issues in data science and machine learning.

In this special case, you can fetch the dataset from the original
source::

    import pandas as pd
    import numpy as np

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
    data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    target = raw_df.values[1::2, 2]

Alternative datasets include the California housing dataset and the
Ames housing dataset. You can load the datasets as follows::

    from sklearn.datasets import fetch_california_housing
    housing = fetch_california_housing()

for the California housing dataset and::

    from sklearn.datasets import fetch_openml
    housing = fetch_openml(name="house_prices", as_frame=True)

for the Ames housing dataset.

[1] M Carlisle.
"Racist data destruction?"
<https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>

[2] Harrison Jr, David, and Daniel L. Rubinfeld.
"Hedonic housing prices and the demand for clean air."
Journal of environmental economics and management 5.1 (1978): 81-102.
<https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import all_estimators
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_digits  # Example dataset, replace with user input

class EstimatorManager:
    """
    Picks a scikit-learn estimator by name, holds a dataset, and runs a fixed
    split / fit / predict / score cycle on it.
    """

    def __init__(self):
        # (name, class) pairs returned by sklearn.utils.all_estimators().
        self.estimators = self._get_estimators()
        # Estimator instance created by select_model(); None until then.
        self.model = None
        # Features and targets stored by provide_dataset(); None until then.
        self.X = None
        self.y = None

    def _get_estimators(self):
        """
        Fetches all scikit-learn estimators.

        Returns:
        - list: List of (name, class) tuples of all estimators.
        """
        # No type filter is passed, so every estimator kind is included.
        return list(all_estimators())

    def select_model(self, model_name):
        """
        Selects a scikit-learn model by name (case-insensitive).

        The matching class is instantiated with no arguments, so a class whose
        constructor requires arguments will raise its own error here.

        Parameters:
        - model_name (str): Name of the scikit-learn estimator.

        Raises:
        - ValueError: If the specified model name is not found.

        Returns:
        - bool: True if model selection is successful.
        """
        wanted = model_name.lower()  # normalise once instead of per iteration
        for name, estimator_class in self.estimators:
            if name.lower() == wanted:
                self.model = estimator_class()
                return True
        raise ValueError(f"Model '{model_name}' not found in scikit-learn.")

    def provide_dataset(self, X, y):
        """
        Accepts a dataset for model training.

        Parameters:
        - X (array-like): Feature dataset.
        - y (array-like): Target values.

        Returns:
        - bool: True if dataset is successfully provided.
        """
        self.X = X
        self.y = y
        return True

    def make_graphs(self, num_graphs):
        """
        Generates example graphs using the dataset.

        Every figure is the same scatter plot of the first feature column
        against the targets (placeholder graphing logic).

        Parameters:
        - num_graphs (int): Number of graphs to generate.

        Raises:
        - ValueError: If no dataset has been provided.

        Returns:
        - list: List of generated graphs (matplotlib figures).
        """
        if self.X is None or self.y is None:
            raise ValueError("Dataset not provided.")

        # Coerce to an ndarray so that 2-D slicing also works when the dataset
        # was supplied as nested lists or a DataFrame.
        first_feature = np.asarray(self.X)[:, 0]

        graphs = []
        for _ in range(num_graphs):
            # Example graph generation (replace with actual graphing logic)
            fig, ax = plt.subplots()
            ax.scatter(first_feature, self.y)
            ax.set_title("Example Scatter Plot")
            graphs.append(fig)

        return graphs

    def train_and_predict(self):
        """
        Trains the selected model on the provided dataset and makes predictions.

        The data is split 80/20 with a fixed random_state of 42, the model is
        fitted on the larger part, and the mean squared error on the held-out
        part is printed.

        Raises:
        - ValueError: If no model or dataset has been provided, or if the
          selected model has no predict() method.

        Returns:
        - array-like: Predicted values for the held-out part.
        """
        if self.model is None or self.X is None or self.y is None:
            raise ValueError("Model or dataset not provided.")

        # The estimator list is unfiltered, so the selected model may be a
        # transformer or similar without predict(). Fail before any fitting
        # work, with the same exception type as the other precondition.
        if not hasattr(self.model, "predict"):
            raise ValueError(
                f"Model '{type(self.model).__name__}' has no predict() method "
                "and cannot be used for prediction."
            )

        # Example: Train-test split for demonstration
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=42)

        # Train the model
        self.model.fit(X_train, y_train)

        # Make predictions
        y_pred = self.model.predict(X_test)

        # Example evaluation (replace with user's specific needs)
        mse = mean_squared_error(y_test, y_pred)
        print(f"Mean Squared Error: {mse}")

        return y_pred

if __name__ == "__main__":
    # Example usage script: choose a model, load a dataset, save a few plots,
    # then train and evaluate.

    # Initialize EstimatorManager
    estimator_manager = EstimatorManager()

    # select_model() raises ValueError for an unknown name and int() raises
    # ValueError for non-numeric input, so the whole interactive flow shares
    # one handler instead of only the training step.
    try:
        # Example: Select model
        model_name = input("Enter model name (e.g., 'LinearRegression', 'RandomForestRegressor'): ")
        estimator_manager.select_model(model_name)

        # Example: Provide dataset (replace with user input logic)
        digits = load_digits()
        X, y = digits.data, digits.target
        estimator_manager.provide_dataset(X, y)

        # Example: Generate graphs (replace with user input logic)
        num_graphs = int(input("Enter number of graphs to generate: "))
        graphs = estimator_manager.make_graphs(num_graphs)
        for i, graph in enumerate(graphs):
            graph.savefig(f'graph_{i}.png')  # Save or display the graphs

        # Example: Train and predict
        predictions = estimator_manager.train_and_predict()
        print("Predictions:", predictions)
    except ValueError as e:
        print(e)
