### Example of Pipeline Creation and Usage

In [9]:
from sklearn.datasets import load_iris
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
#* Load the iris dataset as a (features, labels) pair
X, y = load_iris(return_X_y=True)

#* Hold out a test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

#* Chain feature scaling and the classifier into a single estimator
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(),
)

In [None]:
#* Train the pipeline on the training split
pipe.fit(X_train, y_train)

#* Evaluate on the held-out split
y_pred = pipe.predict(X_test)
score = accuracy_score(y_test, y_pred)
print(f"Accuracy: {score*100:.2f}%")

Accuracy: 97.37%


### Hyper-parameter Tuning
- Hyper-parameters are configuration variables set before the training process begins.
- They are used to fine-tune the model's learning process.
- They are distinct from model parameters, which are learned during training.

In [17]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import randint

In [18]:
#* Load the California housing data and hold out a test set
X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

#* Search space for the randomized search.
#* scipy's randint(low, high) draws integers in [low, high), i.e. `high` is excluded.
param_dist = dict(
    n_estimators=randint(1, 5),  #* 1 to 4 trees
    max_depth=randint(5, 10),    #* depth 5 to 9
)

In [None]:
#* Base model; its seed is fixed so every candidate is trained reproducibly
forest = RandomForestRegressor(random_state=0)

#* Randomized search: draw n_iter candidates from param_dist and
#* cross-validate each one (the search's own seed fixes which are drawn)
search = RandomizedSearchCV(
    forest,
    param_dist,
    n_iter=5,
    random_state=0,
)
search.fit(X_train, y_train)

print(f"Best Parameters: {search.best_params_}")

Best Parameters: {'max_depth': 9, 'n_estimators': 4}


In [22]:
#* Score the refit best estimator on the held-out split.
#* No `scoring` was passed to the search, so this is the estimator's own
#* score method (R^2 for a regressor).
score = search.score(X_test, y_test)
print(f"Test Score: {score}")

Test Score: 0.735363411343253
