[Reference](https://medium.com/better-programming/streamlining-model-selection-de50c421d129)

# Normal code

```python
# Baseline: an untuned logistic regression scored on a held-out split.
# NOTE(review): `features` and `labels` are not defined in this snippet;
# they are assumed to be loaded earlier -- confirm against the notebook.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets (test_size=0.2 holds out 20%)
x_train, x_test, y_train, y_test = train_test_split(features,labels,test_size=0.2)
clf = LogisticRegression()
# fit() returns the estimator itself, so `model` refers to the fitted `clf`
model = clf.fit(x_train,y_train)
predictions = model.predict(x_test)
# Mean accuracy on the held-out split; the value is only displayed when this
# is the last expression of a notebook cell, otherwise it is discarded.
model.score(x_test,y_test)
```

# GridSearchCV

```python
#GridSearchCV inserted into a function that streamlines the process
def perform_gridsearch(features,labels):
    """Tune a logistic regression with an exhaustive grid search and report it.

    The grid covers the L1 and L2 penalties combined with ten values of ``C``
    spaced logarithmically between 1 and 10**4, evaluated with 5-fold
    cross-validation. The predictions, ROC curve points, best estimator,
    best ``C`` and accuracy are printed.

    Note that the predictions, ROC curve and accuracy are computed on the same
    data the search was fitted on, so they are optimistic estimates.

    Args:
        features: Feature matrix used for the search and for the report.
        labels: Target labels. ``roc_curve`` only supports binary targets.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    # liblinear supports both L1 and L2 penalties. The default lbfgs solver
    # rejects penalty='l1', which made every L1 candidate fail to fit.
    logistic = linear_model.LogisticRegression(solver='liblinear')
    # Candidate hyperparameters: regularization strength C and penalty type
    hyperparameters = {
        'C': np.logspace(0, 4, 10),
        'penalty': ['l1', 'l2'],
    }
    gridsearch = GridSearchCV(logistic, hyperparameters, cv=5, verbose=1)
    # fit() returns the search object, refit on the best parameter setting
    best_model = gridsearch.fit(features, labels)
    predictions = best_model.predict(features)
    # A ROC curve needs continuous scores; hard 0/1 predictions collapse it
    # to a single operating point.
    scores = best_model.decision_function(features)
    fpr, tpr, thresholds = metrics.roc_curve(labels, scores)
    print(predictions)
    print(fpr,tpr,thresholds)
    print('Best Model Parameters:', best_model.best_estimator_)
    print('Best C:', best_model.best_estimator_.get_params()['C'])
    print("The mean accuracy of the model is:",best_model.score(features,labels))
    return best_model
```

# RandomizedSearchCV
Unlike grid search, it doesn't try a fixed list of hyperparameter values; it samples candidates from the distributions you provide.

```python
def perform_randomized_search(features,labels):
    """Tune a logistic regression with a randomized search and report it.

    Draws 100 candidates (``random_state=1``) where ``C`` is sampled from a
    uniform distribution on [0, 4] and the penalty is picked from L1/L2, and
    evaluates each with 5-fold cross-validation. The ROC curve points, best
    penalty, best ``C`` and accuracy are printed.

    Note that the ROC curve and accuracy are computed on the same data the
    search was fitted on, so they are optimistic estimates.

    Args:
        features: Feature matrix used for the search and for the report.
        labels: Target labels. ``roc_curve`` only supports binary targets.

    Returns:
        The fitted ``RandomizedSearchCV`` object.
    """
    # liblinear supports both L1 and L2 penalties. The default lbfgs solver
    # rejects penalty='l1', which made every L1 candidate fail to fit.
    logistic = linear_model.LogisticRegression(solver='liblinear')
    # C is a distribution to sample from; penalty is a list to choose from
    hyperparameters = {
        'C': uniform(loc=0, scale=4),
        'penalty': ['l1', 'l2'],
    }
    randomizedsearch = RandomizedSearchCV(
        logistic,
        hyperparameters,
        random_state=1,
        n_iter=100,
        cv=5,
        verbose=1,
    )
    # fit() returns the search object, refit on the best parameter setting
    best_model = randomizedsearch.fit(features, labels)
    # A ROC curve needs continuous scores; hard 0/1 predictions collapse it
    # to a single operating point.
    scores = best_model.decision_function(features)
    fpr, tpr, thresholds = metrics.roc_curve(labels, scores)
    print(fpr, tpr, thresholds)
    best_params = best_model.best_estimator_.get_params()
    # Report the penalty itself; this line used to print the whole estimator
    print('Best Penalty:', best_params['penalty'])
    print('Best C:', best_params['C'])
    print("The mean accuracy of the model is:",best_model.score(features,labels))
    return best_model
```

# Pipelining and Preprocessing

```python
def _positive_class_scores(model, features):
    """Return continuous positive-class scores from a fitted binary classifier."""
    # SVC and LogisticRegression expose decision_function; RandomForest only
    # offers predict_proba, whose second column is the positive class.
    if hasattr(model, "decision_function"):
        return model.decision_function(features)
    return model.predict_proba(features)[:, 1]


def execute_pipeline(features,labels):
    """Grid-search several classifiers behind a shared preprocessing step.

    Builds a pipeline that concatenates standardized features with their PCA
    projection and feeds the result to a classifier. The grid search then
    swaps the classifier step between logistic regression, random forest and
    SVC, each with its own hyperparameter grid, using 5-fold cross-validation
    on all available cores. The ROC curve points, best pipeline and accuracy
    are printed.

    Note that the ROC curve and accuracy are computed on the same data the
    search was fitted on, so they are optimistic estimates.

    Args:
        features: Feature matrix used for the search and for the report.
        labels: Target labels. ``roc_curve`` only supports binary targets.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    # Preprocessing
    pca_components = PCA()  # if n_components not specified, keeps all components
    std_scaler = StandardScaler()
    preprocess = FeatureUnion([("std", std_scaler), ("pca", pca_components)])
    # The preprocessing step was previously built but never added to the
    # pipeline, so every classifier was trained on the raw features.
    pipe = Pipeline([
        ("preprocess", preprocess),
        ("classifier", LogisticRegression()),
    ])
    # Candidate learning algorithms and their hyperparameters
    search_space = [
        # Solvers that accept both L1 and L2 penalties
        {
            "classifier": [LogisticRegression()],
            "classifier__penalty": ['l2', 'l1'],
            "classifier__C": np.logspace(0, 4, 10),
            "classifier__solver": ['liblinear', 'saga'],
        },
        # These solvers only support the L2 penalty
        {
            "classifier": [LogisticRegression()],
            "classifier__penalty": ['l2'],
            "classifier__C": np.logspace(0, 4, 10),
            "classifier__solver": ['newton-cg', 'sag', 'lbfgs'],
        },
        {
            "classifier": [RandomForestClassifier()],
            "classifier__n_estimators": [10, 100, 200],
            "classifier__max_depth": [5, 8, 15, None],
            "classifier__min_samples_leaf": [1, 2, 5, 10, 15],
            "classifier__max_leaf_nodes": [2, 5, 10],
        },
        {
            "classifier": [SVC()],
            "classifier__C": [0.01, 0.1, 1, 10, 100],
            "classifier__kernel": ['linear', 'rbf', 'sigmoid'],
        },
    ]

    gridsearch = GridSearchCV(pipe, search_space, cv=5, verbose=1, n_jobs=-1)
    # fit() returns the search object, refit on the best parameter setting
    best_model = gridsearch.fit(features, labels)
    # A ROC curve needs continuous scores; hard 0/1 predictions collapse it
    # to a single operating point.
    scores = _positive_class_scores(best_model, features)
    fpr_pipe, tpr_pipe, thresholds_pipe = metrics.roc_curve(labels, scores)
    print(fpr_pipe, tpr_pipe, thresholds_pipe)
    print(best_model.best_estimator_)
    print("The mean accuracy of the model is:",best_model.score(features,labels))
    return best_model
```


![image](https://miro.medium.com/max/1360/1*_PwxmaOj_dyBI8pRNYQdVA.jpeg)