In [50]:
from sklearn.linear_model import LogisticRegression

# Toy training set: two samples (rows) with three features (columns) each,
# and one class label per sample.
X = [
    [1, 2, 3],
    [11, 12, 13],
]
y = [0, 1]

# Build the classifier; its hyper-parameters are set at construction time.
clf = LogisticRegression(random_state=0)

# Learn the model from the data. Kept as the last expression so the fitted
# estimator is displayed as the cell output.
clf.fit(X, y)

In [5]:
# Predict the class of the same samples the model was fitted on; the output
# below reproduces the training labels y.
clf.predict(X)

array([0, 1])

In [3]:
# Predict the class of two samples that were not part of the training data.
clf.predict([[4, 5, 6], [14, 15, 16]])

array([0, 1])

In [6]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer

from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

# Two samples with two features each.
# NOTE: this rebinds X, which earlier held the classifier's training data.
X = [
    [0, 15],
    [1, -10],
]

# Learn the per-feature statistics used for standardisation.
scaler = StandardScaler()
scaler.fit(X)

In [8]:
# Per-feature mean and scale learned by scaler.fit(X), one entry per column.
scaler.mean_, scaler.scale_

(array([0.5, 2.5]), array([ 0.5, 12.5]))

In [9]:
# Standardise X with the fitted statistics: (x - mean_) / scale_ per column.
scaler.transform(X)

array([[-1.,  1.],
       [ 1., -1.]])

In [11]:
# Fit and transform in a single call. On the same X this gives the same
# result as fit(X) followed by transform(X), as the matching outputs show.
scaler.fit_transform(X)

array([[-1.,  1.],
       [ 1., -1.]])

In [21]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Chain the scaler and the classifier into a single estimator.
pipe = make_pipeline(StandardScaler(), LogisticRegression())

# Iris features and labels, with 25% of the samples held out for testing.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Fit every step of the pipeline, using the training split only.
pipe.fit(X_train, y_train)

In [22]:
# Peek at the first five held-out samples (four features each).
X_test[:5]

array([[5.8, 2.8, 5.1, 2.4],
       [6. , 2.2, 4. , 1. ],
       [5.5, 4.2, 1.4, 0.2],
       [7.3, 2.9, 6.3, 1.8],
       [5. , 3.4, 1.5, 0.2]])

In [23]:
# Predicted classes for those five samples; raw features go in because the
# pipeline contains the scaling step.
pipe.predict(X_test[:5])

array([2, 1, 0, 2, 0])

In [24]:
# True labels of the same five samples, for comparison with the predictions.
y_test[:5]

array([2, 1, 0, 2, 0])

In [25]:
# Predicted classes for the entire test split.
pipe.predict(X_test)

array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
       0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 0, 2])

In [26]:
# Predicted classes for the held-out test split.
y_pred = pipe.predict(X_test)

# Fraction of test samples classified correctly. scikit-learn metrics take
# the ground truth first: accuracy_score(y_true, y_pred). Accuracy happens to
# be symmetric, but keeping the documented order avoids wrong results when the
# same pattern is reused with asymmetric metrics (precision, recall, ...).
accuracy_score(y_test, y_pred)

0.9736842105263158

In [29]:
# Number of samples in the training split.
len(X_train)

112

In [27]:
# Number of samples in the test split.
len(y_test)

38

In [30]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate

# Synthetic regression problem with 1000 samples; the fixed random_state makes
# the generated data reproducible. NOTE: rebinds X and y from the iris example.
X, y = make_regression(n_samples=1000, random_state=0)


In [32]:
lr = LinearRegression()

# Cross-validate the model; cv is not given, so this defaults to 5-fold CV.
result = cross_validate(lr, X, y)
# Mean R^2 across the folds. It is high because the dataset is easy
# (presumably noise-free synthetic data that a linear model fits exactly).
result['test_score'].mean()

1.0

In [45]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from scipy.stats import randint

# California housing data, split into train and test sets using
# train_test_split's default split size.
X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Distributions the hyper-parameters are sampled from. scipy's randint
# excludes the upper bound: n_estimators is drawn from 1..4, max_depth from 5..9.
param_distributions = {
    'n_estimators': randint(1, 5),
    'max_depth': randint(5, 10),
}

# Randomised search: evaluate n_iter sampled settings and keep the best one.
search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=0),
    param_distributions=param_distributions,
    n_iter=5,
    random_state=0,
)
search.fit(X_train, y_train)

search.best_params_

{'max_depth': 9, 'n_estimators': 4}

In [46]:
# Once fitted, the search object acts like a normal random forest estimator
# configured with the best parameters found (max_depth=9, n_estimators=4).
# Here it is scored on the held-out test split.
search.score(X_test, y_test)

0.735363411343253

In [None]:
import random

# Seed Python's global random generator so the draws below are reproducible.
random.seed(10)

In [None]:
# First draw after seeding (assuming the cells are run in order).
random.random()

0.5714025946899135

In [None]:
# Next draw from the same seeded sequence; re-running from random.seed(10)
# yields the same values again.
random.random()

0.4288890546751146