In [1]:
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Build an untrained random forest classifier; the fixed seed keeps runs repeatable.
clf = RandomForestClassifier(random_state=0)

In [3]:
# Toy training set: two samples with three features each.
X = [
    [1, 2, 3],
    [11, 12, 13],
]
# One class label per row of X.
y = [0, 1]

In [4]:
# Train the classifier on the toy samples X and their labels y.
clf.fit(X, y)

In [7]:
# Predict on the training samples themselves; the recorded output matches y.
clf.predict(X)

array([0, 1])

In [9]:
# Classify two samples that were not part of the training data.
clf.predict(
    [
        [4, 5, 6],
        [14, 15, 16],
    ]
)

array([0, 1])

In [10]:
# Classify two more unseen samples, both with small feature values.
clf.predict(
    [
        [1, 1, 1],
        [2, 3, 4],
    ]
)

array([0, 0])

In [11]:
## Data Scaling (Data Preprocessing)

In [12]:
from sklearn.preprocessing import StandardScaler

In [13]:
# Two samples with two features on very different scales.
# NOTE: this rebinds X, replacing the toy training data from the earlier cells.
X = [
    [0, 15],
    [1, -10],
]

In [14]:
# Fit the scaler on X and transform X in a single call; each column is
# standardized independently (see the recorded output below).
StandardScaler().fit_transform(X)

array([[-1.,  1.],
       [ 1., -1.]])

In [15]:
## Pipelines 

In [16]:
# We will build a pipeline that standardizes the features and then fits a LogisticRegression classifier.

In [17]:
from sklearn.pipeline import make_pipeline

In [18]:
from sklearn.linear_model import LogisticRegression

In [19]:
# Chain the scaler and the classifier so they are fitted and applied as one estimator.
pipe = make_pipeline(StandardScaler(), LogisticRegression())

In [20]:
from sklearn.datasets import load_iris 

In [21]:
# Load the iris data as a (features, target) pair.
# NOTE: this rebinds X and y, replacing the values from the earlier cells.
X, y = load_iris(return_X_y=True)

In [22]:
from sklearn.model_selection import train_test_split

In [23]:
# Hold out part of the iris data for evaluation; the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [24]:
# Fit the whole pipeline (scaler, then classifier) on the training split only.
pipe.fit(X_train, y_train)

In [25]:
from sklearn.metrics import accuracy_score

In [26]:
# Share of held-out samples the pipeline classifies correctly.
accuracy_score(y_true=y_test, y_pred=pipe.predict(X_test))

0.9736842105263158

In [27]:
## Model Evaluation

In [28]:
# Cross Validation.

In [29]:
from sklearn.datasets import make_regression

In [30]:
# Generate a synthetic regression problem with 1000 samples (seeded for
# reproducibility).
# NOTE: this rebinds X and y, replacing the iris data loaded above.
X, y = make_regression(n_samples=1000, random_state=0)

In [34]:
from sklearn.linear_model import LinearRegression

In [35]:
# Unfitted linear regression model; it is fitted per fold by cross_validate below.
lr = LinearRegression()

In [36]:
from sklearn.model_selection import cross_validate

In [37]:
# Cross-validate the linear model with the default splitting strategy; the
# returned dict holds per-fold timings and test scores.
result = cross_validate(estimator=lr, X=X, y=y)

In [39]:
# Show the full cross-validation report: fit times, score times and test scores.
result

{'fit_time': array([0.0279274 , 0.02491951, 0.02305698, 0.01795197, 0.02194142]),
 'score_time': array([0., 0., 0., 0., 0.]),
 'test_score': array([1., 1., 1., 1., 1.])}

In [38]:
# Per-fold test scores only (one entry per cross-validation fold).
result["test_score"]

array([1., 1., 1., 1., 1.])

In [40]:
## Automatic Parameter Searches

In [41]:
from sklearn.datasets import fetch_california_housing

In [42]:
# Load the California housing data as a (features, target) pair.
# NOTE(review): fetch_* loaders presumably download and cache the data on first
# use, so this cell may need network access - confirm.
# NOTE: this rebinds X and y, replacing the synthetic regression data above.
X, y = fetch_california_housing(return_X_y=True)

In [43]:
# Split the housing data into train and test parts with a fixed seed.
# NOTE: this rebinds the X_train/X_test/y_train/y_test names used for iris above.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [44]:
from sklearn.model_selection import RandomizedSearchCV

In [45]:
from scipy.stats import randint

In [47]:
# Search space for the randomized search. scipy's randint(low, high) excludes
# `high`, so n_estimators is drawn from 1..4 and max_depth from 5..9.
param_distributions = dict(
    n_estimators=randint(1, 5),
    max_depth=randint(5, 10),
)

In [51]:
from sklearn.ensemble import RandomForestRegressor

# Randomized hyper-parameter search over a seeded random forest regressor:
# five candidate settings are sampled from param_distributions, and the
# search itself is seeded so the sampled candidates are reproducible.
search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=0),
    param_distributions=param_distributions,
    n_iter=5,
    random_state=0,
)

In [52]:
# Run the search on the training split only; the test split stays untouched
# until the final scoring cell.
search.fit(X_train, y_train)

In [53]:
# Parameter combination that scored best during the search. In the recorded
# output both values sit at the top of their sampled ranges (n_estimators 1..4,
# max_depth 5..9), so wider ranges may be worth trying.
search.best_params_

{'max_depth': 9, 'n_estimators': 4}

In [54]:
# Score the search's best model on the held-out test split.
# NOTE(review): for a regressor the default score is presumably R^2 - confirm.
search.score(X_test, y_test)

0.735363411343253