# BASIC RANDOM FOREST

In [4]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [6]:
# Load the iris dataset and separate the feature matrix from the class labels.
iris = datasets.load_iris()
x, y = iris.data, iris.target

In [7]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Build a 100-tree random forest classifier with a fixed seed and fit it on the
# training split. The fit call stays as the last expression so the notebook
# still displays the fitted estimator.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
model.fit(x_train, y_train)

In [10]:
# Predict class labels for the held-out test features with the fitted forest.
y_pred = model.predict(x_test)

In [11]:
# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

1.0

-------------

In [12]:
# Random Forest with Hyperparameter Tuning (GridSearchCV)

In [34]:
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [35]:
# Reload the iris dataset for the tuning section: features in x, labels in y.
data = datasets.load_iris()
x, y = data.data, data.target

In [36]:
# Same 70/30 split and seed as the basic section.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [37]:
# Candidate hyperparameters for the grid search: 3 forest sizes x 4 depth
# limits = 12 combinations (max_depth=None leaves tree depth unrestricted).
param_grid = dict(
    n_estimators=[10, 50, 100],
    max_depth=[None, 10, 20, 30],
)
# Base estimator whose hyperparameters will be tuned; seeded for reproducibility.
model = RandomForestClassifier(random_state=42)

In [30]:
# Search every combination in param_grid with 5-fold cross-validation on the
# training split. The fit call stays last so the notebook shows the fitted search.
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(x_train, y_train)

In [31]:
# Hyperparameter combination selected by the grid search.
grid_search.best_params_

{'max_depth': None, 'n_estimators': 100}

In [33]:
# Mean cross-validated score of the selected combination. No `scoring` was
# passed to GridSearchCV, so the estimator's default scorer is used
# (per scikit-learn docs this is accuracy for classifiers).
grid_search.best_score_

np.float64(0.9428571428571428)

In [38]:
# Evaluate the tuned model on the held-out test split. Per the scikit-learn
# docs, GridSearchCV.predict uses the best estimator refit on the full
# training data (refit=True is the default).
y_hat = grid_search.predict(x_test)
# Score the predictions produced just above. Scoring the stale `y_pred` from
# the untuned model in the first section would report that model's accuracy
# instead of the tuned one's.
accuracy = accuracy_score(y_test, y_hat)

In [39]:
# Display the accuracy value computed in the previous cell.
accuracy

1.0

____________

# Random Forest For Regression

In [43]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [47]:
# Fetch the California housing dataset and split it into the feature matrix
# and the regression target.
data = fetch_california_housing()
x, y = data.data, data.target

In [48]:
# Reserve 30% of the rows for testing; seeded so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [49]:
# Build a 100-tree random forest regressor with a fixed seed and fit it on the
# training split. The fit call stays last so the notebook displays the estimator.
model = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
)
model.fit(x_train, y_train)

In [50]:
# Predict target values for the held-out test features with the fitted regressor.
y_pred = model.predict(x_test)

In [51]:
# Mean squared error between true and predicted targets (lower is better).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

np.float64(0.25650512920799395)

-------------------

In [None]:
# GRID SEARCH FOR REGRESSION

In [55]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [56]:
# Reload the California housing dataset for the grid-search section.
data = fetch_california_housing()
x, y = data.data, data.target

In [57]:
# Same 70/30 split and seed as the untuned regression section.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [60]:
# Base regressor passed to the grid search below as `estimator`; only the seed
# is fixed here, while n_estimators and max_depth come from the search grid.
model = RandomForestRegressor(random_state=42)

In [58]:
# Candidate hyperparameters for the regressor: 3 forest sizes x 4 depth limits
# = 12 combinations (max_depth=None leaves tree depth unrestricted).
param = dict(
    n_estimators=[10, 50, 100],
    max_depth=[None, 10, 20, 30],
)

In [61]:
# Search every combination in `param` with 5-fold cross-validation on the
# training split. The fit call stays last so the notebook shows the fitted search.
grid_search = GridSearchCV(model, param, cv=5)
grid_search.fit(x_train, y_train)

In [62]:
# Score the grid-searched regressor on the held-out split with mean squared error.
y_pred = grid_search.predict(x_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

np.float64(0.25691566592894494)

In [63]:
# Summarize the search result. NOTE(review): no `scoring` was passed to
# GridSearchCV, so best_score_ uses the regressor's default scorer (R^2 per the
# scikit-learn docs) and is not comparable to the MSE computed for the test split.
print(
    f'Best Parameters: {grid_search.best_params_}',
    f'Best Cross-Validation Score: {grid_search.best_score_:.2f}',
    sep='\n',
)

Best Parameters: {'max_depth': 20, 'n_estimators': 100}
Best Cross-Validation Score: 0.80
