# SLU11 - Tree-based models: Examples

In [1]:
import pandas as pd

from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import (
    RandomForestClassifier,
    RandomForestRegressor,
    GradientBoostingClassifier,
    GradientBoostingRegressor,
)
from sklearn.metrics import mean_squared_error
from sklearn.tree import (
    DecisionTreeClassifier,
    DecisionTreeRegressor,
)

from utils.utils import *

## 1. Decision trees

### 1.1 Classification

In [2]:
# Build the toy dataset with the course helpers and split off the target.
data = make_data()
X, y = separate_target_variable(data)

# NOTE(review): presumably turns the categorical columns into a numeric
# encoding the tree can consume -- confirm against utils.utils.
X = process_categorical_features(X)

# `fit` returns the fitted estimator, so construction and training can chain.
# Predictions are made on the training rows themselves: this demonstrates the
# API, it is not an estimate of generalisation performance.
dtc = DecisionTreeClassifier().fit(X, y)
dtc.predict(X)

array([0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0])

### 1.2 Regression

In [3]:
def prepare_california():
    """Load the California housing dataset as pandas objects.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is a ``pd.DataFrame`` whose columns are the
        dataset's ``feature_names`` and ``y`` is a ``pd.Series`` named
        ``'price'`` holding the dataset's target values.
    """
    bunch = fetch_california_housing()

    features = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    target = pd.Series(bunch.target, name='price')
    return features, target

# Regression data for the notebook; the trailing underscores keep it separate
# from the classification `X`, `y` defined earlier.
X_, y_ = prepare_california()

In [4]:
# Fit a regression tree on the California housing data and predict the
# training rows back (API illustration only, not a held-out evaluation).
# NOTE(review): `dtc` is carried over from the classifier cell; the object
# bound here is a DecisionTreeRegressor.
dtc = DecisionTreeRegressor().fit(X_, y_)
dtc.predict(X_)

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

## 2. Random Forests

### 2.1 Classification

In [5]:
# Random forests draw bootstrap samples and random feature subsets, so the
# seed is fixed to give the same predictions on every "Restart & Run All".
# NOTE(review): the variable keeps the name `dtc` used by the other cells even
# though it holds a RandomForestClassifier here.
dtc = RandomForestClassifier(random_state=42)
dtc.fit(X, y)
# Predict on the training rows (API illustration, not an evaluation).
dtc.predict(X)

array([0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0])

### 2.2 Regression

In [6]:
# Random forests draw bootstrap samples and random feature subsets, so the
# seed is fixed to give the same predictions on every "Restart & Run All".
# NOTE(review): the variable keeps the name `dtc` used by the other cells even
# though it holds a RandomForestRegressor here.
dtc = RandomForestRegressor(random_state=42)
dtc.fit(X_, y_)
# Predict on the training rows (API illustration, not an evaluation).
dtc.predict(X_)

array([4.3145802, 3.8640108, 3.8300011, ..., 0.87471  , 0.85885  ,
       0.92495  ])

## 3. Gradient Boosting

### 3.1 Classification

In [7]:
# `random_state` seeds the individual trees of the boosting ensemble; fixing
# it makes the fitted model, and therefore the predictions below, repeatable
# across kernel restarts.
# NOTE(review): the variable keeps the name `dtc` used by the other cells even
# though it holds a GradientBoostingClassifier here.
dtc = GradientBoostingClassifier(random_state=42)
dtc.fit(X, y)
# Predict on the training rows (API illustration, not an evaluation).
dtc.predict(X)

array([0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0])

### 3.2 Regression

In [8]:
# `random_state` seeds the individual trees of the boosting ensemble; fixing
# it makes the fitted model, and therefore the predictions below, repeatable
# across kernel restarts.
# NOTE(review): the variable keeps the name `dtc` used by the other cells even
# though it holds a GradientBoostingRegressor here.
dtc = GradientBoostingRegressor(random_state=42)
dtc.fit(X_, y_)
# Predict on the training rows (API illustration, not an evaluation).
dtc.predict(X_)

array([4.26432728, 3.87864519, 3.92074556, ..., 0.63664692, 0.74759279,
       0.7994969 ])