In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [None]:
# NOTE(review): `rf` is not read by any other cell visible in this notebook and
# this cell has no execution count — it looks like leftover scratch; confirm and remove.
rf = RandomForestRegressor()

In [25]:
def calc_bias_variance(depth, train, test, k=10, target='F9',
                       subsample=0.3, n_estimators=5, random_state=None):
    """Estimate prediction variance and squared bias for a tree and a forest.

    For ``DecisionTreeRegressor`` and ``RandomForestRegressor`` in turn, fits
    ``k`` models on random subsamples of ``train``, predicts the fixed ``test``
    set with each of them, and summarises how much the predictions scatter
    between fits and how far their average is from the observed targets.

    Parameters
    ----------
    depth : int
        ``max_depth`` passed to both model types.
    train : pandas.DataFrame
        Pool the subsamples are drawn from; must contain the ``target`` column.
    test : pandas.DataFrame
        Fixed evaluation set; must contain the ``target`` column.
    k : int, default 10
        Number of resample-and-fit rounds per model type.
    target : str, default 'F9'
        Name of the column to predict; all other columns are used as features.
    subsample : float or int, default 0.3
        Forwarded as ``test_size`` to ``train_test_split``. Note that the
        *second* part of that split (i.e. this share of ``train``) is what each
        model is fitted on; the first part is discarded.
    n_estimators : int, default 5
        Number of trees used for the random forest.
    random_state : int, numpy.random.RandomState or None, default None
        Seed (or generator) shared by the resampling and the model fits.
        ``None`` leaves everything unseeded, as in the original implementation.

    Returns
    -------
    dict
        ``{model class name: (variance, bias_sq)}`` where ``variance`` is the
        mean over test rows of the population variance (``ddof=0``) of the
        ``k`` predictions, and ``bias_sq`` is the mean squared difference
        between the observed target and the mean of the ``k`` predictions.
        Because ``bias_sq`` is measured against observed targets, it is not
        separated from whatever noise those targets contain.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # One generator is shared by every split and fit so a single seed pins the
    # whole experiment; None keeps the unseeded behaviour.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    X_test = test.drop(columns=[target])
    y_test = np.asarray(test[target])

    results = {}
    for model in (DecisionTreeRegressor, RandomForestRegressor):
        preds = []
        for _ in range(k):
            # Keep the second part of the split: a random `subsample` share of train.
            _, training = train_test_split(train, test_size=subsample, random_state=rng)
            X_training = training.drop(columns=[target])
            y_training = training[target]

            params = {'max_depth': depth, 'random_state': rng}
            if model is RandomForestRegressor:
                params['n_estimators'] = n_estimators
            fitted = model(**params).fit(X_training, y_training)
            preds.append(fitted.predict(X_test))

        # Rows are fits, columns are test points: shape (k, n_test).
        preds = np.asarray(preds)
        variance = preds.var(axis=0).mean()
        gap = y_test - preds.mean(axis=0)

        results[model.__name__] = (variance, np.mean(gap ** 2))

    return results

# Seed NumPy's global RNG so the hold-out split below and the unseeded
# resampling / model fits inside calc_bias_variance (scikit-learn falls back to
# this global RNG when no random_state is given) give the same numbers on every
# Restart & Run All.
SEED = 42
np.random.seed(SEED)

df = pd.read_csv("CASP.csv")
# Hold out 10% of the rows as the fixed evaluation set.
train, test = train_test_split(df, test_size=0.1)

# One entry per depth, each being the tuple returned by calc_bias_variance
# for the respective model.
results_dt = []
results_rf = []
depths = np.arange(start=2, stop=10, step=1)  # max_depth values 2..9

for depth in depths:
    results = calc_bias_variance(depth, train, test)
    print("depth:",depth)
    print("DT:",results['DecisionTreeRegressor'],"RF:",results['RandomForestRegressor'])
    results_dt.append(results['DecisionTreeRegressor'])
    results_rf.append(results['RandomForestRegressor'])

print("Decision Tree Results: ", results_dt)
print("Random Forest Results: ", results_rf)

depth: 2
DT: (0.819493146268072, 7.080837100319158) RF: (0.596810668260367, 6.559724018957821)
depth: 3
DT: (0.6851291809311022, 4.9520347226707875) RF: (0.2938374468376863, 4.6460435551946215)
depth: 4
DT: (0.5612306814127042, 4.151347889490404) RF: (0.2506001117888518, 4.002508681312595)
depth: 5
DT: (0.4964757226056592, 3.8100153767699765) RF: (0.24865259823246966, 3.7629399518579647)
depth: 6
DT: (0.6193149120555063, 3.7649403748884867) RF: (0.25279023556045904, 3.633928418470776)
depth: 7
DT: (0.7460879845873379, 3.602822231392732) RF: (0.333621317806708, 3.5506998053802854)
depth: 8
DT: (1.0647704257103174, 3.5713426746130557) RF: (0.39376058867497776, 3.5223149149332884)
depth: 9
DT: (1.3425664759368114, 3.5156451799012456) RF: (0.4841379142594824, 3.4548672749472398)
Decision Tree Results:  [(0.819493146268072, 7.080837100319158), (0.6851291809311022, 4.9520347226707875), (0.5612306814127042, 4.151347889490404), (0.4964757226056592, 3.8100153767699765), (0.6193149120555063, 3.7

In [26]:
# Show the decision-tree results: one tuple per depth, as returned by calc_bias_variance.
results_dt

[(0.819493146268072, 7.080837100319158),
 (0.6851291809311022, 4.9520347226707875),
 (0.5612306814127042, 4.151347889490404),
 (0.4964757226056592, 3.8100153767699765),
 (0.6193149120555063, 3.7649403748884867),
 (0.7460879845873379, 3.602822231392732),
 (1.0647704257103174, 3.5713426746130557),
 (1.3425664759368114, 3.5156451799012456)]

In [27]:
# Show the random-forest results: one tuple per depth, as returned by calc_bias_variance.
results_rf

[(0.596810668260367, 6.559724018957821),
 (0.2938374468376863, 4.6460435551946215),
 (0.2506001117888518, 4.002508681312595),
 (0.24865259823246966, 3.7629399518579647),
 (0.25279023556045904, 3.633928418470776),
 (0.333621317806708, 3.5506998053802854),
 (0.39376058867497776, 3.5223149149332884),
 (0.4841379142594824, 3.4548672749472398)]