# Random Forest Regressor - Best Model

### Imports

In [2]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

### Dataset and Preparation

In [3]:
# NOTE(review): unpickling can execute arbitrary code stored in the file -- only load
# this dataset from a trusted source.
data_all = pd.read_pickle('../training_dataset_task3/task_3_training_e8da4715deef7d56_f8b7378_pandas.pkl')
# Label-based slice: keep the columns from 'pianist_id' through 'arousal' (both inclusive).
data = data_all.loc[:, 'pianist_id':'arousal']

# Everything except the three target-related columns is used as a feature;
# the prediction target is the 'quadrant' column.
features = data.drop(['quadrant', 'valence', 'arousal'], axis=1)
y = data['quadrant']

# Split in Training and Test Set *before* scaling, so that the held-out rows
# cannot influence the scaler's mean/variance (avoids train/test leakage).
features_train, features_test, y_train, y_test = train_test_split(
    features, y, test_size=0.33, random_state=42
)

# preprocess dataset: fit the scaler on the training rows only, then apply the
# same transformation to every subset.
scaler = StandardScaler().fit(features_train)
X_train = scaler.transform(features_train)
X_test = scaler.transform(features_test)

# Kept for backward compatibility: the full feature matrix, scaled with the
# training-set statistics.
X = scaler.transform(features)

In [4]:
# Used to evaluate Model

def evaluate(model, X_test, y_test):
    """Print and return a percentage-error based score for a fitted model.

    The value reported as "Accuracy" is ``100 - MAPE`` (mean absolute
    percentage error of the predictions relative to ``y_test``); it is not a
    classification hit rate.

    Parameters
    ----------
    model : object exposing ``predict(X_test)``.
    X_test : inputs passed unchanged to ``model.predict``.
    y_test : true target values; each absolute error is divided by the
        matching target, so a target of zero makes that ratio undefined.

    Returns
    -------
    The score ``100 - MAPE``, as also printed.
    """
    predicted = model.predict(X_test)
    abs_err = abs(predicted - y_test)

    # Mean absolute percentage error, expressed on a 0-100 scale.
    mean_pct_err = np.mean(abs_err / y_test) * 100
    score = 100 - mean_pct_err

    report = (
        'Model Performance',
        'Average Error: {:0.4f} degrees.'.format(np.mean(abs_err)),
        'Accuracy = {:0.2f}%.'.format(score),
    )
    for line in report:
        print(line)

    return score

In [4]:
# Candidate forest; hyperparameters presumably come from the first run of the
# tuning notebook -- TODO confirm.
# random_state is fixed so that repeated runs of this cell give the same score:
# without it the forest is seeded differently on every fit.
rf = RandomForestRegressor(
    bootstrap=False,
    max_depth=50,
    max_features='sqrt',
    min_samples_leaf=2,
    n_estimators=800,
    random_state=42,
)
rf.fit(X_train, y_train)
# Last expression of the cell: prints the report and displays the returned score.
evaluate(rf, X_test, y_test)

Model Performance
Average Error: 0.7857 degrees.
Accuracy = 57.19%.
Model Performance
Average Error: 0.7857 degrees.
Accuracy = 57.19%.


57.19452273062187

57.19452273062187

**Model performance recorded from an earlier run** (scores vary slightly between runs):

- Average Error: 0.7866 degrees
- Accuracy = 57.28%

In [5]:
# After running RandomParameter_tuningRF.ipynb a second time, the best random forest was:
# (random_state is fixed so that the score of this configuration is reproducible)
best2 = RandomForestRegressor(
    bootstrap=False,
    max_depth=110,
    max_features='sqrt',
    min_samples_leaf=2,
    n_estimators=600,
    random_state=42,
)

In [6]:
# Train the second candidate and score it on the held-out split.
# fit() returns the estimator itself, so it can be passed straight to evaluate().
evaluate(best2.fit(X_train, y_train), X_test, y_test)

Model Performance
Average Error: 0.7872 degrees.
Accuracy = 57.15%.


57.14710740249534

**Model performance recorded for this configuration:**

- Average Error: 0.7872 degrees
- Accuracy = 57.15%

In [None]:
# Both tuned configurations reach about 57% on the held-out split, so I strongly believe
# that further random-forest hyperparameter tuning will not improve on this.