In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import GridSearchCV

Now we can begin modelling. For this model we are going to use a random forest.

In [6]:
# Read the wine-quality CSV into a DataFrame and preview its first rows.
csv_path = 'winequality-red-white.csv'
wine_data = pd.read_csv(csv_path)
wine_data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,wine_type
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1


In [11]:
# Separate the target column from the remaining predictor columns.
target_column = 'quality'
wine_labels = wine_data[target_column]
wine_features = wine_data.drop(columns=[target_column])

In [12]:
def print_results(results):
    """Print the best parameters and every candidate's score from a search.

    Args:
        results: an object exposing ``best_params_`` and a ``cv_results_``
            mapping with the keys ``'mean_test_score'``, ``'std_test_score'``
            and ``'params'`` (for example a fitted ``GridSearchCV``).

    Returns:
        None. One header line is printed, followed by one line per candidate
        showing the mean test score and twice its standard deviation, both
        rounded to three decimals.
    """
    print('BEST PARAMS: {}\n'.format(results.best_params_))

    summary = results.cv_results_
    rows = zip(summary['mean_test_score'],
               summary['std_test_score'],
               summary['params'])
    for avg_score, score_std, candidate in rows:
        # The reported spread is two standard deviations of the fold scores.
        spread = round(score_std * 2, 3)
        print('{} (+/-{}) for {}'.format(round(avg_score, 3), spread, candidate))

In [13]:
# Fix the seed so the bootstrap samples and feature subsets drawn by the
# forest, and therefore the grid-search scores and the selected best
# parameters, are repeatable across kernel restarts.
RANDOM_STATE = 42

rf = RandomForestClassifier(random_state=RANDOM_STATE)

# Candidate hyper-parameters: number of trees and maximum tree depth
# (None places no depth limit on the trees).
parameters = {
    'n_estimators': [5, 50, 250],
    'max_depth': [2, 4, 8, 16, 32, None]
}

# Exhaustive search over every combination, scored with 5-fold cross-validation.
# NOTE(review): the recorded output reports a mean CV accuracy of 1.0 for
# max_depth >= 16, which is unusually high -- worth checking the feature
# columns for target leakage before trusting these scores.
cv = GridSearchCV(rf, parameters, cv=5)
# wine_labels is a single DataFrame column (already 1-D), so it is passed as-is.
cv.fit(wine_features, wine_labels)

print_results(cv)

BEST PARAMS: {'max_depth': 16, 'n_estimators': 250}

0.586 (+/-0.039) for {'max_depth': 2, 'n_estimators': 5}
0.582 (+/-0.058) for {'max_depth': 2, 'n_estimators': 50}
0.582 (+/-0.057) for {'max_depth': 2, 'n_estimators': 250}
0.608 (+/-0.044) for {'max_depth': 4, 'n_estimators': 5}
0.622 (+/-0.035) for {'max_depth': 4, 'n_estimators': 50}
0.621 (+/-0.04) for {'max_depth': 4, 'n_estimators': 250}
0.762 (+/-0.036) for {'max_depth': 8, 'n_estimators': 5}
0.789 (+/-0.021) for {'max_depth': 8, 'n_estimators': 50}
0.799 (+/-0.025) for {'max_depth': 8, 'n_estimators': 250}
0.957 (+/-0.016) for {'max_depth': 16, 'n_estimators': 5}
1.0 (+/-0.001) for {'max_depth': 16, 'n_estimators': 50}
1.0 (+/-0.0) for {'max_depth': 16, 'n_estimators': 250}
0.957 (+/-0.017) for {'max_depth': 32, 'n_estimators': 5}
1.0 (+/-0.0) for {'max_depth': 32, 'n_estimators': 50}
1.0 (+/-0.0) for {'max_depth': 32, 'n_estimators': 250}
0.962 (+/-0.012) for {'max_depth': None, 'n_estimators': 5}
1.0 (+/-0.0) for {'max_dep