In [1]:
# Import Dependencies
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load Dataset
dataset = pd.read_csv("winequality-red.csv", sep=";")
dataset['target'] = np.where(dataset['quality']<=5,0,1)

# Assign X and y
X = dataset.drop(["quality", "target"], axis=1)
y= dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [3]:
# Standard Scaler

In [4]:
# Pre-processing Standard Scaler
X_scaler= StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a random forest classifier
rf = RandomForestClassifier()
rf = rf.fit(X_train_scaled, y_train)
rf.score(X_train_scaled, y_train)

# Random Forests in sklearn will automatically calculate feature importance
importances = rf.feature_importances_

# List Feature Importances, sorted
feature_importances = pd.DataFrame(rf.feature_importances_,  index = X_train.columns, columns = ['importance']).sort_values('importance', ascending=False)
feature_importances

Unnamed: 0,importance
alcohol,0.19737
sulphates,0.131547
total sulfur dioxide,0.105472
volatile acidity,0.104327
density,0.085384
chlorides,0.073398
citric acid,0.064561
pH,0.062886
fixed acidity,0.061874
free sulfur dioxide,0.058055


In [5]:
predictions = rf.predict(X_test_scaled)
print(predictions)
print(rf.classes_)

[0 0 0 0 1 0 0 0 1 1 1 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 0 1 0 0 1 1 0 0 1
 1 0 1 0 0 1 0 1 1 1 1 0 1 0 0 1 1 0 0 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1 0 1 1
 1 0 1 0 1 1 0 1 0 1 1 1 0 1 1 0 1 1 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 0 1 0 1
 0 1 0 0 1 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 1 1 1 0 1 0 1 1
 1 0 1 1 0 0 1 0 1 1 0 1 1 1 1 0 0 1 1 0 0 0 0 0 1 0 1 1 1 0 0 0 0 1 0 1 0
 1 1 1 1 0 1 1 0 1 1 0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1
 1 1 1 0 0 1 0 0 0 0 1 1 0 1 0 1 1 0 0 0 1 1 0 1 1 1 1 0 1 0 0 1 1 0 1 1 1
 1 1 0 0 1 1 1 0 0 1 1 0 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1 0 1 0 1 1 0 0 1
 0 1 1 1 0 0 1 0 1 0 1 1 1 1 0 0 1 1 1 1 0 0 1 0 1 1 1 0 0 0 1 1 0 1 1 1 0
 1 1 0 1 1 0 0 1 1 0 0 1 0 1 0 1 1 0 1 0 1 1 1 0 0 0 0 1 0 0 1 0 1 1 0 0 0
 0 0 1 1 0 1 1 1 0 1 0 1 1 1 1 1 1 0 0 1 0 0 0 1 1 1 1 0 0 1]
[0 1]


In [6]:
print(f"Training Data Score: {rf.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {rf.score(X_test_scaled, y_test)}")

Training Data Score: 1.0
Testing Data Score: 0.8025


In [7]:
from sklearn.metrics import classification_report

In [8]:
print(classification_report(y_test, predictions, zero_division=0))

              precision    recall  f1-score   support

           0       0.78      0.78      0.78       178
           1       0.82      0.82      0.82       222

    accuracy                           0.80       400
   macro avg       0.80      0.80      0.80       400
weighted avg       0.80      0.80      0.80       400



In [9]:
# picked top 5 based on feature importance

In [10]:
X2 = dataset.drop(["quality", "target", 'chlorides', 'residual sugar', 'citric acid', 'pH', 'free sulfur dioxide', 'fixed acidity'], axis=1)
y2= dataset.target
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, random_state=42)

In [11]:
from sklearn.model_selection import GridSearchCV

In [12]:
# Pre-processing PowerTransformer Scaler
X_scaler2= StandardScaler().fit(X_train2)
X_train_scaled2 = X_scaler2.transform(X_train2)
X_test_scaled2 = X_scaler2.transform(X_test2)

# Create a random forest classifier
rf2 = RandomForestClassifier()

param_grid = {'n_estimators':[50, 100, 250, 500],
              'criterion': ['entropy', 'gini'],
              'max_depth': [125, 150, 175]}

grid = GridSearchCV(rf2, param_grid, verbose =3)

In [13]:
grid.fit(X_train_scaled2, y_train2)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] criterion=entropy, max_depth=125, n_estimators=50 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=50, score=0.779, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=50 ...............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV]  criterion=entropy, max_depth=125, n_estimators=50, score=0.746, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=50 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=50, score=0.817, total=   0.2s
[CV] criterion=entropy, max_depth=125, n_estimators=50 ...............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s


[CV]  criterion=entropy, max_depth=125, n_estimators=50, score=0.787, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=50 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=50, score=0.854, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=100 ..............
[CV]  criterion=entropy, max_depth=125, n_estimators=100, score=0.792, total=   0.3s
[CV] criterion=entropy, max_depth=125, n_estimators=100 ..............
[CV]  criterion=entropy, max_depth=125, n_estimators=100, score=0.742, total=   0.3s
[CV] criterion=entropy, max_depth=125, n_estimators=100 ..............
[CV]  criterion=entropy, max_depth=125, n_estimators=100, score=0.825, total=   0.2s
[CV] criterion=entropy, max_depth=125, n_estimators=100 ..............
[CV]  criterion=entropy, max_depth=125, n_estimators=100, score=0.808, total=   0.3s
[CV] criterion=entropy, max_depth=125, n_estimators=100 ..............
[CV]  criterion=entropy, max_depth=125, n_estimators=100, score=0.

[CV]  criterion=entropy, max_depth=175, n_estimators=500, score=0.746, total=   1.2s
[CV] criterion=entropy, max_depth=175, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=175, n_estimators=500, score=0.838, total=   1.2s
[CV] criterion=entropy, max_depth=175, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=175, n_estimators=500, score=0.817, total=   1.2s
[CV] criterion=entropy, max_depth=175, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=175, n_estimators=500, score=0.858, total=   1.4s
[CV] criterion=gini, max_depth=125, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=50, score=0.787, total=   0.1s
[CV] criterion=gini, max_depth=125, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=50, score=0.742, total=   0.1s
[CV] criterion=gini, max_depth=125, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=50, score=0.842, total

[CV]  criterion=gini, max_depth=175, n_estimators=250, score=0.787, total=   0.5s
[CV] criterion=gini, max_depth=175, n_estimators=250 .................
[CV]  criterion=gini, max_depth=175, n_estimators=250, score=0.746, total=   0.5s
[CV] criterion=gini, max_depth=175, n_estimators=250 .................
[CV]  criterion=gini, max_depth=175, n_estimators=250, score=0.842, total=   0.6s
[CV] criterion=gini, max_depth=175, n_estimators=250 .................
[CV]  criterion=gini, max_depth=175, n_estimators=250, score=0.796, total=   0.5s
[CV] criterion=gini, max_depth=175, n_estimators=250 .................
[CV]  criterion=gini, max_depth=175, n_estimators=250, score=0.862, total=   0.5s
[CV] criterion=gini, max_depth=175, n_estimators=500 .................
[CV]  criterion=gini, max_depth=175, n_estimators=500, score=0.796, total=   1.1s
[CV] criterion=gini, max_depth=175, n_estimators=500 .................
[CV]  criterion=gini, max_depth=175, n_estimators=500, score=0.742, total=   1.1s


[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed:  1.0min finished


GridSearchCV(estimator=RandomForestClassifier(),
             param_grid={'criterion': ['entropy', 'gini'],
                         'max_depth': [125, 150, 175],
                         'n_estimators': [50, 100, 250, 500]},
             verbose=3)

In [14]:
print(grid.best_params_)
print(grid.best_score_)

{'criterion': 'entropy', 'max_depth': 175, 'n_estimators': 100}
0.8157112970711295


In [15]:
param_grid2 = {'n_estimators':[1,10,25,50, 100, 250, 500],
              'criterion': ['entropy', 'gini'],
              'max_depth': [125, 150, 175,250, 300,500]}

In [40]:
grid2 = GridSearchCV(rf2, param_grid2, verbose =3)
grid2.fit(X_train_scaled2, y_train2)

Fitting 5 folds for each of 84 candidates, totalling 420 fits
[CV] criterion=entropy, max_depth=125, n_estimators=1 ................
[CV]  criterion=entropy, max_depth=125, n_estimators=1, score=0.696, total=   0.0s
[CV] criterion=entropy, max_depth=125, n_estimators=1 ................
[CV]  criterion=entropy, max_depth=125, n_estimators=1, score=0.704, total=   0.0s
[CV] criterion=entropy, max_depth=125, n_estimators=1 ................
[CV]  criterion=entropy, max_depth=125, n_estimators=1, score=0.725, total=   0.0s
[CV] criterion=entropy, max_depth=125, n_estimators=1 ................
[CV]  criterion=entropy, max_depth=125, n_estimators=1, score=0.675, total=   0.0s
[CV] criterion=entropy, max_depth=125, n_estimators=1 ................
[CV]  criterion=entropy, max_depth=125, n_estimators=1, score=0.699, total=   0.0s
[CV] criterion=entropy, max_depth=125, n_estimators=10 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=10, score=0.783, total=   0.0s
[CV] criterio

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV]  criterion=entropy, max_depth=125, n_estimators=10, score=0.845, total=   0.0s
[CV] criterion=entropy, max_depth=125, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=25, score=0.792, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=25, score=0.746, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=25, score=0.825, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=25, score=0.808, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=25, score=0.841, total=   0.1s
[CV] criterion=entropy, max_depth=125, n_estimators=50 ...............
[CV]  criterion=entropy, max_depth=125, n_estimators=50, score=0.804, 

[CV]  criterion=entropy, max_depth=150, n_estimators=250, score=0.842, total=   0.8s
[CV] criterion=entropy, max_depth=150, n_estimators=250 ..............
[CV]  criterion=entropy, max_depth=150, n_estimators=250, score=0.821, total=   0.8s
[CV] criterion=entropy, max_depth=150, n_estimators=250 ..............
[CV]  criterion=entropy, max_depth=150, n_estimators=250, score=0.845, total=   0.8s
[CV] criterion=entropy, max_depth=150, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=150, n_estimators=500, score=0.783, total=   1.6s
[CV] criterion=entropy, max_depth=150, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=150, n_estimators=500, score=0.733, total=   1.6s
[CV] criterion=entropy, max_depth=150, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=150, n_estimators=500, score=0.842, total=   1.5s
[CV] criterion=entropy, max_depth=150, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=150, n_estimators=500, score=

[CV]  criterion=entropy, max_depth=250, n_estimators=25, score=0.783, total=   0.1s
[CV] criterion=entropy, max_depth=250, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=250, n_estimators=25, score=0.738, total=   0.0s
[CV] criterion=entropy, max_depth=250, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=250, n_estimators=25, score=0.800, total=   0.1s
[CV] criterion=entropy, max_depth=250, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=250, n_estimators=25, score=0.796, total=   0.1s
[CV] criterion=entropy, max_depth=250, n_estimators=25 ...............
[CV]  criterion=entropy, max_depth=250, n_estimators=25, score=0.828, total=   0.1s
[CV] criterion=entropy, max_depth=250, n_estimators=50 ...............
[CV]  criterion=entropy, max_depth=250, n_estimators=50, score=0.783, total=   0.1s
[CV] criterion=entropy, max_depth=250, n_estimators=50 ...............
[CV]  criterion=entropy, max_depth=250, n_estimators=50, score=0.754, 

[CV]  criterion=entropy, max_depth=300, n_estimators=250, score=0.808, total=   0.5s
[CV] criterion=entropy, max_depth=300, n_estimators=250 ..............
[CV]  criterion=entropy, max_depth=300, n_estimators=250, score=0.854, total=   0.7s
[CV] criterion=entropy, max_depth=300, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=300, n_estimators=500, score=0.792, total=   1.1s
[CV] criterion=entropy, max_depth=300, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=300, n_estimators=500, score=0.738, total=   1.2s
[CV] criterion=entropy, max_depth=300, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=300, n_estimators=500, score=0.846, total=   1.2s
[CV] criterion=entropy, max_depth=300, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=300, n_estimators=500, score=0.800, total=   1.2s
[CV] criterion=entropy, max_depth=300, n_estimators=500 ..............
[CV]  criterion=entropy, max_depth=300, n_estimators=500, score=

[CV]  criterion=gini, max_depth=125, n_estimators=25, score=0.729, total=   0.1s
[CV] criterion=gini, max_depth=125, n_estimators=25 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=25, score=0.808, total=   0.1s
[CV] criterion=gini, max_depth=125, n_estimators=25 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=25, score=0.783, total=   0.0s
[CV] criterion=gini, max_depth=125, n_estimators=25 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=25, score=0.845, total=   0.1s
[CV] criterion=gini, max_depth=125, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=50, score=0.792, total=   0.1s
[CV] criterion=gini, max_depth=125, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=50, score=0.738, total=   0.1s
[CV] criterion=gini, max_depth=125, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=125, n_estimators=50, score=0.812, total=   0.1s
[CV] cr

[CV]  criterion=gini, max_depth=150, n_estimators=500, score=0.783, total=   1.1s
[CV] criterion=gini, max_depth=150, n_estimators=500 .................
[CV]  criterion=gini, max_depth=150, n_estimators=500, score=0.742, total=   1.0s
[CV] criterion=gini, max_depth=150, n_estimators=500 .................
[CV]  criterion=gini, max_depth=150, n_estimators=500, score=0.833, total=   1.1s
[CV] criterion=gini, max_depth=150, n_estimators=500 .................
[CV]  criterion=gini, max_depth=150, n_estimators=500, score=0.808, total=   1.1s
[CV] criterion=gini, max_depth=150, n_estimators=500 .................
[CV]  criterion=gini, max_depth=150, n_estimators=500, score=0.866, total=   1.0s
[CV] criterion=gini, max_depth=175, n_estimators=1 ...................
[CV]  criterion=gini, max_depth=175, n_estimators=1, score=0.700, total=   0.0s
[CV] criterion=gini, max_depth=175, n_estimators=1 ...................
[CV]  criterion=gini, max_depth=175, n_estimators=1, score=0.671, total=   0.0s
[CV]

[CV]  criterion=gini, max_depth=250, n_estimators=50, score=0.746, total=   0.1s
[CV] criterion=gini, max_depth=250, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=250, n_estimators=50, score=0.812, total=   0.1s
[CV] criterion=gini, max_depth=250, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=250, n_estimators=50, score=0.787, total=   0.1s
[CV] criterion=gini, max_depth=250, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=250, n_estimators=50, score=0.845, total=   0.1s
[CV] criterion=gini, max_depth=250, n_estimators=100 .................
[CV]  criterion=gini, max_depth=250, n_estimators=100, score=0.804, total=   0.3s
[CV] criterion=gini, max_depth=250, n_estimators=100 .................
[CV]  criterion=gini, max_depth=250, n_estimators=100, score=0.750, total=   0.3s
[CV] criterion=gini, max_depth=250, n_estimators=100 .................
[CV]  criterion=gini, max_depth=250, n_estimators=100, score=0.825, total=   0.3s
[CV]

[CV]  criterion=gini, max_depth=500, n_estimators=10, score=0.824, total=   0.0s
[CV] criterion=gini, max_depth=500, n_estimators=25 ..................
[CV]  criterion=gini, max_depth=500, n_estimators=25, score=0.792, total=   0.1s
[CV] criterion=gini, max_depth=500, n_estimators=25 ..................
[CV]  criterion=gini, max_depth=500, n_estimators=25, score=0.729, total=   0.1s
[CV] criterion=gini, max_depth=500, n_estimators=25 ..................
[CV]  criterion=gini, max_depth=500, n_estimators=25, score=0.808, total=   0.1s
[CV] criterion=gini, max_depth=500, n_estimators=25 ..................
[CV]  criterion=gini, max_depth=500, n_estimators=25, score=0.833, total=   0.1s
[CV] criterion=gini, max_depth=500, n_estimators=25 ..................
[CV]  criterion=gini, max_depth=500, n_estimators=25, score=0.849, total=   0.1s
[CV] criterion=gini, max_depth=500, n_estimators=50 ..................
[CV]  criterion=gini, max_depth=500, n_estimators=50, score=0.792, total=   0.2s
[CV] cr

[Parallel(n_jobs=1)]: Done 420 out of 420 | elapsed:  2.6min finished


GridSearchCV(estimator=RandomForestClassifier(),
             param_grid={'criterion': ['entropy', 'gini'],
                         'max_depth': [125, 150, 175, 250, 300, 500],
                         'n_estimators': [1, 10, 25, 50, 100, 250, 500]},
             verbose=3)

In [41]:
print(grid2.best_params_)
print(grid2.best_score_)

{'criterion': 'entropy', 'max_depth': 300, 'n_estimators': 100}
0.8115516039051605


In [43]:
rf3 = RandomForestClassifier(criterion= 'entropy', max_depth= 300, n_estimators=100)

In [44]:
rf3.fit(X_train_scaled2, y_train2)

RandomForestClassifier(criterion='entropy', max_depth=300)

In [45]:
print(f"Training Data Score: {rf3.score(X_train_scaled2, y_train2)}")
print(f"Testing Data Score: {rf3.score(X_test_scaled2, y_test2)}")

Training Data Score: 1.0
Testing Data Score: 0.7875


In [46]:
predictions = rf3.predict(X_test_scaled2)
print(predictions)
print(rf3.classes_)

[0 0 1 0 1 0 0 0 1 1 1 0 1 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 0 1 0 1 1 0 0 0 1
 1 1 1 0 0 1 0 1 1 1 1 0 1 0 0 1 1 0 0 1 1 1 0 0 1 0 0 1 0 1 1 1 1 1 0 1 1
 1 0 1 0 1 1 0 1 0 1 1 1 0 1 1 0 1 0 1 0 0 0 0 1 0 1 0 0 1 1 1 1 1 0 1 0 1
 0 1 0 0 1 0 0 0 1 1 1 0 0 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 1 1 1 0 1 0 1 1
 1 0 1 1 1 0 1 0 0 1 0 1 1 1 1 0 1 1 1 0 0 0 0 0 1 0 1 1 1 0 1 0 0 1 0 1 0
 1 1 1 1 0 1 1 0 1 1 0 0 1 0 1 0 0 0 0 1 0 0 0 1 1 1 0 0 1 1 0 1 0 1 0 0 1
 1 1 1 0 0 1 0 0 0 0 1 1 0 1 0 1 1 0 0 0 0 1 1 1 0 1 1 0 1 1 0 0 1 0 0 1 1
 1 1 0 0 1 1 1 0 0 1 1 0 1 0 1 1 1 0 1 1 1 1 0 0 1 0 1 0 1 0 1 0 1 0 0 0 1
 0 1 1 1 0 0 1 0 1 0 1 1 1 1 0 0 1 1 0 1 0 0 1 0 1 1 1 0 0 0 1 1 0 1 1 1 0
 1 1 0 1 1 0 0 1 1 1 0 1 0 1 0 1 1 1 1 0 1 1 1 1 0 0 0 1 0 0 1 0 1 1 0 0 0
 0 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 0 0 1 0 0 0 1 1 1 1 0 0 1]
[0 1]


In [47]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.76      0.77      0.76       178
           1       0.81      0.80      0.81       222

    accuracy                           0.79       400
   macro avg       0.78      0.79      0.79       400
weighted avg       0.79      0.79      0.79       400



In [48]:
import joblib

In [49]:
filename = 'RandomForest_Red.sav'
joblib.dump(rf3, filename)

['RandomForest_Red.sav']

In [50]:
loaded_model= joblib.load(filename)

result=loaded_model.score(X_test_scaled2, y_test2)

print(result)

0.7875


In [51]:
#https://stackoverflow.com/questions/40155128/plot-trees-for-a-random-forest-in-python-with-scikit-learn
# print decision path
from sklearn.tree import export_graphviz
import os

In [52]:
import pydotplus
import six
from sklearn import tree

In [53]:
dotfile = six.StringIO()
i_tree = 0
for tree_in_forest in rf3.estimators_:        
    tree.export_graphviz(tree_in_forest,
                feature_names=X2.columns,
                filled=True,
                rounded=True,
                out_file='tree.dot')
    i_tree = i_tree + 1

In [54]:
os.system('dot -Tpng tree.dot -o treered.png')

0