In [19]:
import DTree
import numpy as np
from collections import Counter

class RandomForest:
    """Ensemble of ``DTree`` decision trees combined by majority vote.

    Each tree is fitted on its own bootstrap sample (rows drawn with
    replacement) of the training data; a prediction is the label that the
    largest number of trees vote for.
    """

    def __init__(self, n_trees=20):
        """Create an untrained forest.

        Args:
            n_trees: Number of trees that ``train`` will build.
        """
        self.n_trees = n_trees
        # Filled by train(); starts empty so predict() can detect an
        # untrained model instead of failing with an AttributeError.
        self.forest = []

    def train(self, X_train, y_train, feature_names, percentage=0.5):
        """Fit ``n_trees`` trees, each on a bootstrap sample of the data.

        Any previously trained trees are discarded.

        Args:
            X_train: Array of training rows; must expose ``.shape`` and
                support integer-array indexing (e.g. a NumPy array).
            y_train: Labels aligned with ``X_train``; must support
                integer-array indexing.
            feature_names: Sequence of feature names forwarded to each tree.
            percentage: Size of each bootstrap sample as a fraction of the
                number of training rows (truncated to an integer).
        """
        n_samples = X_train.shape[0]
        # Built-in int() rather than np.int: the alias was deprecated in
        # NumPy 1.20 and removed in 1.24. Truncation behaviour is identical.
        sample_size = int(percentage * n_samples)
        self.forest = []
        for _ in range(self.n_trees):
            # Sampling with replacement from the global NumPy RNG; seed it
            # with np.random.seed() beforehand for reproducible forests.
            indices = np.random.randint(0, n_samples, sample_size)
            tree = DTree.DTree()
            # `forest` is passed straight through to DTree.train.
            # NOTE(review): presumably it controls per-split feature
            # sub-sampling -- confirm against the DTree module.
            tree.train(X_train[indices], y_train[indices], feature_names,
                       forest=len(feature_names))
            self.forest.append(tree)

    def predict(self, X_test, y_test=None):
        """Predict a label for every row of ``X_test`` by majority vote.

        Args:
            X_test: Iterable of feature rows.
            y_test: Optional true labels aligned with ``X_test``.

        Returns:
            ``predictions`` (a NumPy array) when ``y_test`` is None,
            otherwise the tuple ``(predictions, accuracy)`` where accuracy
            is the fraction of predictions equal to ``y_test``.

        Raises:
            ValueError: If the forest contains no trained trees.
        """
        if not self.forest:
            raise ValueError(
                'RandomForest has no trained trees; call train() before predict().')

        predictions = []
        for x in X_test:
            # Counter preserves first-seen order, so ties are resolved in
            # favour of the label voted for by the earliest tree.
            votes = Counter(tree.predict(tree.tree, x) for tree in self.forest)
            predictions.append(votes.most_common(1)[0][0])
        predictions = np.array(predictions)

        if y_test is None:
            return predictions
        accuracy = np.mean(predictions == np.asarray(y_test))
        return predictions, accuracy

In [20]:
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# lives in sklearn.model_selection since 0.18. Fall back for older installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from collections import Counter
import numpy as np
import pandas as pd
import Bagging

# One seed for both the train/test split and the forest's bootstrap sampling,
# so the reported accuracy is reproducible across runs.
SEED = 123

# Every column except the last is a feature; the last column is the label.
data = pd.read_csv('car.csv')
feature_names = data.columns.values[0:-1].tolist()
X = data.iloc[:,0:-1].values
y = data.iloc[:,-1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = SEED)

# RandomForest.train draws bootstrap indices from the global NumPy RNG.
np.random.seed(SEED)
forest = RandomForest()
forest.train(X_train, y_train, feature_names)
# predict() returns (predictions, accuracy) when labels are given; element [1]
# is the accuracy. The variable name is kept for any later cells that use it.
prediction = forest.predict(X_test, y_test)[1]
print('Random Forest Accuracy: ', prediction)

Random Forest Accuracy:  0.905587668593449
