In [4]:
import numpy as np
import matplotlib.pyplot as plt
import random

from decision_tree import DecisionTree
from logistic_regression import gradient_descent
from random_forest import RandomForest



def accuracy_score(Y_true, Y_predict):
    """Return the percentage of predictions that agree with the true labels.

    Labels are compared position by position; the number of positions
    checked is the length of ``Y_predict``.

    Args:
        Y_true: Indexable sequence of ground-truth labels.
        Y_predict: Indexable sequence of predicted labels.

    Returns:
        The accuracy as a percentage (100 minus the percentage of
        mismatching positions).

    Raises:
        ZeroDivisionError: If ``Y_predict`` is empty.
    """
    total = len(Y_predict)
    mismatches = sum(
        1 for idx in range(total) if Y_true[idx] != Y_predict[idx]
    )
    miss_percentage = mismatches * 100 / total
    return 100 - miss_percentage


def evaluate_performance(filename='SPECTF.dat', num_trials=3):
    """Compare three classifiers on repeated random 90/10 train/test splits.

    Loads a comma-delimited data file whose first column is the class label
    and whose remaining columns are the features. In every trial the rows
    are shuffled, the first nine tenths are used to train a decision tree,
    a logistic regression model and a random forest, and each model is
    scored on the remaining rows with ``accuracy_score``.

    Args:
        filename: Path of the data file to load. Defaults to 'SPECTF.dat'.
        num_trials: Number of independent shuffle/train/test rounds.
            Defaults to 3.

    Returns:
        A 3x2 NumPy array. Rows are, in order, decision tree, random forest
        and logistic regression; column 0 is the mean accuracy across the
        trials and column 1 is its standard deviation.

    Raises:
        ValueError: If ``num_trials`` is less than 1.
    """
    if num_trials < 1:
        raise ValueError("num_trials must be at least 1")

    # Load Data
    data = np.loadtxt(filename, delimiter=',')
    n = data.shape[0]
    # Size of the training split: nine tenths of the rows, rounded down.
    n_train = (9 * n) // 10

    tree_accuracies = []
    log_accuracies = []
    forest_accuracies = []
    for trial in range(num_trials):
        # Progress output: one line per trial.
        print(trial)

        # Divide data into train and test using a fresh random row order.
        select = list(range(n))
        random.shuffle(select)
        data_train = data[select[:n_train], :]
        data_test = data[select[n_train:], :]

        # Column 0 is the label; everything after it is a feature.
        data_train_Y = data_train[:, 0]
        data_train_X = data_train[:, 1:]
        data_test_Y = data_test[:, 0]
        data_test_X = data_test[:, 1:]

        # Decision Tree -- fit/predict are given the full rows, label
        # column included, exactly as the tree implementation is called
        # elsewhere in this script.
        tree = DecisionTree(100)
        tree.fit(data_train)
        y_pred = tree.predict(data_test)
        tree_accuracies.append(accuracy_score(data_test_Y, y_pred))

        # Logistic Regression -- predict class 1 when the linear score
        # of a test row is strictly positive, otherwise class 0.
        beta = gradient_descent(data_train_X, data_train_Y, 1,
                                l=1, step_size=1e-8, max_steps=10)
        Y_log_pred = [1 if row.dot(beta) > 0 else 0 for row in data_test_X]
        log_accuracies.append(accuracy_score(data_test_Y, Y_log_pred))

        # Random Forest -- each prediction is a row whose first entry is
        # the predicted label.
        forest = RandomForest(7, 100)
        forest.fit(data_train)
        y_rand_pred = [row[0] for row in forest.predict(data_test)]
        forest_accuracies.append(accuracy_score(data_test_Y, y_rand_pred))

    # make certain that the return value matches the API specification:
    # row order is decision tree, random forest, logistic regression.
    stats = np.zeros((3, 2))
    per_model = (tree_accuracies, forest_accuracies, log_accuracies)
    for row_index, accuracies in enumerate(per_model):
        stats[row_index, 0] = np.mean(accuracies)
        stats[row_index, 1] = np.std(accuracies)
    return stats


# Do not modify from HERE...
if __name__ == "__main__":
    # Run the evaluation and print, for each model, the mean accuracy
    # followed by its standard deviation in parentheses.
    stats = evaluate_performance()
    print("Decision Tree Accuracy = ", stats[0, 0], " (", stats[0, 1], ")")
    print("Random Forest Tree Accuracy = ", stats[1, 0], " (", stats[1, 1], ")")
    print("Logistic Reg. Accuracy = ", stats[2, 0], " (", stats[2, 1], ")")
# # ...to HERE.

0
1


KeyboardInterrupt: 

In [2]:
print(7)

7
