In [None]:
def plot_roc(title, test_y, pred_y):
    """Plot per-class, micro- and macro-averaged ROC curves and return the AUCs.

    Two side-by-side panels are drawn: the left one shows the micro- and
    macro-averaged curves, the right one shows one curve per class.

    Args:
        title: Name of the model; printed as a header before plotting.
        test_y: 2-D array-like of true labels with one column per class
            (each column is passed to ``roc_curve`` as ``y_true``).
        pred_y: 2-D array-like of predictions with the same shape as
            ``test_y`` (each column is passed to ``roc_curve`` as the score).

    Returns:
        dict mapping every class index ``0 .. n_class - 1`` and the keys
        ``"micro"`` and ``"macro"`` to the corresponding area under the curve.

    Raises:
        ValueError: If ``test_y`` is not 2-D or its shape differs from
            ``pred_y``.
    """
    print('ROC for : ', title)

    # Coerce once so column slicing and ravel() below behave uniformly.
    test_y = np.asarray(test_y)
    pred_y = np.asarray(pred_y)
    if test_y.ndim != 2 or test_y.shape != pred_y.shape:
        raise ValueError(
            'test_y and pred_y must be 2-D arrays of identical shape, got '
            '{0} and {1}'.format(test_y.shape, pred_y.shape)
        )

    # Compute ROC curve and ROC area for each class
    fpr = {}
    tpr = {}
    roc_auc = {}
    n_class = test_y.shape[1]
    for i in range(n_class):
        fpr[i], tpr[i], _ = roc_curve(test_y[:, i], pred_y[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Micro-average: pool every (sample, class) decision into one binary problem.
    fpr["micro"], tpr["micro"], _ = roc_curve(test_y.ravel(), pred_y.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Macro-average: evaluate every class curve on the union of all FPR grid
    # points, then take the unweighted mean of the interpolated TPR values.
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_class)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_class):
        # np.interp replaces the bare `interp` alias, which is not a NumPy
        # name and is unavailable in current SciPy releases.
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= n_class

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Left panel: averaged curves plus the chance diagonal.
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 10))
    ax1.plot(fpr["micro"], tpr["micro"], color='deeppink',
             label='micro-avg ROC (area = {0:0.2f})'.format(roc_auc["micro"]))
    ax1.plot(fpr["macro"], tpr["macro"], color='navy',
             label='macro-avg ROC (area = {0:0.2f})'.format(roc_auc["macro"]))
    ax1.plot([0, 1], [0, 1], 'k--')
    ax1.set_xlabel('False Positive Rate')
    ax1.set_ylabel('True Positive Rate')
    ax1.set_title('ROC multi-class')
    ax1.legend(loc="lower right")

    # Right panel: one curve per class; colors repeat after three classes.
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_class), colors):
        ax2.plot(fpr[i], tpr[i], color=color,
                 label='ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i]))
    # The title is set exactly once; a second set_title call used to replace
    # it with the left panel's title.
    ax2.set_title('ROC Individual Classes')
    ax2.set_xlabel('False Positive Rate')
    ax2.set_ylabel('True Positive Rate')
    # Without a legend the per-class labels and AUC values are never shown.
    ax2.legend(loc="lower right")
    plt.show()

    return roc_auc

In [None]:
# Draw the ROC curves for the Random Forest predictions and keep the AUCs.
# NOTE(review): relies on `test_y` / `pred_y` already existing when this cell
# runs; confirm `pred_y` holds the Random Forest output at that point.
roc_auc_rf = plot_roc(
    title='Random Forest',
    test_y=test_y,
    pred_y=pred_y,
)

In [None]:
# Reduce each row to the index of its largest entry, then tabulate
# true-vs-predicted indices for the Random Forest.
cmat_rf = confusion_matrix(
    y_true=test_y.argmax(axis=1),
    y_pred=pred_y.argmax(axis=1),
)

In [None]:
# --- Train: fit a 100-estimator random forest and time the fit ---
print('Training Random Forest', end='')
model_rf = RandomForestClassifier(n_estimators=100)
start = time.time()
model_rf.fit(train_lsa, train_y)
end = time.time()
train_time_rf = end - start
print(' - Done')

# --- Test: predict on the held-out features and time the prediction ---
print('Testing Random Forest', end='')
start = time.time()
pred_y = model_rf.predict(test_lsa)
end = time.time()
test_time_rf = end - start
print(' - Done')

# --- Score: accuracy, plus a confusion matrix over the per-row argmax ---
acc_rf = accuracy_score(y_true=test_y, y_pred=pred_y)
cmat_rf = confusion_matrix(
    y_true=test_y.argmax(axis=1),
    y_pred=pred_y.argmax(axis=1),
)

print('Accuracy :', acc_rf * 100)
print('Time Taken - Train : {0:.4f}, Test : {1:.4f}'.format(train_time_rf, test_time_rf))

# NOTE(review): plot_roc receives the output of `predict`; if these are hard
# labels rather than scores, each ROC curve has a single operating point --
# confirm whether `predict_proba` was intended.
roc_auc_rf = plot_roc(title='Random Forest', test_y=test_y, pred_y=pred_y)

In [None]:
# --- Feature matrix shapes before and after extraction ---
print('Original Data Shape ')
print('Train : ', train_features.shape)
print('Test  : ', test_features.shape)
print('Extracted Data Shape ')
print('Train : ', train_lsa.shape)
print('Test  : ', test_lsa.shape)
print()

# --- Accuracy per model, as a percentage ---
print('Accuracy')
print(
    *(
        row.format(accuracy * 100)
        for row, accuracy in (
            ('Random Forest-100    : {0:7.4f}', acc_rf),
            ('Decision Tree-100    : {0:7.4f}', acc_dt),
            ('k Nearest Neighbours : {0:7.4f}', acc_knn),
        )
    ),
    sep='\n',
)
print()

# --- Wall-clock train / test durations per model ---
print('Time Taken')
print(
    *(
        row.format(train_seconds, test_seconds)
        for row, train_seconds, test_seconds in (
            ('Random Forest-100 - Train : {0:8.4f}, Test : {1:7.4f}', train_time_rf, test_time_rf),
            ('Decision Tree-100 - Train : {0:8.4f}, Test : {1:7.4f}', train_time_dt, test_time_dt),
            ('kNN               - Train : {0:8.4f}, Test : {1:7.4f}', train_time_knn, test_time_knn),
        )
    ),
    sep='\n',
)
print()

# --- Micro / macro averaged ROC AUC per model, as a percentage ---
print('Area under ROC')
print(
    *(
        row.format(scores['micro'] * 100, scores['macro'] * 100)
        for row, scores in (
            ('Random Forest-100 - Micro : {0:7.4f}, Macro : {1:7.4f}', roc_auc_rf),
            ('Decision Tree-100 - Micro : {0:7.4f}, Macro : {1:7.4f}', roc_auc_dt),
            ('kNN               - Micro : {0:7.4f}, Macro : {1:7.4f}', roc_auc_knn),
        )
    ),
    sep='\n',
)

In [None]:
# Print each model's confusion matrix under its own heading, with a blank
# line between consecutive models.
print('Confusion Matrices')
print('Random Forest', cmat_rf, sep='\n', end='\n\n')
print('Decision Tree', cmat_dt, sep='\n', end='\n\n')
print('kNN', cmat_knn, sep='\n')