In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import model_selection
import numpy as np
from joblib import dump, load

# Tag appended to the names of the .npy files written by find_counts.
# NOTE(review): presumably the audio window size in samples -- confirm units.
WINDOW_SIZE = 4000
def find_counts(predictions, truth, clips):
    """Tally confusion-matrix counts and save the misclassified clips.

    A prediction below 0.5 is treated as class 0 and a prediction of 0.5 or
    above as class 1; that decision is compared with the matching entry of
    ``truth``. Samples whose truth value is neither 0 nor 1 are not counted.

    Side effects:
        Prints the number of predicted positives (TP + FP) and predicted
        negatives (TN + FN), then saves the false-positive and false-negative
        clips to ``../util/false_positives_rf_<WINDOW_SIZE>.npy`` and
        ``../util/false_negatives_rf_<WINDOW_SIZE>.npy``.

    Args:
        predictions: Per-sample predictions, iterated along the first axis.
        truth: Ground-truth labels, indexed in step with ``predictions``.
        clips: Per-sample clips, indexed in step with ``predictions``.

    Returns:
        Tuple ``(TP, TN, FP, FN)`` of integer counts.
    """
    true_pos = 0
    true_neg = 0
    spurious_clips = []  # clips predicted positive whose label is 0
    missed_clips = []    # clips predicted negative whose label is 1

    for idx, score in enumerate(predictions):
        if score < 0.5:
            label = truth[idx]
            if label == 0:
                true_neg += 1
            elif label == 1:
                missed_clips.append(clips[idx])
        elif score >= 0.5:
            label = truth[idx]
            if label == 1:
                true_pos += 1
            elif label == 0:
                spurious_clips.append(clips[idx])

    # Every misclassified sample contributed exactly one clip, so the list
    # lengths are the FP / FN counts.
    false_pos = len(spurious_clips)
    false_neg = len(missed_clips)

    print("TOTAL_POSITIVE:", true_pos + false_pos)
    print("TOTAL_NEGATIVE:", true_neg + false_neg)

    # Build both arrays before writing anything, as the original did.
    spurious_arr = np.array(spurious_clips)
    missed_arr = np.array(missed_clips)
    np.save('../util/false_positives_rf_{}.npy'.format(WINDOW_SIZE), spurious_arr)
    np.save('../util/false_negatives_rf_{}.npy'.format(WINDOW_SIZE), missed_arr)

    return true_pos, true_neg, false_pos, false_neg

def find_accuracies(TP, TN, FP, FN):
    """Compute overall, balanced and per-class accuracy from confusion counts.

    Args:
        TP: Number of true positives.
        TN: Number of true negatives.
        FP: Number of false positives.
        FN: Number of false negatives.

    Returns:
        Tuple ``(accuracy, balanced_accuracy, a_0, a_1)`` where

        * ``accuracy`` is ``(TP + TN) / (TP + TN + FP + FN)``,
        * ``a_0`` is ``TP / (TP + FN)`` (fraction of actual positives found),
        * ``a_1`` is ``TN / (TN + FP)`` (fraction of actual negatives found),
        * ``balanced_accuracy`` is the mean of ``a_0`` and ``a_1``.

        A ratio whose denominator is zero (for example when the evaluated
        split contains no samples of one class) is undefined and is returned
        as NaN instead of raising ``ZeroDivisionError``; the NaN propagates
        into ``balanced_accuracy``.
    """
    def _ratio(numerator, denominator):
        # An empty class or an empty split makes the ratio undefined.
        return numerator / denominator if denominator else float("nan")

    accuracy = _ratio(TP + TN, TP + TN + FP + FN)
    a_0 = _ratio(TP, TP + FN)
    a_1 = _ratio(TN, TN + FP)
    balanced_accuracy = 0.5 * (a_0 + a_1)
    return accuracy, balanced_accuracy, a_0, a_1

def main():
    """Train a random forest on the saved features and report validation metrics.

    Loads the feature, clip and label arrays for the current ``WINDOW_SIZE``
    from ``../util``, flattens each sample's features to one row, holds out
    10% of the samples for validation (fixed split seed 42), fits a
    ``RandomForestClassifier`` with default settings, prints the confusion
    counts and accuracies on the validation split, and dumps the fitted model
    to ``rf.joblib`` in the current working directory.
    """
    # Derive the input names from WINDOW_SIZE so the data that is loaded
    # always matches the tag find_counts puts on the files it writes.
    suffix = 'error_' + str(WINDOW_SIZE) + '.npy'

    # Load data
    X = np.load('../util/X_mfccs_' + suffix)
    X = np.reshape(X, (X.shape[0], -1))  # one flat feature vector per sample
    clips = np.load('../util/X_clips_' + suffix)
    y = np.load('../util/labels_' + suffix)
    (x_train, x_valid,
     clips_train, clips_valid,
     y_train, y_valid) = model_selection.train_test_split(
        X, clips, y, test_size=0.10, random_state=42)

    # Train random forest classifier
    # NOTE(review): the forest itself is unseeded, so the fitted model and the
    # reported metrics vary between runs even though the split is fixed.
    clf = RandomForestClassifier().fit(x_train, y_train)

    # predict() yields class labels rather than probabilities; the 0.5
    # threshold inside find_counts presumably relies on the labels being
    # 0/1 -- confirm against the label file.
    predictions = clf.predict(x_valid)
    TP, TN, FP, FN = find_counts(predictions, y_valid, clips_valid)
    print(find_accuracies(TP, TN, FP, FN))
    dump(clf, 'rf.joblib')


# Run the training/evaluation pipeline only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()