In [None]:
import sys
import random
from collections import defaultdict
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import plot_precision_recall_curve

In [None]:
##################
# You can modify file names and path to files as you need.
path_to_data_file = 'data_file.txt'
performance_file = 'random_hyperparameter_tuning.txt'
n = 1000 # Number of samples; used as the length of each random prediction vector
##################

# Read the tab-separated data file. The context manager guarantees the handle
# is closed even if reading fails part-way through.
with open(path_to_data_file) as data_file:
    whole_data = [line.strip().split('\t') for line in data_file]
del(whole_data[0])  # discard the first row (presumably a header -- confirm against the data file)

featDic = {}
data_list_x = []
data_list_y = []

for line in whole_data:
    # Column 0 is the dictionary key. Columns 1-10 and 13-14 are parsed as
    # floats, columns 11-12 as ints, and column 15 as a single int that ends
    # up last in the row.
    # The last field is parsed as a whole: mapping int() over the string would
    # split a multi-character value such as "10" into separate digits.
    featDic[line[0]] = list(map(float,line[1:11])) + list(map(int,line[11:13])) + list(map(float,line[13:15])) + [int(line[15])]
    # Everything except the last element is the feature vector; the last
    # element is the target.
    data_list_x.append(featDic[line[0]][:-1])
    data_list_y.append(featDic[line[0]][-1])

data_x = np.array(data_list_x)
data_y = np.array(data_list_y)

In [None]:
import random

def sample_floats(low, high, k=1):
    """Draw ``k`` distinct floats with ``random.uniform(low, high)``.

    Values are drawn one at a time; a draw that repeats an earlier value is
    discarded and redrawn, so the returned list never contains duplicates.

    Args:
        low: One end of the sampling interval.
        high: The other end of the sampling interval.
        k: Number of distinct values to return (default 1). A value of 0 or
            less yields an empty list.

    Returns:
        A list of ``k`` distinct floats in draw order.

    Raises:
        ValueError: If ``low == high`` and ``k > 1``. Only one value can ever
            be drawn from such an interval, so the redraw loop would never
            terminate.
    """
    if k > 1 and low == high:
        raise ValueError(
            "cannot draw %d distinct values from the degenerate interval [%r, %r]"
            % (k, low, high)
        )

    result = []
    seen = set()
    for _ in range(k):
        x = random.uniform(low, high)
        # Redraw until the value has not been produced before.
        while x in seen:
            x = random.uniform(low, high)
        seen.add(x)
        result.append(x)
    return result

In [None]:
# Random baseline: score every sample with a uniform random number, evaluate
# those scores against the true labels, and repeat to obtain a distribution
# for each metric.
n_iterations = 1000

stats = defaultdict(list)

for i in range(n_iterations):

    # n distinct random scores in [0, 1]; n has to equal len(data_y) for the
    # metric functions below to accept the pair.
    pred_random = np.array(sample_floats(0.0,1.0,k=n))

    # ROC AUC uses the raw scores; accuracy uses the scores rounded to 0/1.
    fpr, tpr, threshold = metrics.roc_curve(data_y, pred_random)
    random_acc = metrics.accuracy_score(data_y, pred_random.round())
    random_roc = metrics.auc(fpr, tpr)

    # Area under the precision-recall curve uses the raw scores; F1 uses the
    # rounded scores.
    precision_random, recall_random, thresholds = precision_recall_curve(data_y, pred_random)
    random_f1 = f1_score(data_y, pred_random.round())

    random_rp = metrics.auc(recall_random, precision_random)

    stats['acc'].append(random_acc)
    stats['roc'].append(random_roc)
    stats['f1'].append(random_f1)
    stats['rp'].append(random_rp)

# Percentile bounds of the central `alpha` interval (2.5 and 97.5 for 0.95).
alpha=0.95
p_l = ((1.0-alpha)/2) * 100
p_u = (alpha + ((1.0 - alpha)/ 2.0)) * 100

mean_perf = dict()
lower = dict()
upper = dict()

perf_list = ['acc', 'roc', 'f1', 'rp']

# One tab-separated line per metric: name, mean, lower bound, upper bound.
# The context manager closes the file even if a computation or write fails.
with open(performance_file, 'w') as result:
    for perf in perf_list:
        mean_perf[perf] = sum(stats[perf]) / len(stats[perf])
        # Bounds are clamped to [0, 1].
        lower[perf] = max(0.0, np.percentile(stats[perf],p_l))
        upper[perf] = min(1.0, np.percentile(stats[perf],p_u))

        result.write('\t'.join([perf, str(mean_perf[perf]), str(lower[perf]), str(upper[perf])]) + '\n')