In [1]:
# Running tallies of prediction outcomes; every counter starts at zero.
pred_result = dict.fromkeys(
    (
        "true_positive",
        "true_negative",
        "false_positive",
        "false_negative",
        "undefined",
    ),
    0,
)

# Names of the nine attribute positions, listed row by row
# (top, middle, bottom) and left to right within each row.
attr_names = [
    "{}_{}".format(row, column)
    for row in ("top", "middle", "bottom")
    for column in ("left", "middle", "right")
]

In [2]:
def name_attrs(instance):
    """Turn a record into a set of ``"<attribute name>: <value>"`` strings.

    Values are paired positionally with the module-level ``attr_names``.
    ``zip`` stops at the shorter of the two sequences, so any fields of
    ``instance`` beyond the named attributes are left out of the set.
    """
    labelled = set()
    for attr_name, value in zip(attr_names, instance):
        labelled.add(": ".join((attr_name, str(value))))
    return labelled

In [3]:
def write_result(pos_votes, neg_votes, instance):
    """Record one prediction in the global ``pred_result`` tallies.

    The class with strictly more votes is the prediction; it is compared
    with the true label stored in the last field of ``instance``. Equal
    vote values are counted as ``"undefined"``. Returns ``None``.
    """
    if pos_votes > neg_votes:
        # Predicted positive: correct only when the label says so.
        outcome = "true_positive" if instance[-1] == "positive" else "false_positive"
    elif pos_votes < neg_votes:
        # Predicted negative.
        outcome = "true_negative" if instance[-1] == "negative" else "false_negative"
    else:
        # No winner; the label is not inspected at all in this case.
        outcome = "undefined"
    pred_result[outcome] += 1

In [4]:
def predict(param, train_pos, train_neg, instance, algorithm="First"):
    """Classify ``instance`` and record the outcome in ``pred_result``.

    Args:
        param: Tuning parameter forwarded unchanged to the vote counter.
        train_pos: Training records labelled positive.
        train_neg: Training records labelled negative.
        instance: Record to classify; its last field is the true label
            read by ``write_result``.
        algorithm: ``"First"`` or ``"Second"`` selects the matching
            ``votes_counter_*`` function; any other value selects
            ``votes_counter_third``.

    Returns:
        None. The result is tallied through ``write_result``.
    """
    if algorithm == "First":
        counter = votes_counter_first
    elif algorithm == "Second":
        counter = votes_counter_second
    else:
        counter = votes_counter_third
    pos_votes, neg_votes = counter(param, train_pos, train_neg, instance)

    # Normalise by class size so the larger class does not win on sheer
    # count. A class with no training examples gets a share of 0 instead of
    # raising ZeroDivisionError.
    pos_share = pos_votes / float(len(train_pos)) if len(train_pos) else 0.0
    neg_share = neg_votes / float(len(train_neg)) if len(train_neg) else 0.0
    write_result(pos_share, neg_share, instance)

In [5]:
def votes_counter_first(param, train_pos, train_neg, instance):
    """Count training examples that overlap ``instance`` strongly enough.

    An example votes for its class when the number of named attributes it
    shares with ``instance``, divided by the number of named attributes of
    ``instance``, is at least ``param``.

    Returns:
        ``(pos_votes, neg_votes)`` as plain integer counts.
    """
    target = name_attrs(instance)

    def supporters(examples):
        # One vote per example whose shared fraction reaches the threshold.
        return sum(
            1
            for example in examples
            if len(name_attrs(example).intersection(target)) / float(len(target)) >= param
        )

    positive_total = supporters(train_pos)
    negative_total = supporters(train_neg)
    return positive_total, negative_total

In [6]:
def votes_counter_second(param, train_pos, train_neg, instance):
    """Count votes from intersections that the opposite class rarely contains.

    For every training example, the named attributes it shares with
    ``instance`` are intersected. The example votes for its own class when
    the fraction of opposite-class examples containing that whole
    intersection is at most ``param``.

    Args:
        param: Largest tolerated fraction of opposite-class examples that
            contain the intersection.
        train_pos: Training records labelled positive.
        train_neg: Training records labelled negative.
        instance: Record being classified.

    Returns:
        ``(pos_votes, neg_votes)`` as plain integer counts.
    """
    named_instance = name_attrs(instance)
    # Name every training example once up front rather than inside the
    # nested loops, where the same sets would be rebuilt for every pair.
    named_positives = [name_attrs(positive) for positive in train_pos]
    named_negatives = [name_attrs(negative) for negative in train_neg]

    def count_votes(own_class, opposite_class):
        votes = 0
        for example in own_class:
            intersec = named_instance.intersection(example)
            includ_counter = sum(1 for other in opposite_class if intersec.issubset(other))
            # With no opposite-class examples nothing can contain the
            # intersection, so the fraction is 0 rather than a division by zero.
            if opposite_class:
                fraction = includ_counter / float(len(opposite_class))
            else:
                fraction = 0.0
            if fraction <= param:
                votes += 1
        return votes

    pos_votes = count_votes(named_positives, named_negatives)
    neg_votes = count_votes(named_negatives, named_positives)
    return pos_votes, neg_votes

In [7]:
def votes_counter_third(param, train_pos, train_neg, instance):
    """Vote by comparing the ``param`` largest overlaps of each class.

    For each class the sizes of the intersections between ``instance`` and
    every training example are collected, and the ``param`` largest are kept
    in descending order (padded with zeros when the class has fewer than
    ``param`` examples). The two lists are compared rank by rank; the class
    that is larger at the first rank where they differ gets the single vote.

    Args:
        param: Number of largest overlaps to keep per class; must be >= 1.
        train_pos: Training records labelled positive.
        train_neg: Training records labelled negative.
        instance: Record being classified.

    Returns:
        ``(1, 0)`` when the positive class wins, ``(0, 1)`` when the
        negative class wins, ``(0, 0)`` when every rank ties.

    Raises:
        ValueError: If ``param`` is smaller than 1.
    """
    if param < 1:
        raise ValueError(
            "param must be a positive number of overlaps to compare, got {!r}".format(param)
        )
    named_instance = name_attrs(instance)

    def top_overlaps(examples):
        sizes = [len(named_instance.intersection(name_attrs(example))) for example in examples]
        # Zero padding guarantees both classes yield exactly `param` entries.
        sizes.extend([0] * param)
        sizes.sort(reverse=True)
        return sizes[:param]

    biggest_positive = top_overlaps(train_pos)
    biggest_negative = top_overlaps(train_neg)

    # Lists of equal length compare lexicographically, i.e. the first rank
    # at which the two classes differ decides the outcome.
    if biggest_negative > biggest_positive:
        return 0, 1
    if biggest_positive > biggest_negative:
        return 1, 0
    return 0, 0

In [8]:
def cross_valid_check(param, files_amount=10, blocks_num=10, algorithm="First"):
    """Estimate accuracy for ``param`` by cross-validation over several files.

    For each of the files ``train1.csv`` .. ``train<files_amount>.csv`` the
    rows are split into ``blocks_num`` folds by ``split_file``. Every fold is
    predicted from the remaining rows, and the share of true positives plus
    true negatives among the fold's samples is its accuracy. Fold accuracies
    are averaged per file and the per-file averages are averaged again.

    The per-fold tallies are printed, and the global ``pred_result`` counters
    are left at zero afterwards.

    Args:
        param: Parameter forwarded to ``predict``.
        files_amount: Number of training files to evaluate.
        blocks_num: Number of folds per file.
        algorithm: Algorithm name forwarded to ``predict``.

    Returns:
        The mean accuracy over all files.
    """
    accuracy = 0
    for i in range(files_amount):
        fold_accuracies = []
        folds = split_file("train{}.csv".format(i + 1), blocks_num)
        for iteration, (train, to_predict) in enumerate(folds):
            if not to_predict:
                # More folds than rows: an empty fold has no accuracy to
                # average, and dividing by its size would fail.
                continue
            # Start from clean counters so tallies left behind by an earlier
            # (possibly interrupted) cell cannot leak into this fold's score.
            for key in pred_result:
                pred_result[key] = 0
            train_pos = [a for a in train if a[-1] == "positive"]
            train_neg = [a for a in train if a[-1] == "negative"]
            for sample in to_predict:
                predict(param, train_pos, train_neg, sample, algorithm)
            correct = pred_result["true_positive"] + pred_result["true_negative"]
            fold_accuracies.append(correct / float(len(to_predict)))
            print("iteration: {}".format(iteration))
            print(pred_result)
            print("//--------------------------------------")
            for key in pred_result:
                pred_result[key] = 0
        if fold_accuracies:
            accuracy += sum(fold_accuracies) / float(len(fold_accuracies))
    accuracy = accuracy / files_amount
    return accuracy

In [9]:
def split_file(file_name, blocks_num):
    """Yield ``blocks_num`` train/validation splits of a comma-separated file.

    The first line of the file is skipped and every remaining non-blank line
    is split on commas. Row ``i`` belongs to the validation part of fold
    ``i % blocks_num`` and to the training part of every other fold.

    Args:
        file_name: Path of the file to read.
        blocks_num: Number of folds to generate.

    Yields:
        ``(training, validation)`` pairs of row lists, one pair per fold.
    """
    with open(file_name, "r") as f:
        lines = [line.strip() for line in f]
    # Blank lines (for example an extra empty line at the end of the file)
    # would otherwise turn into one-field records without a class label.
    data = [line.split(",") for line in lines[1:] if line]

    for k in range(blocks_num):
        validation = data[k::blocks_num]
        training = [row for index, row in enumerate(data) if index % blocks_num != k]
        yield training, validation

In [15]:
def cross_valid_executor(files_amount=10, blocks_num=10, algorithm="First"):
    """Try ten candidate parameters and return the best cross-validated one.

    Candidates per algorithm:
        * ``"First"``: 0.55, 0.60, ..., 1.00
        * ``"Second"``: 0.45, 0.40, ..., 0.00
        * anything else: 5, 10, ..., 50

    Args:
        files_amount: Forwarded to ``cross_valid_check``.
        blocks_num: Forwarded to ``cross_valid_check``.
        algorithm: Algorithm name; also selects the candidate grid.

    Returns:
        ``(best_accuracy, param)``. The first candidate reaching the highest
        accuracy wins; if no candidate scores above 0, ``param`` stays at
        its initial value 0.5.
    """
    param = 0.5
    best_accuracy = 0
    for step in range(1, 11):
        # Candidates are derived from the step index rather than by adding
        # 0.05 repeatedly, which accumulates binary rounding error
        # (0.55 + 0.05 == 0.6000000000000001) and makes the end points miss
        # exactly 1.0 and 0.0.
        if algorithm == "First":
            coeff = (50 + 5 * step) / 100.0
        elif algorithm == "Second":
            coeff = (50 - 5 * step) / 100.0
        else:
            coeff = 5 * step
        accuracy = cross_valid_check(coeff, files_amount=files_amount, blocks_num=blocks_num, algorithm=algorithm)
        if accuracy > best_accuracy:
            param = coeff
            best_accuracy = accuracy
    return best_accuracy, param

In [11]:
def my_division(x, y):
    """Return ``x / y`` as a float, or the string ``"NaN"`` when ``y`` is zero."""
    return "NaN" if y == 0 else x / float(y)

In [18]:
def get_result(input_length):
    """Print the current ``pred_result`` tallies and the metrics derived from them.

    ``input_length`` is the denominator used for accuracy. Every ratio goes
    through ``my_division``, so a zero denominator prints as ``NaN``.
    """
    print(pred_result)
    tp = pred_result["true_positive"]
    tn = pred_result["true_negative"]
    fp = pred_result["false_positive"]
    fn = pred_result["false_negative"]

    acc = my_division(tp + tn, input_length)
    prec = my_division(tp, tp + fp)
    FDR = my_division(fp, fp + tp)
    FPR = my_division(fp, tn + fp)
    NPV = my_division(tn, tn + fn)
    TPR = my_division(tp, tp + fn)
    TNR = my_division(tn, fp + tn)
    # Recall is the same ratio as the true positive rate.
    rec = TPR
    print("True Positive Rate: {}\nTrue Negative Rate: {}\n\
Negative Predictive Value: {}\nFalse Positive Rate: {}\nFalse Discovery Rate: {}\nAccuracy: {}\n\
Recall: {}\nPrecision: {}".format(TPR, TNR, NPV, FPR, FDR, acc, rec, prec))

In [19]:
# Evaluate the "First" algorithm with parameter 0.7 on the ten
# train<n>.csv / test<n>.csv pairs and print the metrics of every run.
for i in range(10):
    data_name = "train{}.csv".format(i + 1)
    test_name = "test{}.csv".format(i + 1)
    with open(data_name, "r") as f:
        # [1:] skips the first line of the file (presumably a header row).
        train = [record.strip().split(",") for record in f][1:]
    train_pos = [record for record in train if record[-1] == "positive"]
    train_neg = [record for record in train if record[-1] == "negative"]
    with open(test_name, "r") as f:
        to_predict = [record.strip().split(",") for record in f][1:]

    # Zero the shared tallies before predicting so counts left over from an
    # earlier (possibly interrupted) cell cannot distort this run's metrics.
    pred_result.update(dict.fromkeys(pred_result, 0))

    for sample in to_predict:
        predict(0.7, train_pos, train_neg, sample, algorithm="First")

    print("Run number {}".format(i + 1))
    get_result(len(to_predict))
    print("//----------------------------------------------------------------------------------------------------")

    # Leave the tallies clean for whichever cell runs next.
    for keys in pred_result:
        pred_result[keys] = 0

Run number 1
{'true_positive': 55, 'true_negative': 32, 'false_positive': 0, 'false_negative': 6, 'undefined': 0}
True Positive Rate: 0.9016393442622951
True Negative Rate: 1.0
Negative Predictive Value: 0.8421052631578947
False Positive Rate: 0.0
False Discovery Rate: 0.0
AccuracyPrecision: 0.9354838709677419
Recall: 0.9016393442622951
Precision: 1.0
//----------------------------------------------------------------------------------------------------
Run number 2
{'true_positive': 50, 'true_negative': 36, 'false_positive': 0, 'false_negative': 1, 'undefined': 0}
True Positive Rate: 0.9803921568627451
True Negative Rate: 1.0
Negative Predictive Value: 0.972972972972973
False Positive Rate: 0.0
False Discovery Rate: 0.0
AccuracyPrecision: 0.9885057471264368
Recall: 0.9803921568627451
Precision: 1.0
//----------------------------------------------------------------------------------------------------
Run number 3
{'true_positive': 65, 'true_negative': 35, 'false_positive': 0, 'false_neg

In [21]:
# Evaluate the "Second" algorithm with parameter 0 on the ten
# train<n>.csv / test<n>.csv pairs and print the metrics of every run.
for i in range(10):
    data_name = "train{}.csv".format(i + 1)
    test_name = "test{}.csv".format(i + 1)
    with open(data_name, "r") as f:
        # [1:] skips the first line of the file (presumably a header row).
        train = [record.strip().split(",") for record in f][1:]
    train_pos = [record for record in train if record[-1] == "positive"]
    train_neg = [record for record in train if record[-1] == "negative"]
    with open(test_name, "r") as f:
        to_predict = [record.strip().split(",") for record in f][1:]

    # Zero the shared tallies before predicting so counts left over from an
    # earlier (possibly interrupted) cell cannot distort this run's metrics.
    pred_result.update(dict.fromkeys(pred_result, 0))

    for sample in to_predict:
        predict(0, train_pos, train_neg, sample, algorithm="Second")

    print("Run number {}".format(i + 1))
    get_result(len(to_predict))
    print("//----------------------------------------------------------------------------------------------------")

    # Leave the tallies clean for whichever cell runs next.
    for keys in pred_result:
        pred_result[keys] = 0

Run number 1
{'true_positive': 61, 'true_negative': 32, 'false_positive': 0, 'false_negative': 0, 'undefined': 0}
True Positive Rate: 1.0
True Negative Rate: 1.0
Negative Predictive Value: 1.0
False Positive Rate: 0.0
False Discovery Rate: 0.0
AccuracyPrecision: 1.0
Recall: 1.0
Precision: 1.0
//----------------------------------------------------------------------------------------------------
Run number 2
{'true_positive': 51, 'true_negative': 34, 'false_positive': 2, 'false_negative': 0, 'undefined': 0}
True Positive Rate: 1.0
True Negative Rate: 0.9444444444444444
Negative Predictive Value: 1.0
False Positive Rate: 0.05555555555555555
False Discovery Rate: 0.03773584905660377
AccuracyPrecision: 0.9770114942528736
Recall: 1.0
Precision: 0.9622641509433962
//----------------------------------------------------------------------------------------------------
Run number 3
{'true_positive': 65, 'true_negative': 35, 'false_positive': 0, 'false_negative': 0, 'undefined': 0}
True Positive R

In [20]:
# Evaluate the "Third" algorithm with parameter 25 on the ten
# train<n>.csv / test<n>.csv pairs and print the metrics of every run.
for i in range(10):
    data_name = "train{}.csv".format(i + 1)
    test_name = "test{}.csv".format(i + 1)
    with open(data_name, "r") as f:
        # [1:] skips the first line of the file (presumably a header row).
        train = [record.strip().split(",") for record in f][1:]
    train_pos = [record for record in train if record[-1] == "positive"]
    train_neg = [record for record in train if record[-1] == "negative"]
    with open(test_name, "r") as f:
        to_predict = [record.strip().split(",") for record in f][1:]

    # Zero the shared tallies before predicting so counts left over from an
    # earlier (possibly interrupted) cell cannot distort this run's metrics.
    pred_result.update(dict.fromkeys(pred_result, 0))

    for sample in to_predict:
        predict(25, train_pos, train_neg, sample, algorithm="Third")

    print("Run number {}".format(i + 1))
    get_result(len(to_predict))
    print("//----------------------------------------------------------------------------------------------------")

    # Leave the tallies clean for whichever cell runs next.
    for keys in pred_result:
        pred_result[keys] = 0

Run number 1
{'true_positive': 61, 'true_negative': 32, 'false_positive': 0, 'false_negative': 0, 'undefined': 0}
True Positive Rate: 1.0
True Negative Rate: 1.0
Negative Predictive Value: 1.0
False Positive Rate: 0.0
False Discovery Rate: 0.0
AccuracyPrecision: 1.0
Recall: 1.0
Precision: 1.0
//----------------------------------------------------------------------------------------------------
Run number 2
{'true_positive': 51, 'true_negative': 34, 'false_positive': 2, 'false_negative': 0, 'undefined': 0}
True Positive Rate: 1.0
True Negative Rate: 0.9444444444444444
Negative Predictive Value: 1.0
False Positive Rate: 0.05555555555555555
False Discovery Rate: 0.03773584905660377
AccuracyPrecision: 0.9770114942528736
Recall: 1.0
Precision: 0.9622641509433962
//----------------------------------------------------------------------------------------------------
Run number 3
{'true_positive': 65, 'true_negative': 33, 'false_positive': 2, 'false_negative': 0, 'undefined': 0}
True Positive R

In [16]:
# Parameter search for the "Third" algorithm: cross_valid_executor scores the
# candidates 5, 10, ..., 50 with 3-fold cross-validation on train1.csv ..
# train10.csv and returns the best accuracy with the candidate that reached it.
best_accuracy, best_param = cross_valid_executor(files_amount=10, blocks_num=3, algorithm="Third")
print("best_accuracy: ", best_accuracy)
print("parameter: ", best_param)

iteration: 0
{'true_positive': 176, 'true_negative': 71, 'false_positive': 2, 'false_negative': 0, 'undefined': 40}
//--------------------------------------
iteration: 1
{'true_positive': 179, 'true_negative': 79, 'false_positive': 0, 'false_negative': 0, 'undefined': 30}
//--------------------------------------
iteration: 2
{'true_positive': 175, 'true_negative': 76, 'false_positive': 2, 'false_negative': 0, 'undefined': 35}
//--------------------------------------
iteration: 0
{'true_positive': 182, 'true_negative': 68, 'false_positive': 3, 'false_negative': 0, 'undefined': 38}
//--------------------------------------
iteration: 1
{'true_positive': 181, 'true_negative': 69, 'false_positive': 3, 'false_negative': 0, 'undefined': 37}
//--------------------------------------
iteration: 2
{'true_positive': 184, 'true_negative': 76, 'false_positive': 0, 'false_negative': 0, 'undefined': 30}
//--------------------------------------
iteration: 0
{'true_positive': 175, 'true_negative': 70, '

iteration: 2
{'true_positive': 184, 'true_negative': 88, 'false_positive': 9, 'false_negative': 0, 'undefined': 2}
//--------------------------------------
iteration: 0
{'true_positive': 186, 'true_negative': 93, 'false_positive': 5, 'false_negative': 0, 'undefined': 1}
//--------------------------------------
iteration: 1
{'true_positive': 184, 'true_negative': 94, 'false_positive': 2, 'false_negative': 1, 'undefined': 4}
//--------------------------------------
iteration: 2
{'true_positive': 185, 'true_negative': 94, 'false_positive': 4, 'false_negative': 0, 'undefined': 2}
//--------------------------------------
iteration: 0
{'true_positive': 189, 'true_negative': 94, 'false_positive': 4, 'false_negative': 0, 'undefined': 2}
//--------------------------------------
iteration: 1
{'true_positive': 189, 'true_negative': 95, 'false_positive': 5, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 186, 'true_negative': 92, 'false_

iteration: 1
{'true_positive': 190, 'true_negative': 92, 'false_positive': 9, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 189, 'true_negative': 92, 'false_positive': 9, 'false_negative': 1, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 185, 'true_negative': 88, 'false_positive': 8, 'false_negative': 1, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 184, 'true_negative': 89, 'false_positive': 7, 'false_negative': 1, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 185, 'true_negative': 90, 'false_positive': 6, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 185, 'true_negative': 93, 'false_positive': 6, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 184, 'true_negative': 94, 'false_

iteration: 0
{'true_positive': 189, 'true_negative': 93, 'false_positive': 8, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 189, 'true_negative': 96, 'false_positive': 5, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 189, 'true_negative': 97, 'false_positive': 3, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 188, 'true_negative': 91, 'false_positive': 9, 'false_negative': 0, 'undefined': 2}
//--------------------------------------
iteration: 1
{'true_positive': 188, 'true_negative': 96, 'false_positive': 6, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 188, 'true_negative': 99, 'false_positive': 2, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 190, 'true_negative': 96, 'false_

iteration: 2
{'true_positive': 187, 'true_negative': 95, 'false_positive': 5, 'false_negative': 1, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 191, 'true_negative': 93, 'false_positive': 6, 'false_negative': 1, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 192, 'true_negative': 93, 'false_positive': 5, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 190, 'true_negative': 97, 'false_positive': 2, 'false_negative': 1, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 186, 'true_negative': 90, 'false_positive': 9, 'false_negative': 1, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 187, 'true_negative': 94, 'false_positive': 5, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 185, 'true_negative': 90, 'false_

iteration: 1
{'true_positive': 184, 'true_negative': 95, 'false_positive': 5, 'false_negative': 1, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 185, 'true_negative': 94, 'false_positive': 6, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 189, 'true_negative': 95, 'false_positive': 5, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 189, 'true_negative': 95, 'false_positive': 5, 'false_negative': 0, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 186, 'true_negative': 92, 'false_positive': 8, 'false_negative': 3, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 187, 'true_negative': 93, 'false_positive': 7, 'false_negative': 2, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 188, 'true_negative': 97, 'false_

In [14]:
# Parameter search for the "Second" algorithm: cross_valid_executor scores ten
# candidates, stepping down from 0.45 in steps of 0.05, with 3-fold
# cross-validation on train1.csv .. train10.csv.
best_accuracy, best_param = cross_valid_executor(files_amount=10, blocks_num=3, algorithm="Second")
print("best_accuracy: ", best_accuracy)
print("parameter: ", best_param)

iteration: 0
{'true_positive': 131, 'true_negative': 64, 'false_positive': 36, 'false_negative': 58, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 146, 'true_negative': 65, 'false_positive': 34, 'false_negative': 42, 'undefined': 1}
//--------------------------------------
iteration: 2
{'true_positive': 136, 'true_negative': 65, 'false_positive': 35, 'false_negative': 52, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 137, 'true_negative': 68, 'false_positive': 31, 'false_negative': 55, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 138, 'true_negative': 66, 'false_positive': 32, 'false_negative': 54, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 150, 'true_negative': 67, 'false_positive': 30, 'false_negative': 41, 'undefined': 2}
//--------------------------------------
iteration: 0
{'true_positive': 131, 'true_negative':

iteration: 1
{'true_positive': 110, 'true_negative': 76, 'false_positive': 24, 'false_negative': 74, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 109, 'true_negative': 74, 'false_positive': 25, 'false_negative': 75, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 108, 'true_negative': 74, 'false_positive': 25, 'false_negative': 78, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 111, 'true_negative': 77, 'false_positive': 23, 'false_negative': 74, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 96, 'true_negative': 81, 'false_positive': 19, 'false_negative': 89, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 117, 'true_negative': 78, 'false_positive': 22, 'false_negative': 72, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 112, 'true_negative': 

iteration: 2
{'true_positive': 116, 'true_negative': 62, 'false_positive': 39, 'false_negative': 72, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 122, 'true_negative': 68, 'false_positive': 33, 'false_negative': 68, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 113, 'true_negative': 69, 'false_positive': 32, 'false_negative': 77, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 117, 'true_negative': 63, 'false_positive': 38, 'false_negative': 73, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 117, 'true_negative': 64, 'false_positive': 32, 'false_negative': 69, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 105, 'true_negative': 65, 'false_positive': 31, 'false_negative': 80, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 117, 'true_negative':

iteration: 0
{'true_positive': 115, 'true_negative': 57, 'false_positive': 42, 'false_negative': 71, 'undefined': 1}
//--------------------------------------
iteration: 1
{'true_positive': 108, 'true_negative': 61, 'false_positive': 38, 'false_negative': 79, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 107, 'true_negative': 62, 'false_positive': 37, 'false_negative': 80, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 113, 'true_negative': 59, 'false_positive': 42, 'false_negative': 76, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 122, 'true_negative': 64, 'false_positive': 37, 'false_negative': 67, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 116, 'true_negative': 55, 'false_positive': 45, 'false_negative': 73, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 105, 'true_negative':

iteration: 1
{'true_positive': 130, 'true_negative': 52, 'false_positive': 48, 'false_negative': 59, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 120, 'true_negative': 55, 'false_positive': 45, 'false_negative': 69, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 127, 'true_negative': 56, 'false_positive': 44, 'false_negative': 62, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 129, 'true_negative': 60, 'false_positive': 40, 'false_negative': 59, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 125, 'true_negative': 56, 'false_positive': 44, 'false_negative': 63, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 125, 'true_negative': 56, 'false_positive': 43, 'false_negative': 67, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 122, 'true_negative':

iteration: 2
{'true_positive': 117, 'true_negative': 69, 'false_positive': 27, 'false_negative': 68, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 118, 'true_negative': 62, 'false_positive': 37, 'false_negative': 67, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 114, 'true_negative': 77, 'false_positive': 23, 'false_negative': 70, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 117, 'true_negative': 73, 'false_positive': 26, 'false_negative': 67, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 127, 'true_negative': 66, 'false_positive': 33, 'false_negative': 59, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 117, 'true_negative': 75, 'false_positive': 25, 'false_negative': 68, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 115, 'true_negative':

In [15]:
# Parameter search for the "First" algorithm: cross_valid_executor scores ten
# candidates, stepping up from 0.55 in steps of 0.05, with 3-fold
# cross-validation on train1.csv .. train10.csv.
best_accuracy, best_param = cross_valid_executor(files_amount=10, blocks_num=3, algorithm="First")
print("best_accuracy: ", best_accuracy)
print("parameter: ", best_param)

iteration: 0
{'true_positive': 140, 'true_negative': 67, 'false_positive': 33, 'false_negative': 49, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 151, 'true_negative': 78, 'false_positive': 22, 'false_negative': 37, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 135, 'true_negative': 72, 'false_positive': 28, 'false_negative': 53, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 143, 'true_negative': 68, 'false_positive': 31, 'false_negative': 49, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 143, 'true_negative': 74, 'false_positive': 24, 'false_negative': 49, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 136, 'true_negative': 81, 'false_positive': 18, 'false_negative': 55, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 133, 'true_negative':

iteration: 1
{'true_positive': 146, 'true_negative': 90, 'false_positive': 10, 'false_negative': 38, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 155, 'true_negative': 86, 'false_positive': 13, 'false_negative': 29, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 150, 'true_negative': 90, 'false_positive': 9, 'false_negative': 36, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 148, 'true_negative': 89, 'false_positive': 11, 'false_negative': 37, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 144, 'true_negative': 91, 'false_positive': 9, 'false_negative': 41, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 154, 'true_negative': 90, 'false_positive': 10, 'false_negative': 35, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 148, 'true_negative': 9

iteration: 0
{'true_positive': 185, 'true_negative': 101, 'false_positive': 0, 'false_negative': 5, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 184, 'true_negative': 100, 'false_positive': 1, 'false_negative': 6, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 185, 'true_negative': 100, 'false_positive': 1, 'false_negative': 5, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 179, 'true_negative': 96, 'false_positive': 0, 'false_negative': 7, 'undefined': 0}
//--------------------------------------
iteration: 1
{'true_positive': 178, 'true_negative': 96, 'false_positive': 0, 'false_negative': 7, 'undefined': 0}
//--------------------------------------
iteration: 2
{'true_positive': 180, 'true_negative': 96, 'false_positive': 0, 'false_negative': 5, 'undefined': 0}
//--------------------------------------
iteration: 0
{'true_positive': 178, 'true_negative': 99, 'fal

iteration: 2
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 286}
//--------------------------------------
iteration: 0
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 290}
//--------------------------------------
iteration: 1
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 290}
//--------------------------------------
iteration: 2
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 289}
//--------------------------------------
iteration: 0
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 290}
//--------------------------------------
iteration: 1
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 290}
//--------------------------------------
iteration: 2
{'true_positive': 0, 'true_negative': 0, 'false_positive'

iteration: 1
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 288}
//--------------------------------------
iteration: 2
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 288}
//--------------------------------------
iteration: 0
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 291}
//--------------------------------------
iteration: 1
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 290}
//--------------------------------------
iteration: 2
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 290}
//--------------------------------------
iteration: 0
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 286}
//--------------------------------------
iteration: 1
{'true_positive': 0, 'true_negative': 0, 'false_positive'

iteration: 0
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 285}
//--------------------------------------
iteration: 1
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 285}
//--------------------------------------
iteration: 2
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 285}
//--------------------------------------
iteration: 0
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 289}
//--------------------------------------
iteration: 1
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 289}
//--------------------------------------
iteration: 2
{'true_positive': 0, 'true_negative': 0, 'false_positive': 0, 'false_negative': 0, 'undefined': 289}
//--------------------------------------
iteration: 0
{'true_positive': 0, 'true_negative': 0, 'false_positive'

In [15]:
# Interactive evaluation: keep asking for a train/test file pair, classify the
# test rows with the "First" algorithm (parameter 0.7) and print the metrics,
# until the user types "Exit" at either prompt.
while True:
    data_name = input('Enter train_data file_name (or "Exit" if you\'re done): ')
    if data_name == "Exit":
        break
    test_name = input('Enter test_data file_name (or "Exit" if you\'re done): ')
    if test_name == "Exit":
        break

    with open(data_name, "r") as f:
        # [1:] skips the first line of the file (presumably a header row).
        train = [record.strip().split(",") for record in f][1:]
    train_pos = [record for record in train if record[-1] == "positive"]
    train_neg = [record for record in train if record[-1] == "negative"]
    with open(test_name, "r") as f:
        to_predict = [record.strip().split(",") for record in f][1:]

    # Zero the shared tallies before predicting so counts left over from an
    # earlier (possibly interrupted) cell cannot distort the reported metrics.
    pred_result.update(dict.fromkeys(pred_result, 0))

    for sample in to_predict:
        predict(0.7, train_pos, train_neg, sample, algorithm="First")

    get_result(len(to_predict))

    # Leave the tallies clean for the next file pair.
    for keys in pred_result:
        pred_result[keys] = 0

Enter train_data file_name (or "Exit" if you're done): Exit
