In [4]:
import time
import numpy as np
import pandas as pd
from functions import *

In [5]:
# Every dataset (tp1, tp2, tp3) is read from a pair of text files, "app" and
# "dec", through the open_file helper imported from `functions`.
# Later in this notebook the tp1 "app" data is passed to linear_train and the
# tp1 "dec" data to test_linear_model.
split_data_tp1_app, split_data_tp1_dec = (
    open_file('../data/data_tp1_app.txt'),
    open_file('../data/data_tp1_dec.txt'),
)

split_data_tp2_app, split_data_tp2_dec = (
    open_file('../data/data_tp2_app.txt'),
    open_file('../data/data_tp2_dec.txt'),
)

split_data_tp3_app, split_data_tp3_dec = (
    open_file('../data/data_tp3_app.txt'),
    open_file('../data/data_tp3_dec.txt'),
)

In [16]:
def create_hyperplans_classes(train_data):
    """List every unordered pair of classes found in ``train_data``.

    The classes come from ``get_unique_class_num`` (imported helper). Each
    pair ``(classes[i], classes[j])`` with ``i < j`` appears exactly once, in
    the order the helper returned the classes; each pair identifies one
    separating hyperplane to train.

    Returns:
        list of 2-tuples; empty when fewer than two classes are found.
    """
    labels = get_unique_class_num(train_data)
    n_labels = len(labels)
    return [
        (labels[first], labels[second])
        for first in range(n_labels)
        for second in range(first + 1, n_labels)
    ]

def predict_one_line_perceptron(line, weights):
    """Evaluate the perceptron's linear score for one sample.

    Args:
        line: Feature values of the sample.
        weights: One weight per feature, followed by the bias as last entry.

    Returns:
        ``sum(line[i] * weights[i]) + weights[-1]``.
    """
    score = 0
    for position, feature in enumerate(line):
        score += feature * weights[position]
    # The final weight is the bias term; it is added without a feature factor.
    return score + weights[-1]

def linear_perceptron_one_epoch(train_data, weights):
    """Run one perceptron pass over every row of ``train_data``.

    Each row is converted to floats; its last value is the label and the
    values before it are the features.

    Args:
        train_data: DataFrame whose last column is the label (1 or -1).
        weights: Current weight vector, one entry per column of the frame.

    Returns:
        ``(weights, count)`` where ``count`` is the number of rows that
        triggered a weight update during this pass.
    """
    n_updates = 0
    for _, sample in train_data.iterrows():
        values = [float(cell) for cell in sample.tolist()]
        label = values[-1]
        # Step 1: mirror the samples of class -1 (features negated, label
        # kept) so both classes can be checked with the same "> 0" rule.
        # For the other rows the label stays in the last (bias) position.
        if label == -1:
            sample_vector = [-cell for cell in values[:-1]] + [label]
        else:
            sample_vector = values
        # Step 2: a non-positive projection means the row is misclassified;
        # move the weights towards the sample vector.
        if np.dot(weights, sample_vector) <= 0:
            weights = [w + s for w, s in zip(weights, sample_vector)]
            n_updates += 1
    return weights, n_updates

def linear_perceptron_two_classes_converge(train_data):
    """Train a two-class perceptron until a full epoch makes no update.

    Weights start at zero, one per column of ``train_data``. Epochs of
    ``linear_perceptron_one_epoch`` are repeated until one reports zero
    updates; there is no epoch limit, so the loop only ends if such an epoch
    is eventually reached.

    Returns:
        The weights produced by the last epoch.
    """
    n_columns = len(train_data.columns)
    weights = np.zeros(n_columns)
    while True:
        weights, n_updates = linear_perceptron_one_epoch(train_data, weights)
        if n_updates == 0:
            return weights


def linear_perceptron_two_classes_non_converge(train_data, epochs):
    """Train a two-class perceptron for at most ``epochs`` epochs.

    Weights start at zero, one per column of ``train_data``. After every
    epoch the weights are kept if that epoch made no more updates than the
    best epoch so far (ties favour the most recent epoch).

    Args:
        train_data: DataFrame whose last column is the label (1 or -1).
        epochs: Maximum number of passes over the data.

    Returns:
        ``(final_weights, best_count)``: the kept weights and the update
        count of the epoch that produced them. With ``epochs <= 0`` no epoch
        runs and ``([], None)`` is returned.
    """
    weights = np.zeros(len(train_data.columns))
    final_weights = []
    best_count = None
    for _ in range(epochs):
        weights, count = linear_perceptron_one_epoch(train_data, weights)
        if best_count is None or count <= best_count:
            final_weights = weights
            best_count = count
        # An epoch without any update leaves the weights untouched, so every
        # following epoch would return the same weights and a zero count.
        # Stopping here gives the same result without the wasted passes.
        if count == 0:
            break

    return final_weights, best_count


def linear_train(train_data, is_converging:bool = True, epochs:int = 5):
    """Train one two-class perceptron for every pair of classes.

    ``train_data`` is loaded into a DataFrame in which column 0 is read as
    the class and the remaining columns as features. For each class pair
    given by ``create_hyperplans_classes`` the rows of those two classes are
    kept, relabelled 1 (first class of the pair) or -1 (second class), and
    passed to the two-class trainer with the label as the last column.

    Args:
        train_data: Row-oriented data accepted by ``pd.DataFrame``; the class
            values in column 0 must compare equal to the classes returned by
            ``create_hyperplans_classes``.
        is_converging: When True, train with
            ``linear_perceptron_two_classes_converge`` (loops until an epoch
            makes no update); otherwise train with
            ``linear_perceptron_two_classes_non_converge``.
        epochs: Number of epochs for the non-converging trainer; ignored when
            ``is_converging`` is True.

    Returns:
        dict mapping each ``(class_a, class_b)`` pair to its weight vector.
    """
    hyper_plan_list = create_hyperplans_classes(train_data)
    data_pd = pd.DataFrame(train_data)
    # The label column is named after the column count so that it is always
    # appended after the last feature, whatever the number of features.
    label_column = data_pd.shape[1]
    results = {}
    for hyper_plan in hyper_plan_list:
        # Subset with the rows of the two classes split by this hyperplane.
        in_pair = (data_pd[0] == hyper_plan[0]) | (data_pd[0] == hyper_plan[1])
        hyperplan_data = data_pd[in_pair].copy()
        # Transform the classes to 1 (first of the pair) and -1 (second).
        hyperplan_data[label_column] = np.where(hyperplan_data[0] == hyper_plan[0], 1, -1)

        del hyperplan_data[0]
        if is_converging:
            weights = linear_perceptron_two_classes_converge(hyperplan_data)
        else:
            # The non-converging trainer returns (weights, best_count); only
            # the weight vector belongs in the result used for prediction.
            weights, _ = linear_perceptron_two_classes_non_converge(hyperplan_data, epochs)
        results[hyper_plan] = weights
    return results

def test_linear_model(test_data, wheighted_hp:dict):
    count = 0
    for line in test_data:
        line = [float(val) for val in line]
        predict_classes = []
        for key, value in wheighted_hp.items():
            res = predict_one_line_perceptron(line[1:], value)
            if res >= 0:
                predict_classes.append(key[0])
            else:
                predict_classes.append(key[1])
        max_predict_class = get_n_max_occurence_from_list(predict_classes, 1)
        #print(max_predict_class, line[0])
        if float(max_predict_class) == float(line[0]):
            count += 1
    print(count/len(test_data))


# Train one perceptron per class pair on the tp1 "app" data, then print the
# share of tp1 "dec" rows that the trained hyperplanes classify correctly.
# With is_converging=True each pair is trained until an epoch makes no update,
# so this call only returns once that happens for every class pair.
weighted_hp = linear_train(split_data_tp1_app, is_converging=True)
test_linear_model(split_data_tp1_dec, weighted_hp)


0.994
