In [4]:
import numpy as np
import numba as nb
import time
# import matplotlib.pyplot as plt
# import matplotlib as mpl
import random
import threading

@nb.vectorize(['float64(float64)'])
def my_sign(n):
    """Element-wise sign that maps zero to +1.

    Returns 1.0 when ``n >= 0`` and -1.0 otherwise, so the result is never 0.
    """
    return 1.0 if n >= 0 else -1.0

# @nb.jit(nogil = True)
@nb.jit
def cal_err(x, y, i, d_size):
    """Count how many of the first ``d_size`` samples ``my_sign(x - i)`` gets wrong.

    Parameters
    ----------
    x : indexable of sample values.
    y : indexable of labels, compared against the output of ``my_sign``.
    i : threshold subtracted from each sample before taking the sign.
    d_size : number of leading samples to examine.

    Returns
    -------
    The number of positions ``j < d_size`` where ``my_sign(x[j] - i) != y[j]``.
    """
    mistakes = 0
    for idx in range(d_size):
        predicted = my_sign(x[idx] - i)
        # The comparison result is added directly, contributing 1 per mismatch.
        mistakes += (predicted != y[idx])
    return mistakes

# @nb.jit(nogil = True)
@nb.jit
def ds_algo(x, y):
    """Pick the threshold/direction pair with the fewest training errors.

    Candidate thresholds are every value in ``x`` plus the two constants -1
    and 1.  For each candidate the error count of the positive direction is
    obtained from ``cal_err``; the negative direction is scored as the
    complement ``x.size - err``.

    Parameters
    ----------
    x : 1-D array of sample values.
    y : labels aligned with ``x``.

    Returns
    -------
    (error_rate, (theta, s)) where ``error_rate`` is the best error count
    divided by ``x.size`` and ``s`` is 1 or -1 (0 only if no candidate ever
    improved on the initial bound).
    """
    n = x.size
    candidates = np.append(x, [-1, 1])
    # Start above any reachable error count so the first candidate always wins.
    lowest = 10 * n
    chosen_theta = 0
    chosen_s = 0
    for theta in candidates:
        miss_pos = cal_err(x, y, theta, n)
        miss_neg = n - miss_pos
        if miss_pos < lowest:
            lowest = miss_pos
            chosen_theta = theta
            chosen_s = 1
        # Checked after the positive direction, so a strictly better flipped
        # stump at the same threshold overrides it.
        if miss_neg < lowest:
            lowest = miss_neg
            chosen_theta = theta
            chosen_s = -1
    return lowest / n, (chosen_theta, chosen_s)

# @nb.jit(nogil = True)
@nb.jit
def test_err(x, y, theta, s, d_size):
    """Count misclassifications of the stump ``s * my_sign(x - theta)``.

    Parameters
    ----------
    x : indexable of sample values.
    y : indexable of labels aligned with ``x``.
    theta : threshold of the stump.
    s : direction multiplier applied to the sign.
    d_size : number of leading samples to evaluate.

    Returns
    -------
    The number of positions ``i < d_size`` where the prediction differs
    from ``y[i]``.
    """
    err = 0
    for i in range(d_size):
        # Use my_sign (zero -> +1), the same convention cal_err trains with.
        # np.sign returns 0 at zero, so a sample equal to theta would never
        # match a label and would always be counted as an error.
        err += y[i] != s*my_sign(x[i] - theta)
    return err
#--------------------------------------------------------------
def _noisy_sample(n):
    """Draw ``n`` points via ``2*np.random.ranf(n)-1`` and label them with noise.

    Labels start as ``my_sign`` of each point; each one is then negated
    whenever an independent ``random.random()`` draw is ``<= 0.2``.
    """
    xs = 2*np.random.ranf(n)-1
    ys = my_sign(xs)
    for k in range(n):
        if random.random() <= 0.2:
            ys[k] *= -1
    return xs, ys


def task():
    """Run one trial and add its errors to the global accumulators.

    Fits ``ds_algo`` on a fresh noisy sample of ``size`` points, adds the
    returned training error rate to ``e_in``, then scores the fitted
    (theta, s) on a fresh noisy sample of ``test_size`` points with
    ``test_err`` and adds that error rate to ``e_out``.
    """
    global e_in, e_out, size, test_size
    data_x, data_y = _noisy_sample(size)
    train_rate, (theta, s) = ds_algo(data_x, data_y)
    e_in += train_rate

    test_x, test_y = _noisy_sample(test_size)
    e_out += test_err(test_x, test_y, theta, s, test_size) / test_size

# Experiment configuration. `task` reads `size`/`test_size` and accumulates
# into `e_in`/`e_out`, so these must be (re)set before the loop; resetting the
# accumulators here also makes the cell safe to re-run.
SEED = 0
e_in = 0
e_out = 0
size = 20
test_size = 1000
times = 5000
threads = []  # not used by this sequential loop; left defined in case other cells reference it

# `task` draws from both numpy's global generator and the stdlib `random`
# module, so both are seeded to make the reported averages reproducible.
np.random.seed(SEED)
random.seed(SEED)

# perf_counter is a monotonic, high-resolution clock intended for measuring
# elapsed time.
start = time.perf_counter()
for t in range(times):
    task()
end = time.perf_counter()

# Turn the accumulated sums into per-trial averages.
e_in /= times
e_out /= times

print(f"E_in:{e_in:.4f}")
print(f"E_out:{e_out:.4f}")
print(f"use time:{end-start:.4f} s")

E_in:0.1704
E_out:0.2626
use time:1.3193 s


In [147]:
import csv
import pandas as pd
# Read the header-less training file; fields are split on a single space or a
# tab (a regex separator, which is why the python parsing engine is requested).
file_name = "hw2_train.txt"
df = pd.read_csv(
    file_name,
    header=None,
    sep=" |\t",
    engine="python",
)
# Transpose so that train[k] is the k-th column of the file.
train = df.to_numpy().T

In [148]:
# Fit one stump per feature row: rows 0..dime-1 of `train` are passed as the
# samples and row `dime` as the labels.
dime = 9
ans_arr = []
e_ins = []
train_labels = train[dime]
for i, feature in enumerate(train[:dime]):
    train_e, train_arr = ds_algo(feature, train_labels)
    ans_arr.append(train_arr)   # (theta, s) chosen for feature i
    e_ins.append(train_e)       # training error rate for feature i
print(ans_arr)
print(e_ins)

[(4.25, -1), (-3.258, 1), (-8.421, 1), (1.774, -1), (-7.973, 1), (4.37, -1), (4.216, -1), (-2.752, -1), (-0.008, -1)]
[0.39, 0.39, 0.4, 0.25, 0.43, 0.28, 0.34, 0.39, 0.36]


In [149]:
# bob = best of best: the feature whose stump has the lowest training error
# (the first such feature when several tie).
bob_dime = min(range(len(e_ins)), key=e_ins.__getitem__)
bob_e = e_ins[bob_dime]
bob_arr = ans_arr[bob_dime]
print(bob_e, bob_dime, bob_arr)

0.25 3 (1.774, -1)


In [150]:
# Read the header-less test file with the same single-space-or-tab separator
# used for the training file.
file_name = "hw2_test.txt"
df2 = pd.read_csv(
    file_name,
    header=None,
    sep=" |\t",
    engine="python",
)
# Transpose so that test[k] is the k-th column of the file.
test = df2.to_numpy().T

# df.head()

In [151]:
# Score the selected stump on the test set: row `bob_dime` supplies the
# samples and row `dime` the labels.
dime = 9
test_labels = test[dime]
n_test = len(test_labels)
best_theta, best_s = bob_arr[0], bob_arr[1]
e_test = test_err(test[bob_dime], test_labels, best_theta, best_s, n_test) / n_test
print(e_test)

0.36
