In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Algoritmo del perceptrón

Para este ejercicio crearemos un modelo que determine si una moneda es de \$1 o de \$2 utilizando su peso y su tamaño. Los datos se encuentran en `../data/coins2.csv`.

In [None]:
# Read the coin measurements from disk and preview the first five rows.
coins_path = '../data/coins2.csv'
dataset = pd.read_csv(coins_path)
dataset.head(n=5)

In [None]:
# Scatter size against weight, drawing each coin class in its own colour
# on one shared set of axes.
fig, ax = plt.subplots(figsize=(8, 8))
for coin_type, colour in (('$1', 'blue'), ('$2', 'red')):
    of_this_type = dataset[dataset.type == coin_type]
    of_this_type.plot.scatter('size', 'weight', c=colour, ax=ax)
ax.legend(['$1', '$2'])
ax.set_title('Medidas de tamaño y peso de diferentes monedas.');

# El conjunto de hipótesis para el perceptrón
$$ h(\mathbf{x}) = \mathrm{sign}(\mathbf{w}^\intercal\mathbf{x}) $$

Escriba una función que represente el conjunto de hipótesis para el perceptrón. Es decir, dado $\mathbf{w}$ y un $\mathrm{umbral}$ (que se incorpora como $w_0$ al añadir la coordenada constante $x_0 = 1$), determine si es una moneda de \$1 o de \$2.

In [None]:
# Add a constant column of ones named 'x0' to every row.
dataset.loc[:, 'x0'] = 1

In [None]:
# Build the column order with 'x0' first and every other column after it,
# keeping their existing relative order.
other_columns = list(dataset.columns)
other_columns.remove('x0')  # raises ValueError if 'x0' is not a column
columns_in_order = ['x0', *other_columns]
columns_in_order

In [None]:
# Rebind `dataset` to the same rows with columns in the order computed above.
dataset = dataset.loc[:, columns_in_order]

In [None]:
# Split the rows into train/test samples using one uniform draw per row.
# A dedicated, seeded generator keeps the split identical across kernel
# restarts; the previous draw came from NumPy's unseeded global state, so
# every "Restart & Run All" produced a different split.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.7  # rows whose draw falls below this go to the train sample
random_values = np.random.RandomState(SPLIT_SEED).rand(len(dataset))
train_sample = dataset[random_values < TRAIN_FRACTION]
test_sample = dataset[random_values >= TRAIN_FRACTION]

In [None]:
def model(row, w):
    """Classify one row with the weight vector ``w``.

    Every entry of ``row`` except the last one is dotted with ``w``.

    Args:
        row: object exposing ``.values`` (e.g. a pandas Series); its last
            entry is ignored.
        w: weight vector with one entry per remaining value of ``row``.

    Returns:
        ``'$1'`` when the dot product is strictly positive, ``'$2'`` otherwise
        (including a dot product of exactly zero).
    """
    features = row.values[:-1]
    score = features.dot(w)
    if score > 0:
        return '$1'
    return '$2'

In [None]:
def guess(df, w):
    """Apply ``model`` to every row of ``df`` using the weights ``w``.

    Args:
        df: DataFrame whose rows are passed one at a time to ``model``.
        w: weight vector forwarded unchanged to ``model``.

    Returns:
        The result of the row-wise ``DataFrame.apply``: one ``model`` output
        per row of ``df``.
    """
    def classify(row):
        return model(row, w)

    return df.apply(classify, axis=1)

In [None]:
def find_missclasified(df, guess):
    """Count the rows whose ``type`` differs from ``guess`` and return the first.

    Args:
        df: DataFrame with a ``type`` column holding the true labels.
        guess: predicted labels, compared element-wise against ``df.type``.

    Returns:
        Tuple ``(count, first)``: ``count`` is the number of rows where the
        label and the prediction differ; ``first`` is the first such row, or
        ``None`` when there are none.
    """
    wrong_rows = df[df.type != guess]
    count = len(wrong_rows)
    if count == 0:
        return count, None
    return count, wrong_rows.iloc[0]

In [None]:
# Quick check: classify the training sample with three random weights and
# show how many rows are wrong plus the first wrong row.
random_w = np.random.rand(3)
random_guess = guess(train_sample, random_w)
find_missclasified(train_sample, random_guess)

In [None]:
def train(df, max_iterations):
    """Fit perceptron weights on ``df`` with repeated single-row updates.

    Starts from random weights (one per column of ``df`` except the last).
    While some row is misclassified, the first such row is added to the
    weights when its ``type`` is ``'$1'`` and subtracted otherwise.

    Args:
        df: DataFrame whose last column is excluded from the weights and
            whose rows expose a ``type`` label.
        max_iterations: upper bound on the number of weight updates.

    Returns:
        The weight vector after the loop stops, either because no row is
        misclassified or because ``max_iterations`` updates were applied.
    """
    n_weights = len(df.columns) - 1
    w = np.random.rand(n_weights)
    updates_done = 0
    while updates_done < max_iterations:
        _, miss = find_missclasified(df, guess(df, w))
        if miss is None:
            # Every row is classified correctly; nothing left to fix.
            break
        sign = 1 if miss['type'] == '$1' else -1
        w += sign * miss.values[:-1].astype(float)
        updates_done += 1
    return w

In [None]:
def pocket(df_train, df_test, max_iterations):
    """Run perceptron updates on ``df_train`` and record every weight vector.

    Starting from random weights, each iteration stores a snapshot of the
    current weights together with the fraction of misclassified rows in
    ``df_train`` and in ``df_test``, then applies one perceptron update using
    the first misclassified training row. The loop stops after
    ``max_iterations`` updates or as soon as no training row is misclassified.

    Args:
        df_train: DataFrame driving the updates; its last column is excluded
            from the weights and its rows expose a ``type`` label.
        df_test: DataFrame used only to measure the error of each snapshot.
        max_iterations: upper bound on the number of weight updates.

    Returns:
        Tuple ``(ws, ein, eout)`` of equal-length lists: the weight snapshots,
        the error fraction on ``df_train`` and the error fraction on
        ``df_test`` for each snapshot.
    """
    ein = []
    eout = []
    ws = []
    w = np.random.rand(len(df_train.columns) - 1)
    iterations = 0
    while iterations < max_iterations:
        # Store a copy: `w += update` below mutates the array in place, so
        # appending `w` itself would leave every entry of `ws` pointing at the
        # same (final) weights.
        ws.append(w.copy())
        gin = guess(df_train, w)
        gout = guess(df_test, w)
        n_in, miss_in = find_missclasified(df_train, gin)
        n_out, miss_out = find_missclasified(df_test, gout)
        ein.append(n_in/len(df_train))
        eout.append(n_out/len(df_test))
        if miss_in is None:
            # The training sample is perfectly classified; stop updating.
            break
        # Move the weights towards the row for '$1', away from it otherwise.
        update = (1 if miss_in.type == '$1' else -1) * \
           miss_in.values[:-1].astype(float)
        w += update
        iterations += 1
    return ws, ein, eout

In [None]:
# Run up to 2000 updates on the training sample, measuring the error on both
# samples at every step.
ws, ein, eout = pocket(
    df_train=train_sample,
    df_test=test_sample,
    max_iterations=2000,
)

In [None]:
# One row per recorded iteration: the weights and both error fractions.
history = {'ws': ws, 'ein': ein, 'eout': eout}
errors = pd.DataFrame(history)
# Plot both error curves against the iteration index.
errors.loc[:, ['ein', 'eout']].plot()

In [None]:
# Pick the recorded weights whose error on the test sample is lowest.
lowest_eout_row = errors.sort_values(by='eout').iloc[0]
w = lowest_eout_row['ws']

In [None]:
# Scatter both coin classes again and overlay the learned decision boundary.
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
ax = dataset[dataset.type == '$1'].plot.scatter('size', 'weight', c='blue', ax=ax)
dataset[dataset.type == '$2'].plot.scatter('size', 'weight', c='red', ax=ax)
ax.legend(['$1', '$2'])
ax.set_title('Medidas de tamaño y peso de diferentes monedas.');

# Boundary: w[0] + w[1]*x + w[2]*y = 0  =>  y = -w[0]/w[2] - w[1]*x/w[2].
# Each y must be evaluated at its own x. The previous code paired x=1 with the
# value computed for 3.5 (and vice versa), which drew a mirrored line.
# NOTE(review): assumes w[1] multiplies 'size' and w[2] multiplies 'weight',
# i.e. the column order is x0, size, weight, type -- confirm against the CSV.
boundary_x = [1, 3.5]
boundary_y = [-w[0]/w[2] - w[1]*x/w[2] for x in boundary_x]
ax.plot(boundary_x, boundary_y);