# Práctica cinco

Grupo 14:
* Joaquín Ibáñez Penalva
* Aurora Zuoris

Para la realización de esta práctica se usarán las librerías numpy, pandas, matplotlib y sklearn.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

## Ejercicio 1

In [None]:
class Perceptron:
  """Binary perceptron classifier trained with full-batch updates.

  The two distinct labels found in ``y`` are encoded as 0 and 1 in sorted
  order; the label with code 1 is the "positive" class, i.e. the one
  predicted when ``X @ pesos_ + pesos_umbral_ > 0``.

  Parameters:
    max_iter: maximum number of passes over the training data.
    mezclar: whether to shuffle the samples before every pass.
    eta: learning rate applied to every update.
    random_state: seed of the random generator used for shuffling.

  Attributes set by ``ajustar``:
    pesos_: weight vector, one entry per feature.
    pesos_umbral_: bias (threshold) term.
  """

  def __init__(self, max_iter=1000, mezclar=True, eta=1.0, random_state=42):
    self._max_iter = max_iter
    self._mezclar = mezclar
    self._eta = eta
    self._random_state = random_state

  def ajustar(self, X, y):
    """Fit the perceptron to the samples ``X`` and labels ``y``.

    Parameters:
      X: 2-D array-like (ndarray or DataFrame) of shape (samples, features).
      y: 1-D array-like with exactly two distinct labels.

    Returns:
      The fitted estimator itself, so calls can be chained.

    Raises:
      ValueError: if the shapes are inconsistent or ``y`` does not contain
        exactly two classes.
    """
    # Work on plain arrays so DataFrame/Series inputs behave the same
    # regardless of whether shuffling is enabled.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y)

    if X.ndim != 2:
      raise ValueError("X debe ser una matriz bidimensional")
    if y.ndim != 1:
      raise ValueError("y debe ser un vector")
    if X.shape[0] != y.shape[0]:
      raise ValueError("X y y deben tener la misma cantidad de filas")

    labels, codes = np.unique(y, return_inverse=True)
    if len(labels) != 2:
      raise ValueError("y debe tener dos clases")

    self._labels = labels
    self._label_to_code = {label: code for code, label in enumerate(labels)}
    self._code_to_label = {code: label for label, code in self._label_to_code.items()}

    weights = np.zeros(X.shape[1])
    bias = 0.0

    # A single generator for the whole fit: re-seeding on every pass would
    # apply the very same permutation each time.
    rng = np.random.RandomState(self._random_state)

    for _ in range(self._max_iter):
      if self._mezclar:
        X, codes = self._shuffle(X, codes, rng)

      predicted_pos = np.dot(X, weights) + bias > 0
      actual_pos = codes == 1

      # A sample is an error when prediction and truth disagree.
      errors = predicted_pos != actual_pos
      if not errors.any():
        break

      # +1 for missed positives, -1 for false positives, 0 when correct.
      direction = actual_pos.astype(float) - predicted_pos.astype(float)

      bias += self._eta * direction.sum()
      weights += self._eta * np.dot(direction, X)

    self.pesos_ = weights
    self.pesos_umbral_ = bias
    return self

  def predecir(self, X):
    """Return the predicted label for every row of ``X``.

    Must be called after ``ajustar``. The output is an array holding the
    original labels (not the internal 0/1 codes).
    """
    X = np.asarray(X, dtype=float)
    positive = np.dot(X, self.pesos_) + self.pesos_umbral_ > 0
    return self._labels[positive.astype(int)]

  def _shuffle(self, X, y, rng=None):
    """Return ``X`` and ``y`` as arrays reordered by one common permutation.

    ``rng`` is the ``RandomState`` to draw from; when omitted a new one is
    seeded with ``random_state``.
    """
    if rng is None:
      rng = np.random.RandomState(self._random_state)

    X = np.asarray(X)
    y = np.asarray(y)
    idx = rng.permutation(X.shape[0])
    return X[idx], y[idx]


## Ejercicio 2

In [None]:
# Load the dataset from a CSV file in the working directory. The cells below
# read the columns 'pc1', 'pc2' and 'clase' from this frame.
df = pd.read_csv('iris_pca_2d.csv')

# Preview the first rows (shown as the cell output in a notebook).
df.head()

In [None]:
# Build every subset of classes obtained by leaving one class out. The classes
# are visited in sorted order so the subplot rows and the colour of each class
# are the same on every run (set iteration order is not reproducible).
classes = set(df['clase'].unique())
split_classes = [classes - {left_out} for left_out in sorted(classes)]

# One row per class subset: training split on the left, test split on the right.
fig, axs = plt.subplots(len(split_classes), 2, figsize=(10, 15), squeeze=False)

pc1_range = np.linspace(df['pc1'].min(), df['pc1'].max(), 100)
pc2_limits = (df['pc2'].min(), df['pc2'].max())

for i, c in enumerate(split_classes):
  pair = sorted(c)
  class_to_col = dict(zip(pair, ('red', 'blue')))

  local_df = df[df['clase'].isin(pair)]
  x_train, x_test, y_train, y_test = train_test_split(
    local_df[['pc1', 'pc2']], local_df['clase'], test_size=0.3, random_state=42
  )
  perceptron = Perceptron(max_iter=50_000).ajustar(x_train, y_train)

  w1, w2 = perceptron.pesos_
  b = perceptron.pesos_umbral_

  splits = ((x_train, y_train, 'Entrenamiento'), (x_test, y_test, 'Test'))
  legend_elems = [
    plt.Line2D([0], [0], marker='o', color='w', label=label,
               markerfacecolor=class_to_col[label], markersize=10)
    for label in pair
  ]
  name = '-'.join(pair)

  for ax, (x_part, y_part, title) in zip(axs[i], splits):
    ax.scatter(x_part['pc1'], x_part['pc2'], c=y_part.map(class_to_col))

    # Decision boundary: w1 * pc1 + w2 * pc2 + b = 0.
    if w2 != 0:
      ax.plot(pc1_range, -(w1 / w2) * pc1_range - (b / w2), 'k-')
    elif w1 != 0:
      # The boundary does not depend on pc2, so it is a vertical line.
      ax.axvline(-b / w1, color='k', linestyle='-')

    ax.set_ylim(*pc2_limits)
    ax.legend(handles=legend_elems)
    ax.set_title(f'{title} {name}')