# Percettrone

In [8]:
import numpy as np
import pandas as pd
import sklearn.metrics as metrics

In [9]:
# Import del dataset già suddiviso in train e test
train = pd.read_csv('./Dataset/Train.csv', index_col=0)

test = pd.read_csv('./Dataset/Test.csv', index_col=0)

# Suddivisione di tra istanze e label
X_train = train.iloc[:, :-1]
y_train = train.iloc[:, -1]

X_test = test.iloc[:, :-1]
y_test = test.iloc[:, -1]

In [10]:
def _sigmoid(x):
    return 1 / (1 + np.exp(-x))

def predict(weights: np.ndarray, x: np.ndarray, threshold: float, activation_function: str = 'sign') -> int:
  if activation_function == 'sigmoid':
    return 1 if (_sigmoid(np.dot(weights, np.insert(x, 0, threshold))) >= 0) else 0
  elif activation_function == 'sign':
    return 1 if (int(np.dot(weights, np.insert(x, 0, threshold)) >= 0)) else 0
  else:
    raise ValueError('Invalid activation function')

In [11]:
def fit(data_without_label: pd.DataFrame, labels: pd.Series, threshold: float, activation_function: str = 'sign') -> np.ndarray:
  # Inizializzazione dei pesi in modo randomico
  initial_weights = [np.random.rand() for _ in range(data_without_label.shape[1])]
  # Aggiunta del peso associato alla soglia
  initial_weights.insert(0, -threshold)
  # Conversione in numpy array
  weights = np.array(initial_weights)
  # Calcolo dei pesi
  print('Training...')
  return _compute_weights(data_without_label, labels, weights, threshold, activation_function=activation_function)
  

def _compute_weights(data_without_label: pd.DataFrame, labels: pd.Series, 
                     weights: np.ndarray,threshold: float, learning_rate: float = 0.01, 
                     max_iterations: int = 10_000, 
                     activation_function: str = 'sign') -> np.ndarray: 
  no_convergence = False
  n_iterations = 0 # Numero di iterazioni

  while not no_convergence:
    # Se il numero di iterazioni supera il limite impostato, si interrompe il training
    if n_iterations > max_iterations:
      print('\n\nIteration limit')
      return weights

    if n_iterations % 10 == 0:
      completed_iterations = n_iterations / max_iterations * 100
      progess_bar = "=" * int(completed_iterations / 5) + ">" + " " * (10 - int(completed_iterations / 5))
      print(f'[{progess_bar}] {completed_iterations:.2f}%\r', end='')

    
    no_convergence = True
    # Per ogni istanza del dataset
    for index, row in data_without_label.iterrows():
      y = predict(weights, row.to_numpy(), threshold, activation_function=activation_function)

      # Se il prodotto scalare è diverso da quello atteso, si aggiornano i pesi
      if y != labels[index]:
        # Calcolo la variazione dei pesi
        delta = np.multiply((learning_rate * (labels[index] - y)), np.insert(row.to_numpy(), 0, -threshold))
        # Aggiorno i pesi
        weights = weights + delta

        no_convergence = False
    
    n_iterations += 1

  return weights


In [12]:
threshold = 0.5

weights = fit(X_train, y_train, threshold)

Training...
[===>       ] 19.40%

In [None]:
y_train_pred = []

for index, row in X_train.iterrows():
  y_train_pred.append(predict(weights, row.to_numpy(), threshold))

print("Accuracy sull'insieme di train:", round(metrics.accuracy_score(y_train, y_train_pred)*100, 2), "%")

Accuracy sull'insieme di train: 96.74 %


In [None]:
y_test_pred = []

for index, row in X_test.iterrows():
  y_test_pred.append(predict(weights, row.to_numpy(), threshold))

print("Accuracy sull'insieme di test:", round(metrics.accuracy_score(y_test, y_test_pred)*100, 2), "%")



Accuracy sull'insieme di test: 96.95 %
