In [1]:
#code of Interactive Quantum Classifier Inspired by Quantum Open System Theory
#LINK https://ieeexplore.ieee.org/document/9533917

#LINK https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9533917

#this code was written by Fernando Maciano de Paula Neto (fernando@cin.ufpe.br) and Eduardo Barreto Brito (ebb2@cin.ufpe.br)

In [2]:
import numpy as np
import pandas as pd
import math

from sklearn import datasets
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

from helpers.icq_methods import create_and_execute_classifier, get_sigmaQ, get_sigmaE, get_U_operator, get_p, update_weights

from tqdm.notebook import tqdm

In [3]:
def create_and_execute_classifiers(vectorX, vectorWs):
  """
    Creates classifiers with differents weights and outputs the index of
    the weight that has the highest probability of having class 1
  """
  list_p11_i = []

  for vectorWs_i in vectorWs:
    # First we create and execute the classifier
    zi, p11_i = create_and_execute_classifier(vectorX, vectorWs_i)  

    # Then we save the probability of being class 1
    list_p11_i.append(p11_i)
  
  # Finally, we get the biggest prob
  return np.argmax(list_p11_i)

In [4]:
def create_and_execute_1_classifier(X, Y, w, n=0.1):
  """"
    Creates, train and executes 1 classifier throughout all instances,
    updating the weights instance per instance (batch-size = 1).
   
    X is a NxM vector of Atributes
    Y is the N vector of Classes
    W is the M vector of Weights
    N is the learning rate
    
    Returns updated_weight, error
  """
  error = 0
  
  for x,y in zip(X,Y):
    # First we create and execute the classifier
    z, p11 = create_and_execute_classifier(x,w)

    # Then we update our weights based on our result
    w = update_weights(w,y,z,x,p11,n)

    # Next we store the error
    if (z != y):
      error += 1

  return w, error

In [5]:
def training_1_batch_classifier(X, Y, w, n=0.1):
  """
    Creates, trains and executes 1 classifier after all instances using the average
    of the input vector (X), class vector (Y), probability of being class 1 and assigned
    class (uses updates_weights(w,y_avg,z_avg,x_avg,p11_avg,n)).
    
    X is a NxM vector of Atributes
    Y is the N vector of Classes
    W is the M vector of Weights
    N is the learning rate

    Returns updated_weight, error
  """
  error = 0
  x_avg = 0
  y_avg = 0
  z_avg = 0
  p11_avg = 0
  lines = X.shape[0]

  # We do something similar to create_and_execute_1_classifier method, but updating only once
  for x,y in zip(X,Y):
    z, p11 = create_and_execute_classifier(x,w)
    x_avg += x/lines
    y_avg += y/lines
    z_avg += z/lines
    p11_avg += p11/lines
    if (z != y):
      error += 1

  w = update_weights(w,y_avg,z_avg,x_avg,p11_avg,n)

  return w, error

In [6]:
def training_n_steps_classifier(X, Y, w, batch_size, Nsteps, n=0.1, stop_error_zero=False):
  """
    Creates, trains and executes classifiers through batches using @training_1_batch_classifier method.
    
    X is a NxM vector of Atributes
    Y is the N vector of Classes
    w is the M vector of Weights
    batch_size is the number of instances used per batch training
    Nsteps is the number of times the classifier will be training. Similar to number of epochs
    n is the learning rate
    stop_error_zero defines whether we should stop when we have error equals zero

    Returns the weights after the training and the min error obtained after executing
  """
  # First we need to know how many batches we will have for training
  lines = X.shape[0]
  splits = math.ceil(lines / batch_size)
  
  min_error = np.Inf
  min_w_error = np.Inf
  for i in range(Nsteps):
    errors = 0
    # For each batch split, we need to train our classfier
    for split in range(splits):
      # First step is to define our dataset
      X_batch = X[split*batch_size:(split+1)*batch_size , :]
      Y_batch = Y[split*batch_size:(split+1)*batch_size]

      # We save the weights in which we're executing the current classifier in case this is the best one
      w_old = w[:]

      # Then we train and classify for this part of the dataset
      w, error = training_1_batch_classifier(X_batch,Y_batch,w=w,n=n)
      errors += error
    
    # and save the sum of errors for all batches if needed 
    if (min_error > errors):
      min_w_error = w_old
      min_error = errors
      
    if (errors == 0 and stop_error_zero):
      break
  return min_w_error, min_error

In [7]:
def replicate_classes(data_set, features_names, classes):
  """
    Creates datasets which assigns 1 to one class and 0 to another, and balance
    the number of instances of class 0 and class 1.
    
    Returns an array with all datasets. For Iris dataset
    list_x_y[0] = datasets where all instances of class 0 has class 1 and instances of other classes has class 0
    list_x_y[1] = datasets where all instances of class 1 has class 1 and instances of other classes has class 0
    list_x_y[2] = datasets where all instances of class 2 has class 1 and instances of other classes has class 0
    
    It also duplicates the instances with the targeted class, making all datasets having 100 instances of class 0
    and 100 instances of class 1 for Iris dataset.
  """
  list_x_y = []
  n_classes = len(classes)

  for class_i in classes:
    # We don't want to change the original dataset, so we make a copy of it
    y_class_i = data_set.copy()

    # First we change all classes to n + 1, in order for it to have a special value
    y_class_i.loc[ y_class_i["target"] == class_i  , "target"]  = n_classes+1

    # Then, we change every class different from the one we want to 0
    y_class_i.loc[ y_class_i["target"] < n_classes  , "target"]  = 0

    # Finally, we change every class that is equal to the one we want to 1
    y_class_i.loc[ y_class_i["target"] == (n_classes+1)  , "target"]  = 1

    # Last but not least, we replicate the dataset to have a balanced number of instances with
    # desired and undesired classes. As we're dealing only with Iris, which has 50/50/50, we only
    # need to replicate once
    y_class_i = pd.concat([y_class_i, y_class_i[y_class_i["target"]==1]], axis=0)
  
    # Now we split between attributes and target
    list_x_y.append([y_class_i[features_names], y_class_i["target"]])

  return list_x_y

In [8]:
def test_many_classifiers(X, y, list_of_weights_classifiers):
  """
    Uses the classifier weights to try to predict the correct class and then prints the metrics

    Returns precision, recall, f1_measure, accuracy, y_pred
  """
  lines = X.shape[0]
  y_pred = []

  # For each instance, we try to predict the correct class
  for i in range(lines):
    output_class = create_and_execute_classifiers(X.iloc[i, :].values, list_of_weights_classifiers)
    y_pred.append(output_class)
  
  # Then we get the correct classes to start recording
  y_true = y["target"].tolist()

  # To do so, we count the number of hits
  hit=0
  for y, yhat in zip(y_true, y_pred):
    if (y == yhat):
      hit+=1

  # Finally, we calculate every score we need
  precision = precision_score(y_true, y_pred, labels=[0,1,2], average='micro')
  recall = recall_score(y_true, y_pred, labels=[0,1,2],average='micro')
  f1_measure = f1_score(y_true, y_pred, labels=[0,1,2], average='micro')
  accuracy = accuracy_score(y_true, y_pred)

  return precision, recall, f1_measure, accuracy, y_pred

In [9]:
def avg_metrics(measures):
  """ 
    Prints the average of the execution portraid in measures.
    The "measures" param must be a vector of the following tuple:
    (precision, recall, f1_measure, accuracy)
  """
  precisions = 0
  recalls = 0
  f1_measures = 0
  accs = 0
  count = 0

  for precision, recall, f1_measure, accuracy in measures:
    precisions += precision
    recalls += recall
    f1_measures += f1_measure
    accs += accuracy
    count+=1
    
  print("acc", accs/count)
  print("precision", precisions/count)
  print("recall", recalls/count)
  print("f1_measure", f1_measures/count)

In [10]:
def standard_scaling(x_train, x_test):
    """
        Uses the scikit-learn StandardScaler to normalize the dataset. 
        See https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

        It also uses the same normalizing factor on x_test.
    """
    x_train_copy = x_train.copy()
    x_test_copy = x_test.copy()

    scaler = StandardScaler()
    scaler.fit(x_train_copy)
    
    x_train_copy = scaler.transform(x_train_copy)
    x_test_copy = scaler.transform(x_test_copy)

    iris = datasets.load_iris(as_frame=True)

    x_train_copy = pd.DataFrame(x_train_copy, columns = iris['feature_names'])
    x_test_copy = pd.DataFrame(x_test_copy, columns = iris['feature_names'])

    return x_train_copy, x_test_copy

In [11]:
def maximum_absolute_scaling_by_column(x_train, x_test):
    """
        Divides the whole column by the max absolute value available.
        
        It also uses the same normalizing factor on x_test.
    """
    # copy the dataframe
    x_train_copy = x_train.copy()
    x_test_copy = x_test.copy()
    
    # apply maximum absolute scaling
    for column in x_train_copy.columns:
        col_abs_max = x_train_copy[column].abs().max()
        
        x_train_copy[column] = x_train_copy[column]  / col_abs_max
        x_test_copy[column] = x_test_copy[column]  / col_abs_max
    
    return x_train_copy, x_test_copy

In [12]:
def min_max_scaling_by_column(x_train, x_test):
    """
        Divides the whole column by the difference between its max and min values.

        It also uses the same normalizing factor on x_test.
    """
    # copy the dataframe
    x_train_copy = x_train.copy()
    x_test_copy = x_test.copy()
    
    # apply min-max scaling
    for column in x_train_copy.columns:
        col_min = x_train_copy[column].min()
        col_max = x_train_copy[column].max()

        x_train_copy[column] = (x_train_copy[column] - col_min) / (col_max - col_min)
        x_test_copy[column] = (x_test_copy[column] - col_min) / (col_max - col_min)
        
    return x_train_copy, x_test_copy

In [13]:
def min_max_scaling_by_column_type_2(x_train, x_test):
    """
        Divides the whole column through the following equation:
        column = (value - min) / (max - min) - 1

        It also uses the same normalizing factor on x_test.
    """
    # copy the dataframe
    a, b = -1, 0
    x_train_copy = x_train.copy()
    x_test_copy = x_test.copy()
    
    # apply min-max scaling
    for column in x_train_copy.columns:
        col_min = x_train_copy[column].min()
        col_max = x_train_copy[column].max()
        
        x_train_copy[column] =(b-a)*(x_train_copy[column] - col_min) / (col_max - col_min) + a
        x_test_copy[column] =(b-a)*(x_test_copy[column] - col_min) / (col_max - col_min) + a
        
    return x_train_copy, x_test_copy

In [14]:
def training_k_fold_classifier(kfold, 
                              X, 
                              y, 
                              Nsteps, 
                              batch_size, 
                              n_learning_rate, 
                              features_names, 
                              classes, 
                              normalizing_function=min_max_scaling_by_column_type_2, 
                              random_state=42, 
                              print_error_binary_dataset=False, 
                              print_metrics_per_fold=True):
    # Instantiating the K-Fold cross validation object with 5 folds
    k_folds = StratifiedKFold(n_splits = kfold, shuffle = True, random_state = random_state)
    metrics = []

    # Iterating through each of the folds in K-Fold
    for train_index, test_index in tqdm(k_folds.split(X, y), total=(kfold), desc="Training model"):
      # Splitting the training set from the validation set for this specific fold
      X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
      y_train, y_test = y.iloc[train_index], y.iloc[test_index]

      # Applies the normalizing_function on each Dataset
      X_train, X_test = normalizing_function(X_train, X_test)
      
      # We create a new Pandas dataset
      dataset = pd.concat([X_train, y_train], axis=1)

      # This list contains 3 values, each one being a dataset for each class
      # of the iris dataset. See @replicate_classes doc
      list_of_x_y = replicate_classes(dataset, features_names, classes)
      list_of_trainned_w = []
      count=0

      # Then, for each dataset of each class, we train our model
      for Xi, yi in list_of_x_y:
        # We initialize the weights vector with values 0.1. 
        # The number of rows must be the number of attributes, as it is a diagonal matrix
        w = [0.1 for i in range(Xi.shape[1])]

        # Now we need to convert our datasets into np arrays, as it's the type expected in the training_n_steps_classifier
        X_train = np.array(Xi)
        y_train = np.array(yi)

        # Now we update our weights and save the error for printing
        wi, error = training_n_steps_classifier(X_train,y_train, w, Nsteps=Nsteps, batch_size=batch_size, n=n_learning_rate, stop_error_zero=True)
        if print_error_binary_dataset:
          print("error #", count, error)

        list_of_trainned_w.append(wi)
        count+=1

      # For testing, we try every weights we already had before, in order to find the best one
      precision, recall, f1_measure, accuracy, y_pred = test_many_classifiers(X_test , y_test, list_of_trainned_w)

      # And then we append the metrics for this specific execution
      metrics.append([precision, recall, f1_measure, accuracy])

      if print_metrics_per_fold:
        print("Métricas (precision, recall, f1_measure, accuracy):", metrics[-1])

    return list_of_trainned_w, metrics

In [15]:
# Now that we have built all methods, we can have our training

# First thing is to load the iris dataset from sklearn
iris = datasets.load_iris(as_frame=True)

# Then we get our X and Y
X_iris = pd.DataFrame(data= iris['data'],
                      columns= iris['feature_names'])
y_iris = pd.DataFrame(data= iris['target'],
                      columns= ['target'])

# And then execute it
list_of_trainned_w, metrics = training_k_fold_classifier(kfold=10, 
                                                            X=X_iris, 
                                                            y=y_iris, 
                                                            Nsteps=2000, 
                                                            batch_size=60, 
                                                            n_learning_rate=0.009,
                                                            features_names=iris['feature_names'], 
                                                            classes=[0,1,2],
                                                            normalizing_function=min_max_scaling_by_column_type_2,
                                                            random_state=42) 

Training model:   0%|          | 0/10 [00:00<?, ?it/s]

metricas, precision, recall, f1_measure, acc [0.8666666666666667, 0.8666666666666667, 0.8666666666666667, 0.8666666666666667]
metricas, precision, recall, f1_measure, acc [0.8, 0.8, 0.8000000000000002, 0.8]
metricas, precision, recall, f1_measure, acc [0.7333333333333333, 0.7333333333333333, 0.7333333333333333, 0.7333333333333333]
metricas, precision, recall, f1_measure, acc [0.7333333333333333, 0.7333333333333333, 0.7333333333333333, 0.7333333333333333]
metricas, precision, recall, f1_measure, acc [0.6666666666666666, 0.6666666666666666, 0.6666666666666666, 0.6666666666666666]
metricas, precision, recall, f1_measure, acc [0.7333333333333333, 0.7333333333333333, 0.7333333333333333, 0.7333333333333333]
metricas, precision, recall, f1_measure, acc [0.8666666666666667, 0.8666666666666667, 0.8666666666666667, 0.8666666666666667]
metricas, precision, recall, f1_measure, acc [0.7333333333333333, 0.7333333333333333, 0.7333333333333333, 0.7333333333333333]
metricas, precision, recall, f1_measu

In [16]:
# Most important part - what are our metrics?
avg_metrics(metrics)

acc 0.74
precision 0.74
recall 0.74
f1_measure 0.74


In [17]:
# Now that we have built all methods, we can have our training

# First thing is to load the iris dataset from sklearn
iris = datasets.load_iris(as_frame=True)

# Then we get our X and Y
X_iris = pd.DataFrame(data= iris['data'],
                      columns= iris['feature_names'])
y_iris = pd.DataFrame(data= iris['target'],
                      columns= ['target'])

# And then execute it
list_of_trainned_w, metrics = training_k_fold_classifier(kfold=10, 
                                                            X=X_iris, 
                                                            y=y_iris, 
                                                            Nsteps=2000, 
                                                            batch_size=60, 
                                                            n_learning_rate=0.009,
                                                            features_names=iris['feature_names'], 
                                                            classes=[0,1,2],
                                                            normalizing_function=min_max_scaling_by_column,
                                                            random_state=42) 

Training model:   0%|          | 0/10 [00:00<?, ?it/s]

metricas, precision, recall, f1_measure, acc [0.8, 0.8, 0.8000000000000002, 0.8]
metricas, precision, recall, f1_measure, acc [0.6, 0.6, 0.6, 0.6]
metricas, precision, recall, f1_measure, acc [0.7333333333333333, 0.7333333333333333, 0.7333333333333333, 0.7333333333333333]
metricas, precision, recall, f1_measure, acc [0.4666666666666667, 0.4666666666666667, 0.4666666666666667, 0.4666666666666667]
metricas, precision, recall, f1_measure, acc [0.7333333333333333, 0.7333333333333333, 0.7333333333333333, 0.7333333333333333]
metricas, precision, recall, f1_measure, acc [0.8, 0.8, 0.8000000000000002, 0.8]
metricas, precision, recall, f1_measure, acc [0.7333333333333333, 0.7333333333333333, 0.7333333333333333, 0.7333333333333333]
metricas, precision, recall, f1_measure, acc [0.6666666666666666, 0.6666666666666666, 0.6666666666666666, 0.6666666666666666]
metricas, precision, recall, f1_measure, acc [0.6, 0.6, 0.6, 0.6]
metricas, precision, recall, f1_measure, acc [0.7333333333333333, 0.73333333

In [18]:
# Most important part - what are our metrics?
avg_metrics(metrics)

acc 0.6866666666666668
precision 0.6866666666666668
recall 0.6866666666666668
f1_measure 0.6866666666666668


In [19]:
# Now that we have built all methods, we can have our training

# First thing is to load the iris dataset from sklearn
iris = datasets.load_iris(as_frame=True)

# Then we get our X and Y
X_iris = pd.DataFrame(data= iris['data'],
                      columns= iris['feature_names'])
y_iris = pd.DataFrame(data= iris['target'],
                      columns= ['target'])

# And then execute it
list_of_trainned_w, metrics = training_k_fold_classifier(kfold=10, 
                                                            X=X_iris, 
                                                            y=y_iris, 
                                                            Nsteps=2000, 
                                                            batch_size=60, 
                                                            n_learning_rate=0.009,
                                                            features_names=iris['feature_names'], 
                                                            classes=[0,1,2],
                                                            normalizing_function=maximum_absolute_scaling_by_column,
                                                            random_state=42) 

Training model:   0%|          | 0/10 [00:00<?, ?it/s]

metricas, precision, recall, f1_measure, acc [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
metricas, precision, recall, f1_measure, acc [0.6666666666666666, 0.6666666666666666, 0.6666666666666666, 0.6666666666666666]
metricas, precision, recall, f1_measure, acc [0.5333333333333333, 0.5333333333333333, 0.5333333333333333, 0.5333333333333333]
metricas, precision, recall, f1_measure, acc [0.6666666666666666, 0.6666666666666666, 0.6666666666666666, 0.6666666666666666]
metricas, precision, recall, f1_measure, acc [0.6, 0.6, 0.6, 0.6]
metricas, precision, recall, f1_measure, acc [0.4, 0.4, 0.4000000000000001, 0.4]
metricas, precision, recall, f1_measure, acc [0.8, 0.8, 0.8000000000000002, 0.8]
metricas, precision, recall, f1_measure, acc [0.6666666666666666, 0.6666666666666666, 0.6666666666666666, 0.6666666666666666]
metricas, precision, recall, f1_measure, acc [0.4, 0.4, 0.4000000000000001, 0.4]
metricas, precision, recall, f1_measure, acc [0.6, 0.6, 0.6, 

In [20]:
# Most important part - what are our metrics?
avg_metrics(metrics)

acc 0.5666666666666667
precision 0.5666666666666667
recall 0.5666666666666667
f1_measure 0.5666666666666667


In [15]:
# Now that we have built all methods, we can have our training

# First thing is to load the iris dataset from sklearn
iris = datasets.load_iris(as_frame=True)

# Then we get our X and Y
X_iris = pd.DataFrame(data= iris['data'],
                      columns= iris['feature_names'])
y_iris = pd.DataFrame(data= iris['target'],
                      columns= ['target'])

# And then execute it
list_of_trainned_w, metrics = training_k_fold_classifier(kfold=10, 
                                                            X=X_iris, 
                                                            y=y_iris, 
                                                            Nsteps=2000, 
                                                            batch_size=60, 
                                                            n_learning_rate=0.009,
                                                            features_names=iris['feature_names'], 
                                                            classes=[0,1,2],
                                                            normalizing_function=standard_scaling,
                                                            random_state=42) 

Training model:   0%|          | 0/10 [00:00<?, ?it/s]

Métricas (precision, recall, f1_measure, accuracy): [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
Métricas (precision, recall, f1_measure, accuracy): [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
Métricas (precision, recall, f1_measure, accuracy): [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
Métricas (precision, recall, f1_measure, accuracy): [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
Métricas (precision, recall, f1_measure, accuracy): [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
Métricas (precision, recall, f1_measure, accuracy): [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
Métricas (precision, recall, f1_measure, accuracy): [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
Métricas (precision, recall, f1_measure, accuracy): [0.33333333333333

In [16]:
# Most important part - what are our metrics?
avg_metrics(metrics)

acc 0.33333333333333337
precision 0.33333333333333337
recall 0.33333333333333337
f1_measure 0.33333333333333337
