# Confusion Matrix Classifier


The code produces a summary of prediction results for a classification problem by determining the number of TP, FP, FN and TN:

- TP (True Positives): Point is both expected and found
- FN (False Negatives): Point is expected but not found
- FP (False Positives): Point is not in expected but is found
- TN (True Negatives): Point is neither expected nor found (usually undefined)

The number of TP, FP, FN and TN is obtained by comparing the set of expected points and the set of found points.

Aspects to consider when trying to classify into TP, FN, FP

1 - An expected value may correspond to more than one prediction (only one of them is chosen as the True Positive; the rest must be classified as False Positives)

2 - No predictions found for an expected value

3 - The difference between two consecutive expected values is smaller than the threshold

4 - A found value lies within the threshold of two different expected values

5 - Null values, structural errors (If there are any)

In [1]:
# X axis locations, in milliseconds

# Predicted values: the points reported by the model under evaluation.
found = [
    1, 2, 3,
    11, 12, 13,
    21, 22, 23,
    61, 62, 63,
    131, 132, 133,
    2000,
    48029384,
]

# Expected values: the ground-truth points the model should have reported.
expected = [
    2,
    12,
    22,
    62,
    90,
    132,
    190,
    9000,
]

# Maximum distance, to either side of an expected value, at which a
# prediction still counts as a match for it.
threshold = 20


## Pre-processing of found values

If there are any null values, structural errors, unwanted outliers or duplicate values, the code should be able to deal with them.

 - For Null Values, if there are any, the entries could be either removed or substituted with sensible values.
 - The structural errors, probably dealt with within the prediction algorithm, must be fixed.
 - In this case, unwanted outliers or duplicate values should not be eliminated, as we are evaluating the prediction model.

## Define Limits

Create a list of limits to examine, for each found value, whether the value is an exact expected value, a non-expected value within the threshold (within limits), a non-expected value outside the threshold (outside the limits), or a missing value that is expected but not found.


In [2]:
import itertools
limits = []

# For every pair of neighbouring expected values, record where the acceptance
# window of the left value ends and where the window of the right value
# begins. Exactly two limits are appended per pair so that, after the outer
# limits are added below, limits[2*k] and limits[2*k + 1] always delimit the
# window of expected[k].
for left, right in zip(expected, expected[1:]):
    gap = right - left

    if gap < threshold * 2:
        # The +-threshold windows of the two values would overlap, so the
        # space between them is split at the midpoint. This also covers a gap
        # exactly equal to the threshold, which must still contribute its two
        # limits to keep the pairs aligned with `expected`.
        midpoint = left + gap / 2
        limits.append(midpoint)
        limits.append(midpoint)
    else:
        # The values are far enough apart for each to keep its full
        # +-threshold window.
        limits.append(left + threshold)
        limits.append(right - threshold)


# Concatenate Limits (Account for threshold in initial and final expected values)

limits_to_concatenate = [[expected[0] - threshold], limits, [expected[-1] + threshold]]
limits = list(itertools.chain.from_iterable(limits_to_concatenate))


print(limits)

[-18, 7.0, 7.0, 17.0, 17.0, 42, 42, 76.0, 76.0, 110, 112, 152, 170, 210, 8980, 9020]


In [3]:
import numpy as np

# Result buckets filled in by the classification cells below.
True_Positive = []   # one matched prediction per expected value that was found
False_Positive = []  # predictions that were not selected as a True Positive
False_Negative = []  # expected values for which no prediction was matched

# One flag per expected value; it is switched to 1 once a prediction is seen
# inside that value's limits, and the entries still at 0 become False Negatives.
Reverse_False_Negative = [0 for _ in expected]

# Scratch list holding the predictions collected for the window being examined.
Test = []

# Position in `found` from which the scan of the next window resumes.
counter = 0


# Function to later choose the one True Positive from the range of found values within the threshold

def closest_value(input_list, input_value):
    """Return the element of ``input_list`` nearest to ``input_value``.

    The list is converted to a NumPy array, the absolute distance of every
    element to ``input_value`` is computed, and the element at the position of
    the smallest distance is returned. When several elements are equally
    close, the first one in ``input_list`` is returned.
    """
    candidates = np.asarray(input_list)
    distances = np.abs(candidates - input_value)
    nearest = np.argmin(distances)
    return candidates[nearest]


# limits[i] and limits[i + 1] delimit the half-open acceptance window
# [lower, upper) of expected[i // 2]. `found` is scanned once, in ascending
# order, resuming for each window where the previous one stopped.
for i in range(0, len(limits), 2):
    target = expected[i // 2]
    lower, upper = limits[i], limits[i + 1]

    # Consume every prediction that lies before the end of this window (or
    # that equals the expected value itself). Predictions below `lower` fall
    # in the gap between two windows: they are skipped here and end up as
    # False Positives.
    while counter < len(found) and (found[counter] < upper or found[counter] == target):
        if found[counter] >= lower:
            Test.append(found[counter])
        counter += 1

    # Decide as soon as the window has been scanned, instead of waiting for a
    # later prediction beyond it: otherwise a match in the last window would
    # never be recorded. The same condition drives both the True Positive and
    # the "found" flag, so an expected value can never be both.
    if Test:
        True_Positive.append(closest_value(Test, target))
        Reverse_False_Negative[i // 2] = 1
    Test.clear()


# Every prediction that was not selected as a True Positive is a False
# Positive. Each True Positive cancels exactly one occurrence, so duplicated
# predictions of a matched value are still counted as False Positives.
unmatched = list(True_Positive)

for value in found:
    if value in unmatched:
        unmatched.remove(value)
    else:
        False_Positive.append(value)


In [4]:
def find_indices(List, Item):
    """Return the positions of every element of ``List`` equal to ``Item``.

    Positions are reported in ascending order; an empty list is returned when
    no element compares equal to ``Item``.
    """
    return [position for position, element in enumerate(List) if element == Item]

In [5]:
# Flags still at 0 mark the expected values that no prediction matched.
Index = find_indices(Reverse_False_Negative, 0)

for missing_position in Index:
    False_Negative.append(expected[missing_position])


In [6]:
# Show the three result buckets, one per line: False Negatives first, then
# True Positives, then False Positives.
for bucket in (False_Negative, True_Positive, False_Positive):
    print(bucket)

[90, 190, 9000]
[2, 12, 22, 62, 132]
[1, 3, 11, 13, 21, 23, 61, 63, 131, 133, 2000, 48029384]


# Classification Function (All in a Single Function)

In [7]:
def _acceptance_windows(targets, threshold):
    """Return one half-open ``(lower, upper)`` window per value of ``targets``.

    ``targets`` must be sorted in ascending order. A value normally owns the
    range ``[value - threshold, value + threshold)``. When two neighbours are
    closer than ``2 * threshold`` their ranges would overlap, so the space
    between them is split at the midpoint instead.
    """
    windows = []
    last = len(targets) - 1
    for k, target in enumerate(targets):
        lower = target - threshold
        upper = target + threshold
        if k > 0 and target - targets[k - 1] < 2 * threshold:
            lower = targets[k - 1] + (target - targets[k - 1]) / 2
        if k < last and targets[k + 1] - target < 2 * threshold:
            # Same expression as the neighbour's lower bound, so both windows
            # share exactly the same boundary value.
            upper = target + (targets[k + 1] - target) / 2
        windows.append((lower, upper))
    return windows


def analyse_results(expected, found, threshold=20):
    """
    Compare two sets of points (found vs expected) to summarise how well the
    found points match the expected ones.

    Every expected point owns an acceptance window of +- ``threshold`` around
    it (lower bound included, upper bound excluded); windows of neighbouring
    expected points that would overlap are split at their midpoint. For each
    expected point the found point closest to it inside its window is counted
    as a True Positive. Every other found point, including extra or duplicated
    ones inside a window, is a False Positive. An expected point with no found
    point in its window is a False Negative. True Negatives are not reported.

    Both inputs are sorted internally, so they may be given in any order, and
    either of them may be empty.

    :param expected: List of indexes expected to be found
    :param found: List of indexes found by the algorithm
    :param threshold: Maximum threshold to either side (so found point has to be within +- the threshold)
    :return: Set of three strings, kept in this form for backward
             compatibility (a set has no defined order):
              - "True Positives: <n>": expected points that were found
              - "False Positives: <n>": found points that matched no expected point
              - "False Negatives: <n>": expected points that were not found
    """
    import bisect

    targets = sorted(expected)
    points = sorted(found)

    true_positive = []
    false_negative = []

    # Index of the first found point not yet assigned to a window; it keeps a
    # single found point from being matched against two expected points.
    consumed = 0

    for target, (lower, upper) in zip(targets, _acceptance_windows(targets, threshold)):
        start = max(bisect.bisect_left(points, lower), consumed)
        stop = max(
            bisect.bisect_left(points, upper),
            # Exact hits always count, even when the window is empty
            # (threshold == 0).
            bisect.bisect_right(points, target),
            start,
        )
        candidates = points[start:stop]
        consumed = stop

        if candidates:
            # Ties are resolved in favour of the smallest candidate.
            true_positive.append(min(candidates, key=lambda point: abs(point - target)))
        else:
            false_negative.append(target)

    # Each True Positive uses up exactly one found point; all the others are
    # False Positives, duplicates included.
    false_positive_count = len(points) - len(true_positive)

    return {
        "True Positives: " + str(len(true_positive)),  # expected + found
        "False Positives: " + str(false_positive_count),  # not expected + extra ones found within the threshold
        "False Negatives: " + str(len(false_negative)),  # expected + not found
    }



In [8]:
# Run the all-in-one classifier on the sample data defined at the top

analyse_results(expected=expected, found=found, threshold=threshold)

{'False Negatives: 3', 'False Positives: 12', 'True Positives: 5'}