<a href="https://colab.research.google.com/github/HunterAshby/FetchRewards/blob/main/Fetch_Rewards_Exercise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Fetch Rewards NLP Exercise without using libraries.

In [274]:
#Loading given samples into workable variables
sample_one = "The easiest way to earn points with Fetch Rewards is to just shop for the products you already love. If you have any participating brands on your receipt, you'll get points based on the cost of the products. You don't need to clip any coupons or scan individual barcodes. Just scan each grocery receipt after you shop and we'll find the savings for you."
sample_two = "The easiest way to earn points with Fetch Rewards is to just shop for the items you already buy. If you have any eligible brands on your receipt, you will get points based on the total cost of the products. You do not need to cut out any coupons or scan individual UPCs. Just scan your receipt after you check out and we will find the savings for you."
sample_three = "We are always looking for opportunities for you to earn more points, which is why we also give you a selection of Special Offers. These Special Offers are opportunities to earn bonus points on top of the regular points you earn every time you purchase a participating brand. No need to pre-select these offers, we'll give you the points whether or not you knew about the offer. We just think it is easier that way."

In [275]:
#Build a functions that can first compare 
#two given words output 0-1 1 if exact match
def compare_word_similarity(word_one: str, word_two: str) -> float:
  """
  Input: Two words that need to be compared for similarity

  Output: A score between 0 and 1 to show the similarity of the words

  The function works by comparing the length of the words and the letters
  used in the words. The length of the words is worth 50% and the letters are
  worth the remaining 50%
  """

  #Find the length difference in words using a percentage,
  #Shorter word divided by longer word will be the standard
  word_list = [word_one, word_two]
  if len(word_two) > len(word_one):
    word_list = [word_two, word_one]

  length_score = len(word_list[1]) / len(word_list[0])

  #Now we need to find the letter similarity score
  #First check if the words are equal
  if word_one == word_two:
    letter_score = 1

  #If not we move on to checking all the letters themselves
  else:
    #intantiate a dictionary to store letter values
    letters = {}
    for word in word_list:
      #loop over words checking if the letters have already been counted, and
      #counting the ones that have yet to be counted
      for letter in word:
        if letter not in letters.keys():
          letters[letter] = word.count(letter)
        #Once on the second word, subtract the letters that appear in both the 
        #first word and the second word to get a similiarity
        elif word_list.index(word) != 0:
          letters[letter] = letters[letter] - word.count(letter)
    #Sum up the letter differences
    letter_diff = sum(letters.values())

    #Turn the letter simiarity into a value between 0 and 1
    letter_diff = letter_diff / (len(word_one) + len(word_two))

    #Find the inverse of the letter score to make the numbers right
    letter_score = 1 - letter_diff

    #Combine the two scores to get a similarity score for the two words
  similarity_score = (50 * length_score) + (50 * letter_score)
  return similarity_score / 100

##We have a word comparing machine that works, now we need to preprocess the samples and then pass in pairs of words at a time

In [276]:
def remove_punctuation(string: str) -> str:
  """
  Input: string that needs it's punctuation removed

  Output: string with its punctuation removed
  """
  punc = '''!()-[]{};:'"\,<>./?@#$%^&*_~'''
  for letter in string:
    if letter in punc:
      string = string.replace(letter, "")
  return string

In [277]:
def make_list_chunks(lst: list, size: int) -> list:
  """
  Input: list that needs to be split into chunks of n size

  Output: A list of lists made from input list
  """
  for i in range(0, len(lst), size):
    yield lst[i:i + size]
  

In [278]:
def turn_chunks_into_str(lst: list) -> list:
  """
  Input: a list that has been chunked into sizes

  Output: a list where the chunks have been turned into
  consecutive strings
  """
  return_list = []
  for i in lst:
    str_i = str(i)
    str_i = remove_punctuation(str_i)
    return_list.append(str_i)
  return return_list

In [279]:
def compare_phrases(phrase_one: str, phrase_two: str) -> float:
  """
  Input: Two phrases you want to compare

  Output: A value between 0 and 1 comparing the similarity of the phrases

  Using the word similarity function above, we do preprocessing and compare 
  entire phases with this function, we will compare word similarity and length
  similarity using a 50 50 split for the scores
  """
  #First do basic preprocessing, remove punctuation lowercase all letters, that
  #sort of thing
  phrase_one = phrase_one.lower()
  phrase_two = phrase_two.lower()
  phrase_one = remove_punctuation(phrase_one)
  phrase_two = remove_punctuation(phrase_two)

  #Transform phrases into lists for comparison
  list_one = phrase_one.split()
  list_two = phrase_two.split()

  #Find the length score
  if len(list_one) <= len(list_two):
    length_score = len(list_one) / len(list_two)

  elif len(list_one) > len(list_two):
    length_score = len(list_two) / len(list_one)
  
  #Find the word score
  #I am going to take groups of words and compare them, as small changes in
  #sentence structure could place words in a bad spot to be compared for example
  #compering your with reciept, when both words are in the samples
  list_one = list(make_list_chunks(list_one, 1))
  list_two = list(make_list_chunks(list_two, 1))
  list_one = turn_chunks_into_str(list_one)
  list_two = turn_chunks_into_str(list_two)
  
  #Set lists to same length
  if len(list_one) < len(list_two):
    for i in range(len(list_two) - len(list_one)):
      list_one.append("")

  elif len(list_one) > len(list_two):
    for i in range(len(list_one) - len(list_two)):
      list_two.append("")
  
  #Now these two lists can have their words passed into the word compare machine
  running_word_score = 0
  for e, i in enumerate(list_one):
    running_word_score += compare_word_similarity(i, list_two[e])
  
  word_score = running_word_score / len(list_one)

  similarity_score = (word_score * 50) + (length_score * 50)
  return similarity_score / 100

In [280]:
compare_phrases(sample_one, sample_two)

0.7717423880467359

In [281]:
compare_phrases(sample_one, sample_three)

0.6215546207032275