#Environment Description: Sequence Alignment Game

##Representation
Following is the representation of elements in the sequence

*   -1 - Dash (-)
*   0 - Empty slot (padding beyond the end of a sequence)
*   1 - Adenine (A)
*   2 - Thymine (T)
*   3 - Cytosine (C)
*   4 - Guanine (G)


##Actions

*   0 - no dash
*   1 - dash on seq1
*   2 - dash on seq2

##Rules

The objective of the sequence alignment game is to get the highest score through making decisions. In this game, there are two sequences of nucleotides (like a four letter alphabet). The game ends when the index reaches the end of the last sequence, and at that point the final scores are tallied.

##Score

Score is computed as follows: for each index in the sequence, 1 is added to the score if the two elements match, -1 is added if there is a mismatch, and -2 is added if there is a gap in one of the sequences.

##Variables

*   index - the position the game is currently at
*   done - represents whether or not the game is over
*   seq1 - one nucleotide sequence to be aligned
*   seq2 - another nucleotide sequence to be aligned
*   observation - flat array laid out as (seq1, index, seq2): the zero-padded seq1, then the current index, then the zero-padded seq2

##Methods

*   reset - returns a fresh new game, resetting the index and making new sequences
*   step - given an action to take, returns observation, reward, and whether game is done
*   display - displays the two sequences and color-coded current index
*   randomAction - returns a random integer between 0 and TOTALACTIONS - 1 (inclusive), representing a random action
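*   replay - once the game is done, re-applies the recorded actions to the starting sequences, redrawing the display after each one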

In [0]:
import numpy as np
import random
import sys
import time

class SequenceAlignmentEnvironment:
  """Two-sequence alignment game with a reset/step interface.

  Element codes: -1 dash, 0 empty slot (padding), 1 A, 2 T, 3 C, 4 G.
  Action codes: 0 no dash, 1 dash on seq1, 2 dash on seq2.

  Both sequences live in zero-padded buffers of length 2*SEQUENCELENGTH so
  that inserted dashes have room to push the nucleotides to the right.

  An observation is a flat float array laid out as (seq1, index, seq2),
  i.e. 4*SEQUENCELENGTH + 1 values.

  NOTE: the game ends when the index reaches the end of the buffer
  (2*SEQUENCELENGTH), not the end of the nucleotides, and empty slots are
  scored as mismatches. Both behaviours are kept as they were.
  """

  def __init__(self):
    self.SEQUENCELENGTH = 50 #arbitrary length
    self.TOTALACTIONS = 3 #no dash, dash on 1, dash on 2
    self.TOTALOPTIONS = 4 #A, T, C, G

    self.done = False
    self.index = 0
    self.seq1 = np.zeros(2*self.SEQUENCELENGTH)
    self.seq2 = np.zeros(2*self.SEQUENCELENGTH)

    #one recorded action per step, consumed by replay
    self.actions = np.zeros(2*self.SEQUENCELENGTH)
    #independent copies so the starting state can never be edited through seq1/seq2
    self.startingseq1 = self.seq1.copy()
    self.startingseq2 = self.seq2.copy()

  def reset(self):
    """Start a new game with two fresh random sequences.

    Returns:
      The initial observation (seq1, index, seq2).
    """
    self.done = False
    self.index = 0
    #forget the previous episode's actions, otherwise replay would mix them in
    self.actions = np.zeros(2*self.SEQUENCELENGTH)
    self.seq1 = np.zeros(2*self.SEQUENCELENGTH)
    self.seq2 = np.zeros(2*self.SEQUENCELENGTH)

    #draws stay interleaved (seq1, seq2, seq1, ...) so seeded runs are unchanged
    for i in range(self.SEQUENCELENGTH):
      self.seq1[i] = np.random.randint(1, self.TOTALOPTIONS+1)
      self.seq2[i] = np.random.randint(1, self.TOTALOPTIONS+1)

    self.startingseq1 = self.seq1.copy()
    self.startingseq2 = self.seq2.copy()

    return self.__makeObservation()

  def step (self, action):
    """Apply one action at the current index and advance the game.

    Args:
      action: 0 (no dash), 1 (dash on seq1) or 2 (dash on seq2).

    Returns:
      (observation, reward, done). The reward is the change in the total
      alignment score caused by the action. Once the game is done, further
      calls change nothing and return the current observation with reward 0.
    """
    if self.done:
      #stepping a finished game used to keep mutating the sequences and
      #could index past the buffer; it is now a no-op
      return self.__makeObservation(), 0, self.done

    self.actions[self.index] = action
    observation = self.__makeObservation()

    self.__applyAction(action)

    new_observation = self.__makeObservation()
    reward = self.__calculateReward(observation, new_observation)

    if self.index >= min(self.seq1.size, self.seq2.size):
      self.done = True
    return new_observation, reward, self.done

  def display (self):
    """Render both sequences as ANSI-colored text.

    Returns:
      (num_of_char, to_print): the rendered string, one line per sequence,
      and its length in characters (escape codes included).
    """
    to_print = self.__renderSequence(self.seq1) + '\n'
    to_print += self.__renderSequence(self.seq2) + '\n'
    return (len(to_print), to_print)

  def randomAction (self):
    """Return a random action code in [0, TOTALACTIONS - 1]."""
    return np.random.randint(0, self.TOTALACTIONS)

  def replay (self, display_time = 0.2):
    """Replay a finished game on stdout, one recorded action at a time.

    Does nothing unless the game is done.

    Args:
      display_time: seconds to pause after drawing each step.
    """
    if not self.done:
      return

    self.seq1 = self.startingseq1.copy()
    self.seq2 = self.startingseq2.copy()
    self.index = 0

    length, string = self.display()
    sys.stdout.write(string)

    for action in self.actions:
      #back up over the previous frame before drawing the next one
      sys.stdout.write('\b' * length)
      #applied directly: step() is a no-op while done is True
      self.__applyAction(action)

      length, string = self.display()
      sys.stdout.write(string)

      time.sleep(display_time)

  def customInput (self, seq1, seq2):
    """Start a new game from caller-supplied sequences.

    Each sequence is copied and zero-padded to the buffer length
    (2*SEQUENCELENGTH); the index, done flag and recorded actions are reset.

    Args:
      seq1: 1-D sequence of element codes, at most 2*SEQUENCELENGTH long.
      seq2: same as seq1, for the second sequence.

    Raises:
      ValueError: if a sequence is not 1-D or is longer than the buffer.
    """
    self.seq1 = self.__padSequence(seq1)
    self.seq2 = self.__padSequence(seq2)
    self.startingseq1 = self.seq1.copy()
    self.startingseq2 = self.seq2.copy()
    self.index = 0
    self.done = False
    self.actions = np.zeros(2*self.SEQUENCELENGTH)

  def __padSequence (self, seq):
    #copies seq into a zero-padded float buffer of the fixed game length
    capacity = 2*self.SEQUENCELENGTH
    values = np.asarray(seq, dtype=float)
    if values.ndim != 1:
      raise ValueError('sequence must be one-dimensional')
    if values.size > capacity:
      raise ValueError('sequence of length %d exceeds the buffer size %d'
                       % (values.size, capacity))
    padded = np.zeros(capacity)
    padded[:values.size] = values
    return padded

  def __applyAction (self, action):
    #inserts a dash at the current index if asked, then advances the index;
    #the last slot is dropped so the buffer keeps its length
    if action == 1:
      self.seq1 = np.insert(self.seq1, self.index, -1)[:-1]
    elif action == 2:
      self.seq2 = np.insert(self.seq2, self.index, -1)[:-1]
    self.index += 1

  def __renderSequence (self, sequence):
    #renders one sequence as colored letters, stopping at the first empty slot
    pieces = []
    for index, element in enumerate(sequence):
      if element == 0:
        break
      color_to_display = self.__matchingColor(index)
      pieces.append('\x1b[' + color_to_display + 'm' + self.__numberToCode(element) + '\x1b[0m')
    return ''.join(pieces)

  def __matchingColor (self, index):
    #picks the ANSI color for one column of the alignment
    ele1 = self.seq1[index]
    ele2 = self.seq2[index]
    if (self.index == index):
      return '37;40' #current index
    elif (ele1==-1 or ele2==-1):
      return '43' #yellow, one element is a dash
    elif (ele1==ele2):
      return '37;42' #matching elements
    elif (ele1==0 or ele2==0):
      return '37;46' #beyond end of sequence
    else:
      return '37;41' #mismatching elements

  def __numberToCode (self, number):
    #maps an element code to its letter; unknown codes render as '?' so
    #display cannot fail on string concatenation
    codes = {-1: '-', 1: 'A', 2: 'T', 3: 'C', 4: 'G'}
    return codes.get(number, '?')

  def __makeObservation (self):
    #observation layout: seq1, then the current index, then seq2
    return np.concatenate((self.seq1, [self.index], self.seq2))

  def __calculateReward (self, oldobservation, newobservation):
    #reward is the change in the total alignment score
    return (self.__score(newobservation) - self.__score(oldobservation))

  def __score (self, observation):
    #return score for a given observation
    length = 2*self.SEQUENCELENGTH
    seqA = observation[:length]
    #skip the index slot; slicing to the end keeps the last seq2 element,
    #which the old hard-coded [101:200] slice dropped
    seqB = observation[length+1:]
    score = 0
    for eleA, eleB in zip(seqA, seqB):
      if (eleA == -1 or eleB == -1):
        score += -2 #gap
      elif (eleA == eleB and eleA != 0):
        score += 1 #match
      else:
        score += -1 #mismatch, or an empty slot on either side
    return score