In [10]:
import random
import copy

In [11]:
from IPython.display import HTML
shell = get_ipython()

def adjust_font_size():
    display(HTML('''<style>
        body{
            font-size: 32px;
        }
    '''))

if adjust_font_size not in shell.events.callbacks['pre_execute']:
    shell.events.register('pre_execute', adjust_font_size)

In [119]:
genomesize = 48
DNAcharacters = 'TGCA'
mutationprob = 5
class individual:
    def __init__(self, initial=[]): # constructor - constructs a new individual object
        #print("Creating a new individual")
        self.fitness = 0
        self.genome = []
        for i in range(0,genomesize):
            if initial != []:
                self.genome.append(initial[i])
            else:
                self.genome.append(random.choice(DNAcharacters))
        self.calcFitness()

    def print(self):
        for c in self.genome:
            print(c,end = "")
        print("  fitness:" + str(self.fitness))

    def calcFitness(self):
        self.fitness = 1
        for c in DNAcharacters:
            self.fitness *= 4 * self.getScore(c) / genomesize
    
    def getScore(self, base):
        '''
        Gets the total distance between each pair of the provided base in the genome.
        '''
        dist = 0
        count = 0
        last = -1
        for i, b in enumerate(self.genome):
            if b == base:
                # Don't do the math for the first base found
                if last > -1:
                    dist += i - last
                if b == self.genome[(i + 4) % genomesize]:
                    dist += 1
                last = i
                count += 1
                
        return dist * count
    
    def mutation(self):
        for i in range(0, genomesize, 4):
            # Allow random bases to be replaced if the codon doesn't have all bases
            if not self.check_codon(i):
                for j in range(0, 4):
                    if (random.uniform(0, 100) < mutationprob):
                        new_base = random.choice(DNAcharacters)
                        self.genome[i + j] = new_base if new_base not in self.genome[i:i + 4] else random.choice(DNAcharacters)
            elif (random.uniform(0, 100) < mutationprob):
                # Swap some bases if the codon doesn't match the following one
                if self.genome[i:i + 4] != self.genome[i + 4:i + 8]:
                    j = random.randint(i, i + 2)
                    temp = self.genome[j]
                    self.genome[j] = self.genome[j + 1]
                    self.genome[j + 1] = temp
                # Swap the first 2 if the first base matches the last one of the previous codon
                if self.genome[i] == self.genome[i - 1]:
                    temp = self.genome[i]
                    self.genome[i] = self.genome[i + 1]
                    self.genome[i + 1] = temp
        self.calcFitness()
        
    def check_codon(self, i):
        '''
        Return true if the current and next 3 bases in the genome are all different.
        '''
        for j in range(i, i + 4):
            if DNAcharacters[j - i] not in self.genome[i:i + 4]:
                return False
        return True

    def copy(self, source):
        self.fitness = source.fitness
        for i in range(0,genomesize):
            self.genome[i] = source.genome[i]

   # def __str__(self):
    #    output = "hi "
     #   for c in self.genome:
      #      output = output + c
      #  output += " "
      #  output += "fitness: "
      #  output += str(self.fitness)
      #  return output

popsize = 100
tourn_size = 3

class population:
    def __init__(self): # constructor - constructs a new pop object
        #print("Creating a new population")
        self.avg_fitness = 0
        self.generation = 0
        self.gen_diverged = -1
        self.the_pop = []
        for i in range(0, popsize):
            self.the_pop.append(individual())
        self.best = self.the_pop[0]
        self.worst = self.the_pop[0]
    
    def calcstats(self):
        self.avg_fitness = 0
        self.best = self.the_pop[0]
        self.worst = self.the_pop[0]
        
        for i in self.the_pop:
            self.avg_fitness += i.fitness
            
            if i.fitness < self.worst.fitness:
                self.worst = i
            elif i.fitness > self.best.fitness:
                self.best = i
        self.avg_fitness /= popsize

    def generational(self):
        self.generation += 1
        
        tempPop = population()
        for i in range(0, popsize, 2): #  needs an even pop size
            parent = self.tournament() # select, returns an index
            parent2 = self.tournament() # select, returns an index
            tempPop.the_pop[i].copy(self.the_pop[parent])
            tempPop.the_pop[i+1].copy(self.the_pop[parent2])
            tempPop.crossover(i,i+1)
            tempPop.the_pop[i+1].mutation()
            tempPop.the_pop[i].mutation()
            #tempPop.the_pop[i].calcFitness(), already done in mutation
        #mutate them?
        #when new/temp population is full, copy new/temp pop back into the_pop
        for i in range(0,popsize):
            self.the_pop[i].copy(tempPop.the_pop[i])
        self.calcstats()
    
    def onepoint_crossover(self,p1,p2):
        crossover_point = random.randint(0,genomesize)
        for j in range(crossover_point,genomesize):
            temp = self.the_pop[p1].genome[j] # remember parent 1's character
            self.the_pop[p1].genome[j] = self.the_pop[p2].genome[j]
            self.the_pop[p2].genome[j] = temp

    def crossover(self,p1,p2): # uniform crossover
        for j in range(0,genomesize):
            if random.randint(0,100) < 10: # uniform crossover
                #print(j)
                temp = self.the_pop[p1].genome[j] # remember parent 1's character
                self.the_pop[p1].genome[j] = self.the_pop[p2].genome[j]
                self.the_pop[p2].genome[j] = temp
                # temp = a
                # a = b
                # b = temp
                #self.the_pop[p1].genome[j],self.the_pop[p2].genome[j] = self.the_pop[p2].genome[j],self.the_pop[p1].genome[j]
                # a,b=b,a
                
    def tournament(self):
        best_so_far = random.randint(0,popsize-1)
        best_fitness = self.the_pop[best_so_far].fitness
        #print(best_so_far)
        for i in range(0,tourn_size - 1):
            current = random.randint(0,popsize-1)
            #print(current)
            current_fit = self.the_pop[current].fitness
            if(current_fit > best_fitness):
                best_so_far = current
                best_fitness = current_fit
        return best_so_far


In [25]:
p = population()
p.the_pop[0].print()
p.the_pop[1].print()
p.onepoint_crossover(0,1)
p.the_pop[0].print()
p.the_pop[1].print()

CCAGTCCTGTCCGAGGCCAGTGGCGGGGATCCAGATCGGAGCGGTCCTCA  fitness:2929543.5055104
TCATAGCCGTAGGGAAGTTTCGACGACTTGATTTCTATGCTGTAGTCATA  fitness:3572975.9010815998
CCAGTGCCGTAGGGAAGTTTCGACGACTTGATTTCTATGCTGTAGTCATA  fitness:2929543.5055104
TCATACCTGTCCGAGGCCAGTGGCGGGGATCCAGATCGGAGCGGTCCTCA  fitness:3572975.9010815998


In [96]:
p = population()
for i in range(0, 30):
    print('Generation ', i)
    p.the_pop[0].mutation()
    p.the_pop[0].print()

Generation  0
CATGATCTACGTTACTGCTCCCGTCTTGAATCCATGCTTTTTAAGCAG  fitness:3978000.0
Generation  1
CATGATCTAGCTTACTGCTCCCGTCTTGAATCCATGCGATTTAAGCAG  fitness:4413588.8671875
Generation  2
CTAGATCGAGCTTACTGCTCCCGTCTTGAATGCATGCGATTAAAGCAG  fitness:5021683.333333334
Generation  3
CTAGATCGAGCTTACTGCTCCGGTCTTGAATGCATGCGATTAAGGCAG  fitness:4988888.3701292435
Generation  4
CTAGATCGAGCTAAGTCCTCCAGTCTTGAATGCATGCGATTAAGGCAG  fitness:5126301.73611111
Generation  5
CTAGATCGAGCTAATCCCTCCAGTCTTGAATGCATGCGATTAAGGCAG  fitness:5230920.138888888
Generation  6
CTAGATCGAGCTTATCCCTCCAGTCTTGAATGCATGCGATTAAGGCAG  fitness:5126301.736111111
Generation  7
CTAGATCGAGCTTATCCCTCACGTCTTGAATGCATGCGATTAAGGCAG  fitness:5023775.701388889
Generation  8
CTAGATCGAGCTTATCCCTCACGTCTTGAATGCATGCGATTAACGCAG  fitness:4846685.277777778
Generation  9
CTAGATCGAGCTTATCGCTCACGTCTTGAATGCATGCGATTAACGCAG  fitness:4921249.666666668
Generation  10
CTAGATCGAGCTTATCGCTCACGTCTTGAATGCATGCGATTAGCGCAG  fitness:4720382.333333334
Generation  11
CTAG

In [120]:
# Export data to csv
p = population()
print('General, General, Population 1, Population 1, Population 1, Population 1, Population 2, Population 2, Population 2, Population 2')
print('Generation number, Time since diverging, Averge fitness, Most fit genome, Most fit fitness, Worst fit genome, Average fitness, Most fit genome, Most fit fitness, Worst fit genome')
for i in range(0, 20000):
    p.generational()
print('%d, %d, %.5f, %s, %.5f, %s, %.5f, %s, %.5f, %s' % (p.generation, p.generation - p.gen_diverged, p.avg_fitness, ''.join(p.best.genome), p.best.fitness, ''.join(p.worst.genome), p.avg_fitness, ''.join(p.best.genome), p.best.fitness, ''.join(p.worst.genome)))

General, General, Population 1, Population 1, Population 1, Population 1, Population 2, Population 2, Population 2, Population 2
Generation number, Time since diverging, Averge fitness, Most fit genome, Most fit fitness, Worst fit genome, Average fitness, Most fit genome, Most fit fitness, Worst fit genome
20000, 20001, 8771571.00000, CATGCATGCTGACTGACTGACTGACTGACTGACTGACATGCATGCATG, 8817984.00000, CATGCATGCTGACTGACTGACTGACTGACTGACTGACATGCATGCTAG, 8771571.00000, CATGCATGCTGACTGACTGACTGACTGACTGACTGACATGCATGCATG, 8817984.00000, CATGCATGCTGACTGACTGACTGACTGACTGACTGACATGCATGCTAG


In [115]:
ind = individual(initial=list('GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC'))
ind.fitness

9834496.0