In [3]:

import pandas as pd
import numpy as np
#import prospr as pr
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
from matplotlib.lines import Line2D
import seaborn as sns
import random
import csv
from collections import Counter
import itertools


In [4]:
class Protein:
    """A chain of residues folded on a 2D square lattice.

    The fold is stored as a list of moves (``hash_fold``), one per link
    between consecutive residues. A move ``m`` shifts the current position
    along axis ``abs(m) - 1`` by the sign of ``m``: ``1``/``-1`` step along
    the first coordinate, ``2``/``-2`` along the second. The first residue is
    always placed at ``(0, 0)``.

    Attributes:
        sequence: residue string; only ``'H'`` residues can form bonds.
        hash_fold: list of moves describing the fold.
        loc_list: lattice coordinates of the placed residues, in chain order.
        collisions: number of residues placed on an already occupied position.
        bonds: number of (residue, neighbouring position) H-H contacts, see
            ``get_bonds``.
        score: tuple ``(bonds, collisions)``.
    """

    # Moves that can be chosen for every link of the chain.
    MOVES = [-2, -1, 1, 2]
    # Offsets to the four lattice positions adjacent to a residue.
    NEIGHBOUR_OFFSETS = ((1, 0), (-1, 0), (0, 1), (0, -1))

    def __init__(self, sequence, folds=None):
        """Create a protein and compute its coordinates and score.

        Args:
            sequence: residue string (e.g. ``"HPPH"``).
            folds: optional iterable of moves. It is copied, so the protein
                never shares its fold list with the caller or with other
                instances (the previous ``folds=[]`` default was one shared
                list for every instance created without folds).
        """
        self.sequence = sequence
        self.hash_fold = [] if folds is None else list(folds)
        self.loc_list = []
        self.score = (0, 0)
        self.collisions = 0
        self.bonds = 0

        self.update_loc_list()
        self.update_score()

    def update_loc_list(self):
        """Rebuild ``loc_list`` by walking the moves in ``hash_fold``."""
        cur_pos = [0, 0]
        loc_list = [(0, 0)]
        for move in self.hash_fold:
            # abs(move) selects the axis, the sign of move the direction.
            cur_pos[abs(move) - 1] += move // abs(move)
            loc_list.append(tuple(cur_pos))
        self.loc_list = loc_list

    def random_fold(self):
        """Replace the fold by uniformly random moves, one per link."""
        self.hash_fold = [
            np.random.choice(self.MOVES) for _ in range(len(self.sequence) - 1)
        ]
        self.update_loc_list()
        # Keep score/bonds/collisions in sync with the new coordinates.
        self.update_score()

    def set_hash(self, index, direction, update=True):
        """Set the move at ``index`` to ``direction``.

        Args:
            index: position in ``hash_fold`` to overwrite.
            direction: the new move.
            update: when true, recompute coordinates and score afterwards.
        """
        self.hash_fold[index] = direction

        if update:
            self.update_loc_list()
            self.update_score()

    def mutate(self):
        """Replace one randomly chosen move by a random move and rescore.

        A chain without any moves (at most one residue) is left unchanged.
        """
        if not self.hash_fold:
            return
        index = np.random.randint(0, len(self.hash_fold))
        self.hash_fold[index] = np.random.choice(self.MOVES)

        self.update_loc_list()
        self.update_score()

    def remove_back(self):
        """Re-draw every move that directly reverses the move before it."""
        for i in range(1, len(self.hash_fold)):
            reversal = -self.hash_fold[i - 1]
            if self.hash_fold[i] == reversal:
                # Draw until the move no longer undoes the previous one.
                new_value = np.random.choice(self.MOVES)
                while new_value == reversal:
                    new_value = np.random.choice(self.MOVES)
                self.hash_fold[i] = new_value
        self.update_loc_list()
        # Keep score/bonds/collisions in sync with the new coordinates.
        self.update_score()

    def _bonded_pairs(self):
        """Return ``(loc, neighbour)`` for every H-H contact.

        A contact is recorded for an 'H' residue at chain index ``i`` and an
        adjacent lattice position that holds an 'H' residue with chain index
        ``>= i + 2``, so every contact is seen from its earlier residue only
        and directly linked residues are skipped.
        """
        # Highest chain index of an 'H' residue on each occupied position;
        # a later 'H' exists at a position iff this index is >= i + 2.
        last_h_index = {}
        for idx, (amino, loc) in enumerate(zip(self.sequence, self.loc_list)):
            if amino == 'H':
                last_h_index[loc] = idx

        pairs = []
        for idx, (amino, loc) in enumerate(zip(self.sequence, self.loc_list)):
            if amino != 'H':
                continue
            x, y = loc
            for dx, dy in self.NEIGHBOUR_OFFSETS:
                neighbour = (x + dx, y + dy)
                if last_h_index.get(neighbour, -1) >= idx + 2:
                    pairs.append((loc, neighbour))
        return pairs

    def get_bonds(self):
        """Count the H-H contacts, store the count in ``bonds`` and return it."""
        self.bonds = len(self._bonded_pairs())
        return self.bonds

    def find_neighbours(self, loc, changed_seq, changed_loc):
        """Return True if an 'H' residue of ``changed_seq`` sits at ``loc``.

        Args:
            loc: lattice position to look for.
            changed_seq: residues, aligned index by index with ``changed_loc``.
            changed_loc: positions of those residues.
        """
        return any(
            item == loc and amino == 'H'
            for amino, item in zip(changed_seq, changed_loc)
        )

    def get_pairs(self):
        """Return the list of ``(loc, neighbour)`` H-H contacts."""
        return self._bonded_pairs()

    def get_collision(self):
        """Count residues placed on an already occupied position.

        The count is stored in ``collisions`` and returned.
        """
        # Every coordinate beyond the first occurrence of a position is a
        # collision, so the count is the number of non-unique entries.
        self.collisions = len(self.loc_list) - len(set(self.loc_list))
        return self.collisions

    def update_score(self):
        """Recompute ``collisions`` and ``bonds`` and refresh ``score``."""
        self.get_collision()
        self.get_bonds()
        self.score = (self.bonds, self.collisions)

    def __str__(self):
        # ``name`` is never set by this class; fall back to a placeholder
        # instead of raising AttributeError when it is absent.
        name = getattr(self, "name", "<unnamed>")
        return f"Protein: {name}\nSequence: {self.sequence}"



In [5]:
def get_ordered_positions(protein):
    """
    Fetch the positions of the currently folded amino chain in order of
    placement.

    Args:
        protein: object exposing ``hash_fold`` (list of moves) and
            ``sequence`` (residue string).

    Returns:
        A 2D numpy array with one row ``[x, y, amino]`` per placed amino.
        Because coordinates and amino letters share one array, every entry
        is a string. A chain without moves yields a single row (shape
        ``(1, 3)``) so callers can always index rows and columns.

    Raises:
        IndexError: if ``protein.sequence`` is empty.
    """
    # Fetch done moves and the aminos used so far.
    moves = protein.hash_fold
    aminos = protein.sequence[: len(moves) + 1]

    # The first amino is always placed at the origin.
    cur_pos = np.zeros(2, dtype=np.int64)
    rows = [np.array([*cur_pos, aminos[0]])]

    # Collect the rows first and stack once, instead of re-allocating the
    # whole array for every amino.
    for amino, move in zip(aminos[1:], moves):
        # abs(move) selects the axis, the sign of move the direction.
        cur_pos[abs(move) - 1] += move // abs(move)
        rows.append(np.array([*cur_pos, amino]))

    return np.vstack(rows)


In [6]:
def hc_c(protein, max_iterations):
    """Hill climber that minimises the number of collisions of a fold.

    Starting from a copy of ``protein``, each iteration mutates a copy of
    the current fold and keeps it when it has no more collisions than the
    best count seen so far (ties are accepted). ``protein`` itself is never
    modified.

    Args:
        protein: starting ``Protein``.
        max_iterations: number of mutations to try.

    Returns:
        Tuple ``(current_protein, best_solution, best_fitness, data)``:
        the last accepted protein, its ``hash_fold`` list, its collision
        count, and a log of ``[iteration, best_fitness]`` entries. The log
        starts with ``[0, initial_fitness]`` and gains one entry per accepted
        mutation; iterations are 0-based, so a mutation accepted in the very
        first iteration is also logged with index 0.
    """
    current_protein = Protein(protein.sequence, folds=list(protein.hash_fold))
    best_fitness = current_protein.get_collision()
    best_solution = current_protein.hash_fold
    data = [[0, best_fitness]]

    for iteration in range(max_iterations):
        candidate = Protein(current_protein.sequence, folds=list(current_protein.hash_fold))
        candidate.mutate()

        if candidate.collisions <= best_fitness:
            # The candidate is already an independent, fully scored copy and
            # is not touched again, so adopt it directly instead of building
            # (and re-scoring) yet another Protein.
            current_protein = candidate
            best_solution = current_protein.hash_fold
            best_fitness = current_protein.collisions
            data.append([iteration, best_fitness])

    return current_protein, best_solution, best_fitness, data

In [7]:
def hc_c_ffa(protein, max_iterations):
    """Collision hill climber that accepts moves by fitness frequency.

    Instead of comparing collision counts directly, the search counts how
    often each collision count has been encountered (presumably Frequency
    Fitness Assignment, going by the function name). Each iteration
    increments the counter of the candidate's collision count and of the
    current one, and the candidate is accepted when its collision count has
    been seen no more often than the current one. ``protein`` itself is never
    modified.

    Args:
        protein: starting ``Protein``.
        max_iterations: number of mutations to try.

    Returns:
        Tuple ``(current_protein, best_solution, best_fitness, data)``:
        the last accepted protein (not necessarily the best one), the
        ``hash_fold`` list of the best accepted protein, its collision count,
        and a log of ``[iteration, current_fitness, best_fitness]`` entries.
        The log starts with an entry for iteration 0 and gains one entry per
        accepted mutation; iterations are 0-based.
    """
    current_protein = Protein(protein.sequence, folds=list(protein.hash_fold))
    current_fitness = current_protein.get_collision()
    best_fitness = current_fitness
    best_solution = current_protein.hash_fold
    data = [[0, current_fitness, best_fitness]]

    # How often each collision count has been encountered so far; the
    # starting fold counts as one encounter.
    fitness_counts = Counter({current_fitness: 1})

    for iteration in range(max_iterations):
        candidate = Protein(current_protein.sequence, folds=list(current_protein.hash_fold))
        candidate.mutate()

        # Both counters are incremented, so equal collision counts are
        # counted twice in the same iteration.
        fitness_counts[candidate.collisions] += 1
        fitness_counts[current_fitness] += 1

        if fitness_counts[candidate.collisions] <= fitness_counts[current_fitness]:
            # The candidate is already an independent, fully scored copy and
            # is not touched again, so adopt it directly instead of building
            # (and re-scoring) yet another Protein.
            current_protein = candidate
            current_fitness = current_protein.collisions

            if current_fitness <= best_fitness:
                best_solution = current_protein.hash_fold
                best_fitness = current_fitness

            data.append([iteration, current_fitness, best_fitness])

    return current_protein, best_solution, best_fitness, data

In [8]:
def random_sequence(length, HP = 0.5):
    """Build a random residue string of 'H' and 'P' characters.

    Args:
        length: number of residues to generate.
        HP: probability that a residue is 'P'; otherwise it is 'H'. With
            ``HP=0.0`` the sequence consists of 'H' only.

    Returns:
        The generated sequence as a string of ``length`` characters.
    """
    # One draw from random.random() per residue, in chain order.
    residues = ["P" if random.random() < HP else "H" for _ in range(length)]
    return "".join(residues)

In [9]:
# Experiment: for every sequence length, run both collision hill climbers
# from the same random starting fold and collect what they return.
SEQUENCE_LENGTHS = range(50, 201, 25)
RUNS_PER_LENGTH = 3
MAX_ITERATIONS = 100000

# Each entry is [sequence_length, list of per-run results]; a per-run result
# is [current_protein, best_solution, best_fitness, data].
results, resultsFFA = [], []

for i in SEQUENCE_LENGTHS:
    iter_results, iter_resultsFFA = [], []

    for j in range(RUNS_PER_LENGTH):
        # HP = 0.0 makes random_sequence emit 'H' residues only.
        sequence = random_sequence(i, HP = 0.0)
        protein = Protein(sequence)
        protein.random_fold()

        # Plain hill climber.
        current_protein, best_solution, best_fitness, data = hc_c(protein, MAX_ITERATIONS)
        iter_results.append([current_protein, best_solution, best_fitness, data])

        # Frequency-based variant, started from the same fold.
        current_protein, best_solution, best_fitness, data = hc_c_ffa(protein, MAX_ITERATIONS)
        iter_resultsFFA.append([current_protein, best_solution, best_fitness, data])

        print(f"{i} iter {j}")

    results.append([i, iter_results])
    resultsFFA.append([i, iter_resultsFFA])