In [1]:
!pip install forgi



In [2]:
import forgi 
import forgi.graph.bulge_graph as fgb
import os
import shutil 
import pickle 

In [3]:
def main(DBN_folder_path,dest_path,min_length):
  '''
  Turn a folder of energy-annotated DBN files into pickled Contig objects.

  The source folder is first copied to <dest_path>/<folder name>_DBNfiles. Only the
  copies are touched afterwards: getFreeEnergy rewrites each of them in place so that
  the free energy value is removed (the format forgi documents as FASTA). One Contig is
  built per remaining file and pickled to <dest_path>/<folder name>_pickled/<contig name>.

  :DBN_folder_path: the path of the parent folder of all target DBN files;
                    target DBN files include the free energy value
  :dest_path: the path of the parent folder in which the two new folders
              (the rewritten DBN copies and the pickled contigs) are created
  :min_length: files whose sequence line is shorter than this are skipped
  '''

  # Work on a copy: the files in the new folder are rewritten, the source files are not.
  new_DBN_folder_path=copyFolder(DBN_folder_path,dest_path,'_DBNfiles')

  # Create the folder that will contain all pickled files.
  # normpath drops a trailing separator, which would otherwise make basename() return ''.
  source_name=os.path.basename(os.path.normpath(DBN_folder_path))
  data_dir=os.path.join(dest_path,source_name+'_pickled')
  os.mkdir(data_dir)

  processed=0
  for DBN_file_path in absoluteFilePaths(new_DBN_folder_path):
    file_name=os.path.basename(DBN_file_path)
    # Hidden files (e.g. macOS '.DS_Store') are not DBN records; short contigs are ignored.
    if file_name.startswith('.') or ContigIsSmall(DBN_file_path,min_length):
        continue
    # Strip the extension, whatever its length (same result as [:-4] for '.dbn').
    contig_name=os.path.splitext(file_name)[0]
    processed+=1
    print(processed,'working on ',contig_name)
    free_energy=getFreeEnergy(DBN_file_path) # rewrites the copied DBN file in place
    contig=Contig(DBN_file_path,free_energy)
    pickle_path=os.path.join(data_dir,contig_name)
    with open(pickle_path, 'wb') as handle:
       pickle.dump(contig, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
class Contig:
  '''
  Summary of the secondary structure stored in one DBN file.

  Attributes set by __init__:
    bg              the forgi BulgeGraph parsed from the file
    name            file name without its extension
    length          bg.seq_length
    free_energy     the given free energy converted to float
    unpaired_ratio  percentage of '.' characters (countUnpaired) relative to length
    stems, hairpins, interior_loops, multiloops
                    element counts computed by the count* helpers below
  '''

  def __init__(self,DBN_file_path,free_energy):
    '''
    :DBN_file_path: path of a DBN file that no longer carries the free energy value
    :free_energy: free energy of the structure; anything float() accepts
    '''
    # The trailing comma unpacks a one-element result; a file yielding any other
    # number of graphs raises ValueError here.
    self.bg, =fgb.BulgeGraph.from_fasta(DBN_file_path)
    # Strip the extension, whatever its length (same result as [:-4] for '.dbn').
    self.name = os.path.splitext(os.path.basename(DBN_file_path))[0]
    self.length= self.bg.seq_length
    self.free_energy = float(free_energy)
    self.unpaired_ratio=100*(countUnpaired(DBN_file_path)/(self.length))
    self.stems=Contig.countStems(self.bg)
    self.hairpins=Contig.countHairpins(self.bg)
    self.interior_loops= Contig.countInteriorLoops(self.bg)
    self.multiloops= Contig.countMultiloops(self.bg)


  # The helpers take the graph explicitly and use no instance state, so they are
  # static: callable both as Contig.countX(bg) and as instance.countX(bg).
  @staticmethod
  def countStems(bg):
    '''Return the number of elements yielded by bg.stem_iterator().'''
    return sum(1 for _ in bg.stem_iterator())


  @staticmethod
  def countHairpins(bg):
    '''Return the number of elements yielded by bg.hloop_iterator().'''
    return sum(1 for _ in bg.hloop_iterator())

  @staticmethod
  def countInteriorLoops(bg):
    '''Return the number of elements yielded by bg.iloop_iterator().'''
    return sum(1 for _ in bg.iloop_iterator())

  @staticmethod
  def countMultiloops(bg):
    '''
    Return the number of junctions in bg.junctions whose description is exactly
    {'regular_multiloop'}; junctions carrying any other or additional label are not counted.
    '''
    return sum(1 for junction in bg.junctions
               if bg.describe_multiloop(junction)=={'regular_multiloop'})


  def describe_contig(self):
    '''Print every summary attribute of the contig, one per line.'''
    print('name = ',self.name)
    print('length = ',self.length)
    print('free_energy = ',self.free_energy )
    print('unpaired = ',self.unpaired_ratio)
    print('stems = ',self.stems)
    print('hairpins = ',self.hairpins)
    print('interior_loops = ',self.interior_loops)
    print('multiloops = ',self.multiloops)

In [5]:
def ContigIsSmall(DBN_file_path,min_length):
    '''
    Tell whether the sequence on line 2 of a DBN file is shorter than min_length.

    Only the first two lines are read. A file that ends before line 2 is treated
    as having a sequence of length 0 instead of raising IndexError.

    :DBN_file_path: path of the DBN file to inspect
    :min_length: minimum number of characters the stripped sequence line must have
    :return: True when the sequence is shorter than min_length, False otherwise
    '''
    with open(DBN_file_path, 'r') as dbn:
        dbn.readline()             # line 1 is skipped; only line 2 is measured
        sequence = dbn.readline()  # '' when the file has no second line
    return len(sequence.strip()) < min_length


def copyFolder(original_folder_path,destination,suffix):
  '''
  Copy a folder tree to <destination>/<folder name><suffix>.

  :original_folder_path: folder to copy; a trailing path separator is allowed
  :destination: parent folder in which the copy is created
  :suffix: text appended to the original folder name to name the copy
  :return: path of the newly created copy
  '''
  # normpath strips a trailing separator; without it basename('dir/') is ''
  # and the copy would be named just `suffix`.
  folder_name=os.path.basename(os.path.normpath(original_folder_path))
  new_folder_path=os.path.join(destination,folder_name+suffix)
  shutil.copytree(original_folder_path, new_folder_path)
  return new_folder_path

def absoluteFilePaths(directory):
  '''
  Lazily yield the absolute path of every file below directory, walking
  sub-folders as well, in the order os.walk reports them.
  '''
  for root, _subdirs, names in os.walk(directory):
      yield from (os.path.abspath(os.path.join(root, name)) for name in names)

def getFreeEnergy(DBN_file_path):
  '''
  Return the free energy found on line 3 of a DBN file and !!!update!!! the file to a
  regular DBN file by removing the free energy value from that line.

  Line 3 is expected to look like `((..)) (-12.30)`: the text before the first space is
  kept as the structure, the text after it is the energy in parentheses.

  :DBN_file_path: path of the DBN file; it is overwritten in place
  :return: the energy as a string without the surrounding parentheses (e.g. '-12.30'),
           or '' when line 3 contains no space (the file is then left as it is)
  '''
  with open(DBN_file_path,'r') as file:
    content = file.readlines()

  structure, separator, annotation = content[2].partition(' ')
  if not separator:
    # Nothing follows the structure, so there is no energy to remove.
    return ''

  # Strip the line break and padding first, so the result does not depend on
  # whether the file ends with a newline.
  free_energy = annotation.strip()
  if free_energy.startswith('('):
    free_energy = free_energy[1:]
  if free_energy.endswith(')'):
    free_energy = free_energy[:-1]

  # Keep a line break only when more lines follow, so that they are not glued
  # onto the structure line.
  content[2] = structure + ('\n' if len(content) > 3 else '')
  with open(DBN_file_path,'w') as file:
    file.writelines(content)
  return free_energy

def countUnpaired(DBN_file_path):
  '''
  Count the '.' characters in the structure on line 3 of a DBN file.

  Only the text before the first space is counted, so the decimal point of a free
  energy annotation such as `(-12.30)` that may still follow the structure is ignored.

  :DBN_file_path: path of the DBN file to read
  :return: number of '.' characters in the structure part of line 3
  '''
  with open(DBN_file_path,'r') as file:
    content = file.readlines()
  structure = content[2].partition(' ')[0]
  return structure.count('.')
