<a href="https://colab.research.google.com/github/EnzoAndree/ColabBEAST/blob/main/BEAST2_old.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# BEAST2

Notebook Version = 1.0.0b

To report issues or suggest improvements, please open an issue in the [EnzoAndree/ColabBEAST](https://github.com/EnzoAndree/ColabBEAST) repository.

Made with ❤️ by [@eguerreroaraya](https://twitter.com/eguerreroaraya)

In [None]:
#@title Check Nvidia GPU
import ipywidgets as widgets
import pandas as pd
import psutil
from time import sleep, time
from bokeh.resources import INLINE
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, push_notebook, curdoc
from bokeh.themes import built_in_themes
from bokeh.models import BasicTickFormatter, Legend, NumeralTickFormatter
from pathlib import Path
import shlex, subprocess

def get_compute(name):
  """Map a GPU model name to the compute-capability digits used for the build.

  The name is searched for each known model substring in a fixed order and the
  first hit wins. Returns None when no known model substring is present.
  """
  # Order matters: it is the order in which the substrings are tried.
  capability_by_model = (
      ('A100', '80'),
      ('V100', '70'),
      ('P4', '61'),
      ('T4', '75'),
      ('P100', '60'),
      ('K80', '37'),
  )
  for model, capability in capability_by_model:
    if model in name:
      return capability
  return None
def get_packagemanager_list(output, header_lines=4):
  """Extract package names from captured `packagemanager -list` output.

  Args:
    output: Sequence of text lines, as captured from the command.
    header_lines: Number of leading lines to skip before the package rows
      (default 4, the offset this notebook uses).

  Returns:
    A list holding the first whitespace-separated token of every non-blank
    row after the skipped lines.
  """
  # Blank rows are skipped because `split()[0]` would raise IndexError on them.
  return [row.split()[0] for row in output[header_lines:] if row.strip()]

# Print the full nvidia-smi report for the attached GPU.
!nvidia-smi
# Capture GPU name and total memory as CSV (no header, no units); only the
# first output line is used, split on ', ' into the two fields.
Gcardinfo = !nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader,nounits
Gcardname, Gcardram = Gcardinfo[0].split(', ')

In [None]:
%%capture
#@title Install BEAST2 and Beagle
checkinstall = Path('./BEAST2Beagle_READY')
if not checkinstall.is_file():
    !wget https://github.com/CompEvol/beast2/releases/download/v2.6.6/BEAST.v2.6.6.Linux.tgz
    !tar -zxvf BEAST.v2.6.6.Linux.tgz
    !rm -fr BEAST.v2.6.6.Linux.tgz
    %cd /content
    !apt-get install build-essential autoconf automake libtool git pkg-config
    !wget https://github.com/beagle-dev/beagle-lib/archive/refs/tags/v3.1.2.tar.gz
    !tar -zxvf v3.1.2.tar.gz
    %cd beagle-lib-3.1.2
    # http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
    # Nvidia A100 compute_80
    # Nvidia V100 compute_70
    # Nvidia P4 compute_61
    # Nvidia T4 compute_75
    # Nvidia P100 compute_60
    # Nvidia K80 compute_37
    !sed -i 's/-arch compute_30/-gencode=arch=compute_{get_compute(Gcardname)},code=sm_{get_compute(Gcardname)}/' configure.ac
    !./autogen.sh
    !./configure --prefix=$HOME
    !make install
    !make check
    %env LD_LIBRARY_PATH=/usr/lib64-nvidia:/root/lib
    %cd /content
    !touch BEAST2Beagle_READY

In [None]:
#@title Install BEAST2 modules
# !./beast/bin/packagemanager -add CoupledMCMC
# !./beast/bin/packagemanager -add bacter
# !./beast/bin/packagemanager -add NS
outpack = !./beast/bin/packagemanager -list
modlist = get_packagemanager_list(outpack)
typocheck = {x.lower(): x for x in modlist}
modules = '' #@param {type:"string"}
#@markdown - `modules` Specify the extra modules to be installed separated by commas. Leave it blank if you do not need extra modules.
#@markdown  - Use `!./beast/bin/packagemanager -list` to get a list of modules availables. 

to_install = []
if modules != '':
  errorfound = False
  modules = modules.split(',')
  modules = [m.strip() for m in modules]
  for m in modules:
    if m.lower() in typocheck.keys():
      to_install.append(typocheck[m.lower()])
    else:
      errorfound = True
      print(f'{m} is not found in the modulule list {modlist}')
      break
  if not errorfound:
    print(f'This modules will be installed: {to_install}')
    for m in to_install:
      !./beast/bin/packagemanager -add {m}
  


In [None]:
#@title Check Beagle resources
# Run BEAST with -beagle_info to print the Beagle resources it reports.
!./beast/bin/beast -beagle_info 

In [None]:
#@title Run BEAST2
curdoc().theme = 'dark_minimal'

Path_to_XML = 'beast/examples/testGTR.xml' #@param {type:"string"}
upload_custom_XML = False #@param {type:"boolean"}
job_name = '4MCtest' #@param {type:"string"}
parallel_jobs =  1 #@param {type:"integer"}
resume = False #@param {type:"boolean"}
overwrite = True #@param {type:"boolean"}
beagle = True #@param {type:"boolean"}
beagle_GPU = True #@param {type:"boolean"}
beagle_double = True #@param {type:"boolean"}

if upload_custom_XML:
  # Imported here so the upload works on a fresh kernel, where `files` has not
  # been imported by any earlier cell.
  from google.colab import files
  print(f'upload custom XML')
  XML = files.upload()
  if not XML:
    raise ValueError('No XML file was uploaded')
  # Use the first uploaded file as the BEAST input.
  Path_to_XML = str(list(XML.keys())[0])

# Flags shared by every chain, in the order they are passed to BEAST.
beast_flags = [flag for enabled, flag in (
    (resume, '-resume'),
    (overwrite, '-overwrite'),
    (beagle, '-beagle'),
    (beagle_GPU, '-beagle_GPU'),
    (beagle_double, '-beagle_double'),
) if enabled]

# Launch one background BEAST process per chain.
output_dirs = []  # output prefixes, one per chain
pids = []
procs = []
for run in range(1, parallel_jobs+1):
  name = f'{job_name}_MC{run}_'
  output_dirs.append(name)
  # Build the argument vector directly so paths or job names containing spaces
  # stay single arguments; the quoted string is only for display.
  argv = ['./beast/bin/beast', *beast_flags, '-prefix', name, Path_to_XML]
  cli = ' '.join(shlex.quote(arg) for arg in argv)
  print(cli)
  # The child gets its own copies of the log descriptors, so the notebook's
  # handles can be closed as soon as the process has been started.
  with open(f'{job_name}_MC{run}.out.logger', 'wb') as stdout, \
       open(f'{job_name}_MC{run}.err.logger', 'wb') as stderr:
    proc = subprocess.Popen(argv, stdout=stdout, stderr=stderr)
  pids.append(proc.pid)
  procs.append(psutil.Process(proc.pid))


print(f'Running {parallel_jobs} BEAST: {output_dirs}; PID: {pids}')
#print('Please wait 3 seconds')
#sleep(3)

In [None]:
#@title Show the chain logs
# Line colours for the trace plot, indexed by chain position (14 entries).
colormap = ['#D9598C', '#F1D2E7', '#F3AA51', '#FCF695', '#567ACE', '#B7D3E9', 
            '#BBB0DC', '#DB706C', '#F1C3AA', '#CEE5D5', '#A7E0E1', '#A0522D', 
            '#145250', '#4B0299']

def is_log_ready(logname, min_rows=4):
  """Tell whether a trace log already holds enough rows to be plotted.

  Args:
    logname: Path of a tab-separated log file; text after '#' is treated as a
      comment and the first column becomes the index.
    min_rows: The log counts as ready once it has more than this many rows.

  Returns:
    True when the file parses and has more than `min_rows` rows; False when it
    has fewer rows or cannot be read or parsed.
  """
  try:
    df = pd.read_csv(logname, sep='\t', comment='#', index_col=0)
  except Exception:
    # Deliberately broad: a missing, empty or half-written log all mean
    # "not ready yet" while the caller is polling.
    return False
  return len(df) > min_rows

def find_logs(jobname, n_jobs=None, poll_interval=1.0):
  """Block until every chain of a job has a `.log` file, then return them.

  Chain `run` is matched with the glob `{jobname}_MC{run}_*.log` in the
  current working directory.

  Args:
    jobname: Job name used as the file-name prefix.
    n_jobs: Number of chains to wait for; defaults to the notebook-level
      `parallel_jobs` setting.
    poll_interval: Seconds to sleep between directory scans, so waiting does
      not spin a CPU core.

  Returns:
    A list with one Path per chain, in chain order. When several files match
    a chain, the alphabetically first one is used.
  """
  if n_jobs is None:
    n_jobs = int(parallel_jobs)
  wkd = Path('./')
  while True:
    logs = []
    for run in range(1, n_jobs+1):
      matches = sorted(wkd.glob(f'{jobname}_MC{run}_*.log'))
      if matches:
        logs.append(matches[0])
    if len(logs) == n_jobs:
      return logs
    sleep(poll_interval)
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
%config InlineBackend.figure_format = 'retina'
# matplotlib.rcParams['figure.figsize'] = (13,8)

plt.style.use('dark_background')
fig = plt.figure(figsize=(13,8))
ax = fig.add_subplot(1, 1, 1) 
ax.set_title('BEAST')
pidsrunning = True
pidiszombie = False
logs = find_logs(job_name)
# print(logs)
# Check if all logs are populated
populatedlog = False
while not populatedlog:
  all_log_ready = [is_log_ready(l) for l in logs]
  populatedlog = all(all_log_ready)
zeroindexcol = ''
for i, log in enumerate(logs):
  df = pd.read_csv(log, sep='\t',comment='#', index_col=0)
  burning = round(len(df)*0.10)
  zeroindexcol = str(df.columns[0])
  plt.plot(df.index[burning:], 
           df[df.columns[0]].iloc[burning:], 
           label=log.name,
           color=colormap[i],
           linewidth=2)
si = 0
ti = time()
speed = 0
while not pidiszombie:
  pidsrunning = any(psutil.pid_exists(p) for p in pids)
  pidiszombie = all(p.status() == psutil.STATUS_ZOMBIE for p in procs)
  ax.cla()
  for i, log in enumerate(logs):
    df = pd.read_csv(log.name, sep='\t',comment='#', index_col=0)
    sf = int(df.index[-1])
    tf = time()
    burning = round(len(df)*0.10)
    # print(df.index[burning:])
    ax.plot(df.index[burning:], 
            df[df.columns[0]].iloc[burning:], 
            label=log.name,
            color=colormap[i],
            linewidth=2)
  ax.legend(bbox_to_anchor=(1.04,1), borderaxespad=0)
  # ax.set_title(f'BEAST {zeroindexcol} (current speed: {speed:.1f} {timescale}/1M states)')
  deltastate = sf - si
  deltatime = (tf - ti)/60 # min
  timescale = 'Min'
  # print(deltatime)
  if deltastate != 0:
    if deltatime/deltastate*1_000_000 < 1:
      deltatime *= 60
      timescale = 'Sec'
    speed = deltatime/deltastate*1_000_000
    si = sf
    ti = tf
    ax.set_title(f'BEAST {zeroindexcol} (current speed: {speed:.1f} {timescale}/1M states)')   
  display(fig)
  clear_output(wait = True)
  plt.pause(1)


In [None]:
#@title Download results
from google.colab import files
wkd = Path('./')
outputfiles = wkd.glob(f'{job_name}*')
to_download = ' '.join([x.name for x in list(outputfiles) if not x.name.endswith('.zip')])

!zip -FSr $job_name'.zip' $to_download
files.download(f'{job_name}.zip')