In [None]:
import csv
import time
import re 
from pyarrow import parquet
import pandas as pd
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
import hostess
from hostess.aws.ec2 import create_launch_template
from hostess.aws.ec2 import Cluster
from hostess.aws.ec2 import ls_instances
from hostess.aws.s3 import Bucket
from hostess.aws.ec2 import Instance, ls_instances
from hostess.serverpool import ServerPool

In [None]:
# SET UP FILES: create the run log and write its header row.
log_file = "modelphotom_run.csv"

# Column order must match the rows that logger() appends to this file.
headers = ["catalog_out", "source_count", "elapsed_time", "name", "ip", "cleaned_err"]

# Mode "x" refuses to overwrite an existing log. Treat "already exists" as
# success so this cell can be re-run (e.g. Restart & Run All) without raising
# and without clobbering rows written by an earlier run.
try:
    with open(log_file, "x", newline="") as antsonalog:
        writer = csv.writer(antsonalog)
        writer.writerow(headers)
except FileExistsError:
    print(f"{log_file} already exists; leaving it untouched.")
        

In [None]:
# Re-declare the log path so this cell also works when the log-creation cell
# above was skipped because the file already exists.
log_file = "modelphotom_run.csv"

# Read the run list and turn each row into an (eclipse, leg, band) tuple.
df = pd.read_csv("eclipse_leg_band_list.csv")
torun = [
    (eclipse, leg, band)
    for eclipse, leg, band in zip(df["eclipse"], df["leg"], df["band"])
]

In [None]:
# Number of (eclipse, leg, band) entries in the run list.
len(torun)

In [None]:
# LAUNCH CLUSTER
# Request 25 instances from the "modelphotom" template and keep the resulting
# cluster handle in `bees`, which every later cell uses.
# NOTE(review): connect=True presumably waits until each instance is reachable
# before returning — confirm against the hostess docs.
bees = Cluster.launch(count=25, template="modelphotom", connect=True)

In [None]:
# GET PRICE
# Display what the cluster reports as its price per hour.
bees.price_per_hour()

In [None]:
# MODIFY MEMORY SETTINGS
# Send a remount of /dev/shm with an 8G size limit through the cluster, then
# display the stdout of the first result as a spot check.
share = bees.command("sudo mount -o remount,size=8G /dev/shm")
share[0].out

In [None]:
# CHECK THAT THE REMOUNT WORKED
# `df -h /dev/shm` reports the size of /dev/shm; display the first result's stdout.
free = bees.command("df -h /dev/shm")
free[0].out

In [None]:
# MOUNT BUCKET
# Recreate the /mnt/s3 mount point, give ownership to the ubuntu user, and
# mount "backplanetest" there with goofys.
# NOTE(review): each step is a separate command() call. If those calls do not
# share a shell session, the `cd /mnt` below has no effect and the relative
# `sudo rm -r s3` runs in the default working directory rather than in /mnt —
# confirm, and prefer absolute paths if so (taking care not to `rm -r` a
# directory while the bucket is still mounted on it).
c2=bees.command("cd /mnt")
c3=bees.command("sudo rm -r s3")
c4=bees.command("sudo mkdir /mnt/s3")
c5=bees.command("sudo chown -R ubuntu /mnt/s3")
c6=bees.command("goofys backplanetest /mnt/s3")

In [None]:
# Display stderr from the first result of the goofys mount command.
c6[0].err

In [None]:
# CHECK THAT THE MOUNT WORKED
# List /mnt/s3 and display the first result's stdout.
# Note: this rebinds `c2`, replacing the result of the earlier `cd /mnt` command.
c2=bees.command("ls /mnt/s3")
c2[0].out

In [None]:
# Run `free -h` through the cluster and display the first result's stdout.
# Note: `c2` is rebound again here.
c2 = bees.command("free -h")
c2[0].out

In [None]:
# TELL BEES TO RUN STATS
# Input and output both live under /mnt/s3/glcat.
input_dir = "/mnt/s3/glcat"
# change output_dir to a test location if this is a test run
output_dir = "/mnt/s3/glcat"

# One argument tuple per job, built from the last 3200 entries of the run list:
# (executable, eclipse, leg, band, input_dir, output_dir).
modelphotom_jobs = [
    (
        "/home/ubuntu/miniconda3/envs/model/bin/modelphotom",
        eclipse,
        leg_name,
        band_name,
        input_dir,
        output_dir,
    )
    for eclipse, leg_name, band_name in torun[-3200:]
]

# Hand the jobs to the cluster without waiting for them to finish; `honey` is
# the handle the polling and terminate cells use afterwards.
honey = bees.commandmap(
    modelphotom_jobs,
    wait=False,
    max_concurrent=3,
    task_delay=5.0,
)

In [None]:
# LOGGING SETUP
# Summary line printed by a finished job, e.g. "out:path, sources:25, in 1 sec".
_SUMMARY_RE = re.compile(r"out:(.*?),\s*sources:(\d+),\s*in\s*([\d.]+)\s*sec")


def _parse_summary(lines):
    """Return (catalog_out, source_count, elapsed_time) from the first line that
    starts with "out:" and matches the summary pattern; raise ValueError if none does."""
    for line in lines:
        if not line.startswith("out:"):
            continue
        match = _SUMMARY_RE.search(line)
        # Keep scanning past "out:" lines that are not a well-formed summary
        # instead of giving up on the first candidate.
        if match is not None:
            return match.group(1).strip(), int(match.group(2)), float(match.group(3))
    raise ValueError("no 'out:<path>, sources:<n>, in <t> sec' summary line in task output")


def logger(completed_task, log_file, log_fail, cluster=None):
    """Append one CSV row describing a finished task.

    On success the row written to ``log_file`` is
    ``[catalog_out, source_count, elapsed_time, name, ip, cleaned_err]``.
    If anything goes wrong (no summary line, unknown host, I/O error on
    ``log_file``, ...) the problem is printed together with the task's output
    and a diagnostic row is appended to ``log_fail`` instead.

    Parameters
    ----------
    completed_task
        Object with ``out`` and ``err`` (sequences of text lines) and ``host``
        (the key used to look the worker up in ``cluster``).
    log_file
        Path of the CSV that receives successful rows (opened in append mode).
    log_fail
        Path of the CSV that receives failure rows (opened in append mode).
    cluster
        Optional mapping/sequence of workers exposing ``ip`` and ``name``.
        Defaults to the notebook-global ``bees`` so existing three-argument
        calls behave as before.
    """
    try:
        catalog_out, source_count, elapsed_time = _parse_summary(completed_task.out)
        if cluster is None:
            cluster = bees  # fall back to the notebook-global cluster
        worker = cluster[completed_task.host]
        ip = worker.ip
        name = worker.name
        # stderr may carry literal "\n" / "\t" escape sequences; expand them so
        # the logged text reads like the original message.
        cleaned_err = "".join(completed_task.err).replace("\\n", "\n").replace("\\t", "\t")
        log_row = [
            catalog_out,
            source_count,
            elapsed_time,
            name,
            ip,
            cleaned_err,
        ]
        with open(log_file, "a", newline="") as f:
            csv.writer(f).writerow(log_row)
    except Exception as e:
        print(f"Logging failed: {e}")
        print(completed_task.out)
        print(completed_task.err)

        # Failure row: tail of stdout, tail of stderr, full stderr, head of stdout.
        fails = [completed_task.out[-5:], completed_task.err[-5:], completed_task.err, completed_task.out[0:10]]
        with open(log_fail, "a", newline="") as f:
            csv.writer(f).writerow(fails)

# Path of the CSV that logger() appends to when it cannot log a task normally.
log_fail = "fail.csv"

def poll_queue(working, callback, poll_rate):
    """Feed completed items from ``working`` to ``callback`` until work runs out.

    Loops while ``working.pending`` is non-empty or ``working.completed_queue``
    still holds items. Each pass drains the queue, calling ``callback`` once
    per item, then sleeps ``poll_rate`` seconds.

    An exception raised by ``callback`` is reported and polling moves on to the
    next item. This function normally runs on a background executor whose
    Future nobody inspects, so letting the exception escape would stop all
    further processing without any visible error.
    """
    while len(working.pending) > 0 or not working.completed_queue.empty():
        while not working.completed_queue.empty():
            item = working.completed_queue.get()
            try:
                callback(item)
            except Exception as exc:
                print(f"poll_queue: callback failed for {item!r}: {exc!r}")
        time.sleep(poll_rate)
        
# One worker thread is enough: it only runs the polling loop off the main thread.
poll_exc = ThreadPoolExecutor(max_workers=1)

# Poll `honey` once per second and pass every completed task to logger().
_polling_future = poll_exc.submit(
    poll_queue,
    working=honey,
    callback=lambda finished_task: logger(finished_task, log_file, log_fail),
    poll_rate=1,
)

In [None]:
# Display the object returned by commandmap.
honey

In [None]:
# Terminate the job pool returned by commandmap.
# NOTE(review): presumably this stops outstanding tasks but leaves the
# instances running — confirm against the hostess docs.
honey.terminate()

In [None]:
# STOP! ONLY RUN IF YOU WANT IT TO END
# Calls terminate() on every member of the cluster, then prints each member's state.

for bee in bees:
    bee.terminate()
    
print([bee.state for bee in bees])

In [None]:
# Re-check each cluster member's state after the terminate request.
print([bee.state for bee in bees])

In [None]:
# Same state check, kept as a separate cell for a later look.
print([bee.state for bee in bees])