In [1]:
%load_ext autoreload
%autoreload 2

# UNDER CONSTRUCTION 
# Check run completion

This clode checks if a run is completed by comparing the unique point numbers in the output files to the initial pointlist (pointlist_1.txt). If it is not completed, it outputs a list of points that still need to be run and optionally creates a pointlist textfile with those numbers. It also checks to make sure the files that were generated are approximately the correct size (e.g., were not corrupted during writing).

In [2]:
from check_run import *

import numpy as np
import os
from glob import glob
from collections import Counter

### Load input pointlist and make a pointlist from all output points

In [3]:
project_name = "run_FGRN055-era055_1939-2023"
project_dir = f"/home/nld4814/scratch/{project_name}/"

output_1D = glob(f'{project_dir}output/*1D*')
pointlist_1D = np.sort([int(point[len(f"{project_dir}output/FGRN055_era055_1D_"):-3]) for point in output_1D])
output_2D = glob(f'{project_dir}output/*2D_*')
pointlist_2D = np.sort([int(point[len(f"{project_dir}output/FGRN055_era055_2D_"):-3]) for point in output_2D])
output_2Ddet = glob(f'{project_dir}output/*2Ddetail*')
pointlist_2Ddet = np.sort([int(point[len(f"{project_dir}output/FGRN055_era055_2Ddetail_"):-3]) for point in output_2Ddet])

input_pointlist = set(np.genfromtxt(f"{project_dir}pointlist_1.txt",dtype=int))


### Checks that all points that were run output 1D, 2D, and 2Ddetail files
If any points did not output all files, they are saved in the corresponding lists

In [4]:
print("Do any points not have both 1D and 2D outputs?")
if any(set(pointlist_1D) ^ set(pointlist_2D)):
    print("Not all points output all files")
    diff_1D_2D = set(pointlist_1D) ^ set(pointlist_2D)
else:
    diff_1D_2D = None
    print("Nope! Pointlists match!")

print("Do any points not have both 1D and 2Ddetail outputs?")
if any(set(pointlist_1D) ^ set(pointlist_2Ddet)):
    print("Not all points output all files")
    diff_1D_2Ddet = set(pointlist_1D) ^ set(pointlist_2Ddet)
else:
    diff_1D_2Ddet = None
    print("Nope! Pointlists match!")

print("Do any points not have both 2D and 2Ddetail outputs?")
if any(set(pointlist_2D) ^ set(pointlist_2Ddet)):
    print("Not all points output all files")
    diff_2D_2Ddet = set(pointlist_2D) ^ set(pointlist_2Ddet)
else:
    diff_2D_2Ddet = None
    print("Nope! Pointlists match!")


Do any points not have both 1D and 2D outputs?
Nope! Pointlists match!
Do any points not have both 1D and 2Ddetail outputs?
Not all points output all files
Do any points not have both 2D and 2Ddetail outputs?
Not all points output all files


### Check  that file sizes are the same

In [None]:
# Takes a long time!
def check_filesize(filelist,output_type):

    """
        filelist (list): list of 1D, 2D or 2D detail output files
        output_type (str): "1D", "2D", or "2D detail" (just for printing, no dependencies)
    """
    
    print(f"Working on {output_type}")
    sizes = [os.path.getsize(f) for f in filelist]

    print(f"Are all the {output_type} files the same size?")
    if (len(set(sizes)) == 1):
        print("Yes!")

        return None

    else:
        most_common_size = Counter(sizes).most_common(1)[0][0]
        outlier_files = [f for f, s in zip(filelist, sizes) if s != most_common_size]
        
        return outlier_files


#outlier_1D = check_filesize(output_1D, "1Dc")
#outlier_2D = check_filesize(output_2D, "2D")
#outlier_2Ddet = check_filesize(output_2Ddet, "2Ddet")


Working on 1D
Are all the 1D files the same size?
Yes!
Working on 2D
Are all the 2D files the same size?
Yes!
Working on 2Ddet


### Check if any points that should have run did not run, and outputs a list

In [5]:
# all points that didn't have all output written will have to be re-run
try:
    incomplete_pointlist = diff_1D_2D | diff_1D_2Ddet | diff_2D_2Ddet 
except:
    incomplete_pointlist = set()

# create final output pointlist (all output points minus those that didn't save out correctly)
output_pointlist = set(pointlist_1D) | set(pointlist_2D) | set(pointlist_2Ddet)
output_pointlist = set(pointlist_1D) - incomplete_pointlist

print("Did all points run from the input pointlist run?")
if any(input_pointlist ^ output_pointlist):
    rerun_pointlist = incomplete_pointlist | (input_pointlist ^ output_pointlist)
    print(f"No. {len(rerun_pointlist)} point still need to be run.")
    
else:
   print("All points ran!")

Did all points run from the input pointlist run?
No. 2 point still need to be run.


In [6]:
# If all points haven't ran, save out to a pointlist (if lots of points, this can take some time)

save_location = project_dir
save_name = "pointlist_todo.txt"

if bool(rerun_pointlist):

    with open(f'{project_dir}{save_name}', 'w') as f:
        f.write('\n'.join(map(str, list(rerun_pointlist))) + '\n')