### Directories

- mirror:  Old mirrored PDF files
- mirror-csv:  New (2017) mirrored CSV files

In [None]:
import getpass, glob, os, sys, tempfile, time
from utils.utils import Stat, subprocess_check

In [None]:
# Display the interpreter's module search path as the cell output
# (presumably a debugging aid for the `utils.utils` import -- confirm).
sys.path

In [None]:
# Mirror configuration.
# NOTE(review): source_url points at the old PDF summary and is not referenced
# by the code visible in this notebook -- confirm whether it is still needed.
source_url = "http://www.achd.net/airqual/DailySummary.PDF"
dest_dir = "mirror-csv"  # where timestamped CSV snapshots are stored
tmp_dir = "tmp"          # scratch space for in-progress downloads

print(f"Mirror directory is {os.path.abspath(dest_dir)}")

In [None]:
# Register this notebook with the status reporter and announce the run.
Stat.set_service('Mirror ACHD to ESDR')
stat_shortname = 'Mirror CSV'
stat_valid_for_secs = 2 * 3600  # two hours, expressed in seconds

Stat.info(
    f'Starting mirror-achd-csv.ipynb with python {sys.executable} '
    f'as username {getpass.getuser()} '
    f'with commandline {" ".join(sys.argv)}',
    host=stat_shortname,
    shortname=stat_shortname,
)


In [None]:
def now():
    """Return the current local time as ``YYYY-MM-DD-HH:MM:SS`` plus UTC offset."""
    timestamp_format = '%Y-%m-%d-%H:%M:%S%z'
    return time.strftime(timestamp_format, time.localtime())

def find_most_recent_path(dest_dir):
    """Return the newest mirrored CSV path in ``dest_dir``, or None if none exist.

    File names embed a sortable timestamp, so the lexicographically greatest
    matching path is taken as the most recent one.
    """
    pattern = f"{dest_dir}/AirQualityDataSummary-????-??-??-??:??:??*.csv"
    return max(glob.glob(pattern), default=None)

def mirror_achd_sftp(dest_dir, tmp_dir):
    """Fetch the ACHD daily-summary CSV over SFTP and archive it if it changed.

    The remote file is downloaded into a fresh temporary file under
    ``tmp_dir``.  If its contents are byte-for-byte identical to the most
    recent mirrored file in ``dest_dir`` the download is discarded; otherwise
    it is moved into ``dest_dir`` as ``AirQualityDataSummary-<timestamp>.csv``.
    The outcome is reported through ``Stat``.

    Args:
        dest_dir: Directory holding the mirrored CSV files; created if missing.
        tmp_dir: Scratch directory for the download; created if missing.  The
            download is moved with ``os.rename``, so it should live on the
            same filesystem as ``dest_dir``.

    Raises:
        OSError: If a directory cannot be created or a file operation fails.
        Whatever ``subprocess_check`` raises when the sftp command fails
        (presumably on a non-zero exit status -- confirm in utils.utils).
    """
    for directory in (dest_dir, tmp_dir):
        os.makedirs(directory, exist_ok=True)

    # Only the unique path is needed; sftp writes the file itself, so close
    # our own handle immediately instead of leaking it.
    with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file:
        tmp_filename = tmp_file.name

    # Raw string: the backslashes are literal characters escaping the spaces
    # in the remote path, not Python escape sequences.
    src = r'pdille@moveit.alleghenycounty.us:/WPRDC/Health\ Department/Air\ Quality\ Daily\ Summary.CSV'
    cmd = ['/projects/sshpass/sshpass',
           '-f%s/achd-ftp-passwd-do-not-check-in.txt' % os.getcwd(),
           'sftp',
           src,
           tmp_filename]
    print(' '.join(cmd))

    subprocess_check(cmd)

    # Read raw bytes so the comparison is exact and the reported size really
    # is a byte count, independent of locale encoding and newline translation.
    with open(tmp_filename, 'rb') as fetched:
        data = fetched.read()
    print('%s: Fetched %d bytes from %s to %s\n' % (now(), len(data), src, tmp_filename))

    most_recent_path = find_most_recent_path(dest_dir)

    unchanged = False
    if most_recent_path:
        with open(most_recent_path, 'rb') as previous:
            unchanged = previous.read() == data

    if unchanged:
        Stat.info(f"{now()}: Not recording {len(data)} bytes read from {src} because identical to previous file {most_recent_path}", host=stat_shortname, shortname=stat_shortname)
        os.unlink(tmp_filename)
    else:
        dest = "%s/AirQualityDataSummary-%s.csv" % (dest_dir, now())
        # NamedTemporaryFile creates the file owner-only; make it world-readable.
        os.chmod(tmp_filename, 0o644)
        os.rename(tmp_filename, dest)
        Stat.up(f"{now()}: Stored {len(data)} bytes read from {src} at path {dest}", host=stat_shortname, shortname=stat_shortname, valid_for_secs=stat_valid_for_secs)


In [None]:
# Run the mirror once, using the directories configured earlier in the notebook.
mirror_achd_sftp(dest_dir, tmp_dir)