In [1]:
from resume import my_resume

In [2]:
import asyncio
import html
import os, shutil, re
from queue import Queue as ThreadsafeQueue
from threading import Thread

import nest_asyncio
import requests
from IPython.display import display, HTML, Markdown, clear_output, FileLink
from pyppeteer import launch

from jobs_skills_weights import get_jobs, get_raw_job_details
from mark_status import mark_status
import skill_weights

nest_asyncio.apply()

# Pulling Job Data

In [3]:
# Load the job listings, then their raw details, then the per-job skill weights.
# `jobs` is looked up by job id in later cells (`jobs.loc[job_id, :]`).
jobs = get_jobs()
raw_job_details = get_raw_job_details(jobs)
# job_descriptions = skill_weights.get_job_descriptions(raw_job_details)
# `job_skill_weights` is what `export_resume` feeds through `skill_text_weights_con`.
job_skill_weights = skill_weights.get_job_skill_weights(raw_job_details)

# Generation

In [4]:
def skill_text_weights_con(weights, job_id):
    """Build the nested ``skill -> skill text -> weights`` mapping for one job.

    Args:
        weights: DataFrame with a three-level row index. The first level is
            used to select the job; the next two are presumably "skill" and
            "skill text" (TODO confirm against ``get_job_skill_weights``).
        job_id: Key into the first index level of ``weights``.

    Returns:
        dict keyed by the values of the second index level; each value is the
        ``DataFrame.to_dict('index')`` of that group's rows, i.e.
        ``{skill: {skill_text: {column: value}}}``.
    """
    # Selecting on the first level drops it, leaving a two-level index.
    job_weights = weights.loc[job_id, :]

    def _rows_by_skill_text(skill_rows):
        # groupby.apply exposes the group key as ``.name``; taking the cross
        # section on it strips the skill level so only skill text remains.
        return skill_rows.xs(skill_rows.name).to_dict('index')

    return job_weights.groupby(level=0).apply(_rows_by_skill_text).to_dict()

In [5]:
# Single well-known location that each finished resume is copied to for upload.
pdf_upload_path = "docs/pdf_sources/upload/Zach_Allen_Resume.pdf"
# Derive the directory from the file path so the two can never drift apart.
os.makedirs(os.path.dirname(pdf_upload_path), exist_ok=True)

In [6]:
# global_skill_text_weights = job_skill_weights.groupby(level=["id", "skill", "skill text"]
#         ).sum().groupby(level=["skill", "skill text"]
#         ).sum().divide(len(jobs)).groupby(level=0
#         ).apply(lambda df: df.xs(df.name).to_dict('index')).to_dict()

In [7]:
async def export_resume(job_id, **kwargs):
    """Export the resume PDF fitted to a single job.

    Creates ``docs/pdf_sources/<job_id>/`` if needed and asks
    ``my_resume.export_fitted_pdf`` to write ``Zach_Allen_Resume.pdf`` there,
    using the skill weights selected for ``job_id`` from ``job_skill_weights``.

    Args:
        job_id: Identifier of the job; used for the output directory and to
            select the job's skill weights.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``export_fitted_pdf`` (the caller in this notebook passes
            ``pyppeteer_page``). Passing a keyword that is already set below
            raises ``TypeError``.

    Returns:
        Whatever ``my_resume.export_fitted_pdf`` returns. Previously this value
        was computed and silently discarded.
    """
    # Build the directory and file path from one source so they stay in sync.
    out_dir = f"docs/pdf_sources/{job_id}"
    os.makedirs(out_dir, exist_ok=True)
    pdf_fpath = f"{out_dir}/Zach_Allen_Resume.pdf"

    return await my_resume.export_fitted_pdf(
        pdf_fpath,

        highest_threshold=0.05,
        lowest_threshold=-1e-3,
        skill_cost=0.1,
        should_render_all_skills=True,
        # NOTE(review): the original flagged this line with a row of '#';
        # it looks like a temporary debugging value — confirm before relying on it.
        max_iters=1,

        stylesheet='lighttheme',
        alt_template_prefixes={'*':'pdf'},
        skill_text_weights = skill_text_weights_con(job_skill_weights, job_id),
        trim_skill_html_data=True,
        **kwargs
    )

In [8]:
async def queue_for_page_export_job(page_q, apply_q, job_id):
    """Borrow a browser page, export one job's resume, then queue it for applying.

    Args:
        page_q: ``asyncio.Queue`` acting as the pool of reusable browser pages.
        apply_q: Queue that receives ``job_id`` once its resume has been exported.
        job_id: Identifier of the job to export.

    Raises:
        Whatever ``export_resume`` raises. In that case the page is still
        returned to the pool and the job is not enqueued on ``apply_q``.
    """
    page = await page_q.get()
    try:
        await export_resume(job_id, pyppeteer_page = page)
    finally:
        # Always hand the page back. Otherwise a failed export shrinks the pool,
        # and once every page is lost all remaining jobs wait on get() forever.
        await page_q.put(page)
        # Balances the put above so the queue's unfinished-task count is unchanged.
        page_q.task_done()
    print(f"{job_id}: Exported resume; enqueueing job for application.")
    apply_q.put(job_id)

In [9]:
def start_background_loop(loop) -> None:
    """Thread target: adopt ``loop`` as this thread's event loop and run it.

    Blocks the calling thread until the loop is stopped.
    """
    # Register the loop for the current thread before running it, so code that
    # asks for the thread's current event loop gets this one.
    asyncio.set_event_loop(loop)
    loop.run_forever()

In [10]:
async def construct_resumes(jobs, applyQueue, max_parallel=3):
    """Export a resume for every job, sharing a small pool of browser pages.

    Launches one headless browser, opens ``max_parallel`` pages and places them
    in an ``asyncio.Queue`` used as the page pool. One
    ``queue_for_page_export_job`` task is created per entry of
    ``jobs.sort_index().index``; each task borrows a page, exports, and puts the
    job id on ``applyQueue``. The browser is always closed before returning or
    raising.

    Args:
        jobs: Object whose ``sort_index().index`` yields the job ids to export.
        applyQueue: Queue that receives each job id once its resume is exported.
        max_parallel: Number of browser pages (concurrent exports); at least 1.

    Raises:
        ValueError: If ``max_parallel`` is less than 1 (no pages would exist and
            every export task would wait forever).
        BaseException: The first exception raised by any export task, re-raised
            after all tasks have finished and the browser has been closed.
    """
    if max_parallel < 1:
        raise ValueError(f"max_parallel must be at least 1, got {max_parallel}")

    print("Launching Browser")
    browser = await launch(
        options={
            'headless': True,
            'args': [
                '--no-sandbox',
                '--run-all-compositor-stages-before-draw',
                '--webkit-print-color-adjust-property',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-accelerated-2d-canvas',
                '--no-first-run',
                '--no-zygote',
                '--single-process',
                '--disable-gpu',
            ],
        },
        # Signal handlers are disabled, presumably because this runs outside
        # the main thread — TODO confirm.
        handleSIGINT=False,
        handleSIGTERM=False,
        handleSIGHUP=False
    )

    # Defined before the try so the cleanup below never hits a NameError that
    # would mask the real failure.
    pageQueue = None
    export_tasks = []
    task_results = []
    try:
        print("Starting page queue")
        pageQueue = asyncio.Queue(maxsize=max_parallel)
        print("Constructing page queue")
        for _ in range(max_parallel):
            await pageQueue.put(await browser.newPage())

        print("Queueing for page export")
        for job_id in jobs.sort_index().index:
            export_tasks.append(
                asyncio.create_task(
                    queue_for_page_export_job(pageQueue, applyQueue, job_id)
                )
            )

    finally:
        try:
            # Wait for every task that was started. Collect failures instead of
            # raising here so one bad job cannot skip the cleanup below.
            task_results = await asyncio.gather(*export_tasks, return_exceptions=True)

            if pageQueue is not None:
                print(f"pageQueue ended with {pageQueue.qsize()}             pages.")
                # Non-blocking drain: a blocking get()/join() would hang forever
                # if a page was never returned to the pool.
                while not pageQueue.empty():
                    pageQueue.get_nowait()
                    pageQueue.task_done() # We have to count all the elements of the queue in and out
                print(f"pageQueue ended with {pageQueue.qsize()} unaccounted pages.")
        finally:
            await browser.close()

    # Surface the first export failure now that everything has been cleaned up.
    failures = [result for result in task_results if isinstance(result, BaseException)]
    if failures:
        raise failures[0]

In [11]:
# Thread-safe queue: the background export tasks put job ids on it and the
# interactive apply loop in this (main) thread takes them off.
applyQueue = ThreadsafeQueue()
# Dedicated event loop, run forever on a daemon thread by start_background_loop,
# so exporting can proceed while later cells block on user input.
loop = asyncio.new_event_loop()
t = Thread(target=start_background_loop, args=(loop,), daemon=True)
t.start()

# Schedule the export coroutine on the background loop. `export_task` is the
# concurrent.futures.Future returned by run_coroutine_threadsafe, so it can be
# waited on from this thread.
export_task = asyncio.run_coroutine_threadsafe(construct_resumes(jobs, applyQueue), loop)
print("Started Threadsafe")
export_task.add_done_callback(lambda x: print("Export task is Done"))

Started Threadsafe
Launching Browser


In [12]:
# Interactive apply loop: take each job whose resume has been exported, show its
# details, stage the PDF at the shared upload path, and record the outcome.
for _ in range(len(jobs.index)):
    job_id = applyQueue.get()
    try:
        job_data = jobs.loc[job_id, :]
        clear_output()

        # Display links to job info
        print(job_id)
        print(job_data['company_name'])
        # The URL comes from external job data, so escape it before placing it
        # inside the quoted href attribute.
        job_url = html.escape(str(job_data['url']), quote=True)
        display(HTML(f"<a href='{job_url}'>job</a>"))

        pdf_fpath = f"docs/pdf_sources/{job_id}/Zach_Allen_Resume.pdf"

        # Copy the output PDF to a single location for easy upload
        try:
            shutil.copyfile(pdf_fpath, pdf_upload_path)
        except shutil.SameFileError: # Sometimes this comes up when re-running a job
            pass

        # Wait for feedback on submission status
        status = input("Status:")
        if status == "skip":
            continue
        if status == "":
            status = "applied"

        mark_status(job_id, status_key_or_id = status)
    finally:
        # Every get() needs exactly one task_done(), including when the job is
        # skipped or the cell is interrupted; otherwise applyQueue.join() hangs.
        applyQueue.task_done()

5150e149-c79c-43d3-92b4-a04fc0102780
INRIX


47ac287c-6d04-48a8-880d-7bd2fb65f79f: Exported resume; enqueueing job for application.
4c73950e-c311-4e91-acec-f9a8df9dc71f: Exported resume; enqueueing job for application.
575ebbbe-9dcc-4e67-afb5-3f55418c3bb4: Exported resume; enqueueing job for application.
53e4e72d-e847-4512-aadc-74cb068874a9: Exported resume; enqueueing job for application.
529c5294-4f9c-4893-a38c-cc1f02524973: Exported resume; enqueueing job for application.
60cc30ac-5ef2-4c53-a5db-340ab9c713c7: Exported resume; enqueueing job for application.
5f39653f-69d9-4045-aac9-152b63a54356: Exported resume; enqueueing job for application.
582e7b45-241c-48c2-afa4-f269add19ff9: Exported resume; enqueueing job for application.
7a62be2b-9b6f-4bed-b772-683ea8d5c39d: Exported resume; enqueueing job for application.
685d51ae-39d0-4673-8727-b6f582d639a6: Exported resume; enqueueing job for application.
648ee1af-51e1-4a40-a010-5038e4e81881: Exported resume; enqueueing job for application.
822d9506-4cb1-4dbb-ae1e-01aa46c97388: Expor

KeyboardInterrupt: Interrupted by user

In [13]:
print(f"ApplyQueue closing w/ size {applyQueue.qsize()}")

# Discard every job id still waiting in the queue, marking each one done.
i = 0
while not applyQueue.empty():
    try:
        drained = applyQueue.get_nowait()
        print(f"ApplyQueue: Drained {drained} (#{i})")
    except Exception:
        print("ApplyQueue: Can't get_nowait. Finally will mark task done.")
    finally:
        applyQueue.task_done()
        i += 1

# Items taken earlier without a matching task_done() leave the unfinished-task
# counter above zero. Call task_done() until it raises ValueError ("called too
# many times"), which means the counter has reached zero. Only ValueError is
# caught so that KeyboardInterrupt and real errors still propagate.
while True:
    try:
        applyQueue.task_done()
    except ValueError:
        break

applyQueue.join()
print("ApplyQueue: Joined")
# The loop runs on the background thread; event-loop methods are not thread-safe,
# so the stop has to be scheduled through call_soon_threadsafe, which also wakes
# the loop so the stop takes effect promptly.
loop.call_soon_threadsafe(loop.stop)

ApplyQueue closing w/ size 33
ApplyQueue: Drained 47ac287c-6d04-48a8-880d-7bd2fb65f79f (#0)
ApplyQueue: Drained 4c73950e-c311-4e91-acec-f9a8df9dc71f (#1)
ApplyQueue: Drained 575ebbbe-9dcc-4e67-afb5-3f55418c3bb4 (#2)
ApplyQueue: Drained 53e4e72d-e847-4512-aadc-74cb068874a9 (#3)
ApplyQueue: Drained 529c5294-4f9c-4893-a38c-cc1f02524973 (#4)
ApplyQueue: Drained 60cc30ac-5ef2-4c53-a5db-340ab9c713c7 (#5)
ApplyQueue: Drained 5f39653f-69d9-4045-aac9-152b63a54356 (#6)
ApplyQueue: Drained 582e7b45-241c-48c2-afa4-f269add19ff9 (#7)
ApplyQueue: Drained 7a62be2b-9b6f-4bed-b772-683ea8d5c39d (#8)
ApplyQueue: Drained 685d51ae-39d0-4673-8727-b6f582d639a6 (#9)
ApplyQueue: Drained 648ee1af-51e1-4a40-a010-5038e4e81881 (#10)
ApplyQueue: Drained 822d9506-4cb1-4dbb-ae1e-01aa46c97388 (#11)
ApplyQueue: Drained 8015b8a1-ec12-4e77-a017-abdc0aaff640 (#12)
ApplyQueue: Drained 7e336fe3-dfe8-40f7-ae50-46d29224804e (#13)
ApplyQueue: Drained 93cefcf3-3637-4209-a95a-6f99f2fad711 (#14)
ApplyQueue: Drained 9301b586-cbea-4

In [15]:
# Block until the background export coroutine finishes; re-raises any exception it hit.
export_task.result()

In [17]:
# Wait for the background thread to exit; it only returns once `loop` has been stopped.
t.join()

In [16]:
# Show the unfinished tasks of the event loop running in this (kernel) thread.
print(asyncio.all_tasks())

{<Task pending name='Task-1' coro=<Kernel.dispatch_queue() running at /opt/conda/lib/python3.10/site-packages/ipykernel/kernelbase.py:510> cb=[IOLoop.add_future.<locals>.<lambda>() at /opt/conda/lib/python3.10/site-packages/tornado/ioloop.py:687]>}
