In [1]:
from CurriculumVitae import *
from IPython.display import display, HTML, Markdown, clear_output
from datetime import date
import json
from jobs_skills_weights import get_jobs, get_raw_job_details
import skill_weights, synonyms, skill_cat
import pandas as pd # TODO: Not needed for final export

In [2]:
# Root resume object: identity, contact details, and the summary headline.
my_resume = Resume(
    person=Person(
        name="Zach Allen",
        pronouns="He/Him",
        contact_info=ContactInfo(
            email="fractalmachinist@gmail.com",
            phone="+1 (509)438-8146",
            link="https://fractalmachini.st",
            link2="https://linkedin.com/in/zachallen-fractalmachinist/",
        ),
    ),
    # The headline is built from fragments joined with single spaces, so a
    # fragment marked "(cont.)" simply continues the sentence before it.
    # Trailing comments preserve the author's per-sentence "total / unique"
    # percentages (NOTE(review): presumably skill-coverage figures - confirm).
    headline=" ".join((
        "Machine Learning Researcher & Data Engineer with 5 years experience in collaborative AI innovation & infrastructure implementation at scale.",  # 27.4% / 13.0%
        "Deep Learning frameworks: expert in Python & TensorFlow since 2015/2017.",  # 6.20% / 6.20%
        "Analyzed, tested, and optimized innovative Machine Learning algorithms for unbalanced datasets (>1000:1), raising rare-case recognition",  # 10.1% / 3.67%
        "from 15% to 90% with no loss in accuracy.",  # (cont.)
        "Passionate about collaborating & working closely with multidisciplinary partners to shape the future towards global sustainability.",  # 8.01% / 4.89%
        "Independently implemented and deployed terabyte-scale AWS genomics pipeline infrastructure in 4 months part-time,",  # 7.52% / 2.64%
        "from no prior AWS or genomics experience.",  # (cont.)
        "Certified in data-driven integrations of business needs, software engineering, project management, and software development best practices.",  # 4.88% / 4.88%
        "Excellent writing, speaking, presenting, and technical communication skills.",  # 3.58% / 3.58%
    )),
)

In [3]:
# Education history: a single Occupation entry for the degree, carrying
# coursework projects (sub_tasks) and certifications/awards (achievements).
# Every skill tag is created through my_resume.Skills(...)
# (NOTE(review): presumably this registers the tags on the resume so that
# my_resume.skills can enumerate them later - confirm in CurriculumVitae).
my_resume.education = [Occupation(
    title="Bachelor's in Computer Science", location="Utah, USA (Online)",
    timespan=Between(start=date(2019, 2, 1), end=date(2022, 8, 31)),
    subtitle="Western Governors University", 
    headline="Focused on Machine Learning and Project Management in a fully remote environment",
    skills=my_resume.Skills("Remote Work", "Project Management", "Machine Learning", "Communication"),
    # Coursework projects, each an Effort with its own list of achievements.
    sub_tasks=[
        Effort(title="Diamond Price Prediction Model",
               headline="Documented and implemented a diamond price prediction ML model.",
               website="https://github.com/FractalMachinist/WGU-C968",
               skills=my_resume.Skills("Machine Learning", "Git", "Software Engineering", "Jupyter", "Analysis", "Statistics"),
               achievements=[
                  Achievement(
                       headline="Demonstrated integration of project management and software engineering best practices.",
                       skills=my_resume.Skills("Project Management", "Communication", "Documentation", "Managing Requirements", 
                                               "Data Driven", "End to End")),
                   Achievement(
                       headline="Developed simple Data Pipeline in python/pandas.",
                       skills=my_resume.Skills("Python", "Data", "Software Engineering", "NumPy", "SciPy")),]

        ), 
        Effort(title="Advanced Java Concepts",
               headline="Developed appointment scheduling and customer database tool in Java.",
               website="https://github.com/FractalMachinist/C195-Scheduling-App",
               skills=my_resume.Skills("Java","Software Engineering","Git", "SQL"),
               achievements=[
               Achievement(
                   headline="Encapsulated Observable state and database connection state into a shared wrapper class, streamlining inheritance, error handling, and UI/Database auto-updates.",
                   skills=my_resume.Skills("Java", "JavaFX", "JDBC", "Integration", "API Design")),
               Achievement(
                   headline="Paid technical debt by finalizing project documentation.",
                   skills=my_resume.Skills("Documentation", "Communication", "Ownership", "SDLC", "Project Management"))
               ]),    
    ],
    
    # Certifications and awards attached to the degree itself.
    achievements=[
        
        Certification(name="CompTIA Project+",
                      headline="Demonstrated understanding of Project Management roles, processes, and documentation.",
                      issuer="Pearson VUE", issue_date=date(2019, 7, 24),
                      website="https://wsr.pearsonvue.com/testtaker/authenticate/AuthenticateScoreReport.htm",
                      confirmation_info={"Registration":"358639011", "Validation":"155946649"},
                      skills=my_resume.Skills("Project Management", "Communication", "Managing Requirements", "SDLC", 
                                              "Integration", "Agile")),
        
        # NOTE(review): ITIL is usually expanded as "Information Technology
        # Infrastructure Library" - confirm the certification name below.
        Certification(name="IT Information Library Foundations Certification (ITIL)", 
                      headline="Demonstrated understanding of designing, deploying, maintaining, and retiring IT resources.",
                      issuer="AXELOS", issue_date=date(2020, 8, 1),
                      skills=my_resume.Skills("Project Management", "Managing Requirements", "SDLC", "Integration")),
        
        Certification(name="Site Development Associate",
                      headline="Demonstrated ability to design and build websites.",
                      issuer="CIW", issue_date=date(2019, 2, 1),
                      skills=my_resume.Skills("Web Development","HTML","CSS","JavaScript",)),
        
        # A plain Achievement (not a Certification): it carries a portfolio link
        # and no issuer or issue date.
        Achievement(headline="Excellence Award for Communication Applications",
                    portfolio_link="https://github.com/FractalMachinist/Rust_Business_Presentation",
                    skills=my_resume.Skills("Communication", "Documentation")),
        
    ])]

In [4]:
# Employment history, most recent first. Each Occupation lists either nested
# Efforts (sub_tasks) or direct Achievements; every skill tag is created via
# my_resume.Skills(...).
my_resume.employment = [
    # --- Pluton Biosciences -------------------------------------------------
    # Title corrected from the misspelled "Sofware Engineer"; this string is
    # passed straight into the Occupation shown on the resume.
    Occupation(title="Software Engineer", location="St. Louis, Missouri, USA", website="https://PlutonBio.com",
               timespan=Between(start=date(2021, 3, 1), end=date(2021, 8, 31)),
               subtitle="Pluton Biosciences", supervisor=Person(name="Dr. Boahemaa Adu-Oppong", pronouns="She/Her",
                                                                contact_info=ContactInfo(email="BAdu-Oppong@plutonbio.com")),
               headline="Data Engineering supporting Bioinformatics Research",
               skills=my_resume.Skills("Remote Work","Big Data","Bioinformatics","Genetics","Multidisciplinary","Motivated"),
               sub_tasks=[
                   Effort(
                       title="AWS Genomics Pipeline",
                       headline="Independently designed, implemented, and deployed Terabyte-scale AWS genomics pipeline in 4 months,"+
                                        " from no prior genomics or AWS experience.",
                       skills=my_resume.Skills("AWS","Cloud Computing",
                                               "Data","Big Data","Scale",
                                               "New Tools","Tooling","Build Tools","Infrastructure","End to End",
                                               "Motivated","Multidisciplinary","Problem Solving","Remote Work","Innovation","Curious","Ownership","Project Management","SDLC"),
                       achievements=[
                           Achievement(
                               # title="Pipeline Design",
                               headline="Designed, implemented, and documented gRPC+ProtoBuf pipeline architecture.",
                               skills=my_resume.Skills("Python","Bash","Infrastructure","Data","Big Data","Scale","Build Tools","Tooling",
                                                       "Integration","Automation","Prototyping","Workflows","Ownership","Innovation","Problem Solving","New Tools"),
                           ),
                           Achievement(
                               # title="Streamlined Pipeline Development Lifecycle",
                               headline="Developed, documented, and utilized custom pipeline-integrated frameworks for automating Computational Genomics tool containerization.",
                               skills=my_resume.Skills("Python","Bash","Software Engineering","Frameworks","Infrastructure","Build Tools","Tooling","SDLC","Automation",
                                                       "Prototyping","Workflows","Documentation","Innovation","Problem Solving","New Tools")
                           ),
                           Achievement(
                               # title="User-Friendly Query Library",
                               headline="Designed, implemented, developed training for, and documented an in-house Python+R library and interface enabling (non-CS) biologists to easily deploy multi-stage genomics queries"+
                                        " without relying on the CS team.",
                               skills=my_resume.Skills("Python","R","API Design","Libraries","Build Tools","Tooling","Automation","Workflows","Documentation","Problem Solving","Value Creation","New Tools")
                           ),
                           Achievement(
                               # title="Aligned Deliverables with Stakeholder Needs",
                               headline="Collaborated with SMEs to gather, interpret, and meet data pipeline requirements."+
                                        " Interfaced with Bioinformatics expert for genomics tool selection.",
                               skills=my_resume.Skills("Managing Requirements", "Communication", "Project Management", "Collaboration","Multidisciplinary")
                           ),
                       ],
                   ),

                   Effort(
                       title="Containerized Deployment & Autoscaling",
                       headline="Researched, containerized, documented, and deployed 10+ Computational Genomics tools to AWS ECS and EC2.",
                       skills=my_resume.Skills("Integration","Bash","Linux","AWS","Containerization", "Scale", "Frameworks", "Infrastructure","Bioinformatics", "Genetics"),
                       achievements=[
                           Achievement(
                               headline="Constructed and tuned EC2&ECS autoscaling, maximizing cost efficiency and cluster responsiveness.",
                               skills=my_resume.Skills("Optimization","AWS","Infrastructure","Budget")
                           ),
                           Achievement(
                               headline="Detected, diagnosed, and resolved compute resource inefficiencies.",
                               skills=my_resume.Skills("Optimization","Software Engineering","Computer Science","Debugging")
                           ),
                           Achievement(
                               # title="Spearheaded SDLC & CI/CD",
                               headline="Led the CS team in adopting SDLC tools like Git, AWS CodeCommit, and Docker / AWS Elastic Container Registry."+
                                           " Championed utilization of Microsoft Teams Kanban tools for project&process management.",
                               skills=my_resume.Skills("Leadership", "SDLC", "Git", "Containerization", "AWS", "Testing", "Integration", "Frameworks", "Project Management","Teamwork", "Workflows")
                           ),
                       ]
                   ),
                   # A bare Achievement placed directly among the sub_tasks
                   # (not wrapped in an Effort).
                   Achievement(
                       # title="Microbial Taxonomy Visualizations in R",
                       headline="Worked closely with Microbiologists & Geneticists to construct visualizations of microbial taxonomy data across experiments.",
                       skills=my_resume.Skills("R","Communication","Collaboration","Multidisciplinary","Teamwork")
                   ),
               ],
    ),

    # --- Applied Separations ------------------------------------------------
    Occupation(title="Embedded Systems Engineer", location="Allentown, Pennsylvania, USA", website="https://AppliedSeparations.com",
               timespan=Between(start=date(2019, 1, 1), end=date(2020, 1, 25)),
               subtitle="Applied Separations", supervisor=Person(name="Aaron Allen", pronouns="He/Him",
                                                                contact_info=ContactInfo(email="mnrnln@gmail.com")),

               headline="Developed custom C++ / Arduino pump control software",
               achievements=[
                   Achievement(headline="Designed and developed pump control software for chromatography and analytical chemistry systems in C and C++.",
                               skills=my_resume.Skills("C", "C++", "Software Engineering", "SDLC")),

                   Achievement(headline="Wrote a simple heuristic scheduler & virtual threading to manage real-time (60Hz+) pump control, "+
                                        "touch screen input, and data/control communication, all on a single Arduino Mega.",
                               skills=my_resume.Skills("Algorithms", "Optimization", "Software Engineering", "Statistics", "Computer Science")),

                   Achievement(headline="Designed and validated mass delivery tracking, prediction, and smoothing algorithms, to handle nonlinear feedback delay.",
                               skills=my_resume.Skills("Algorithms", "Algebra", "Software Engineering", "Testing", "Computer Science", "Prototyping", "Analysis")),

                   Achievement(headline="Interfaced with separate development team in charge of chromatography control software. Collected and implemented"+
                                        " requirements from instrumentation engineer.",
                               skills=my_resume.Skills("Communication", "Managing Requirements", "Project Management", "SDLC", "Documentation", "Collaboration",
                                                       "Ownership", "Integration", "Leadership", "Git"))]),

    # --- Alpha Computer Center ----------------------------------------------
    Occupation(title="Service Writer", location="Richland, Washington, USA", website="https://AlphaComputerCenter.com",
               timespan=Between(start=date(2017, 12, 1), end=date(2018, 11, 30)),
               subtitle="Alpha Computer Center", supervisor=Person(name="Frank Ward Jr.", pronouns="He/Him",
                                                                   contact_info=ContactInfo(email="frankjr@alphacomputercenter.com",
                                                                   phone="+1 (509)946-4230")),

               headline="Customer service, sales, and technician support",
               achievements=[
                   Achievement(headline="Ensured customers were able to accurately understand and communicate with repair technicians, improving customer"+
                                        " service and reducing diagnostic time.",
                               skills=my_resume.Skills("Communication")),

                   Achievement(headline="Leveraged extensive Linux experience to rapidly identify and repair issues that couldn't be fixed by Mac diagnostic tools.",
                               skills=my_resume.Skills("Linux","Bash")),

                   # This headline embeds a raw HTML anchor inside the string.
                   Achievement(headline="Reduced call frequency with an informative website. See it on "+
                                        "<a href='https://web.archive.org/web/20180113192132/http://www.alphacomputercenter.com/wordpress1/'>web archive</a>.",
                               skills=my_resume.Skills("Web Development","HTML","CSS","JavaScript", "Graphic Design"))]),

    # --- Pacific Northwest National Laboratory ------------------------------
    Occupation(title="Machine Learning Researcher (Intern)", location="Richland, Washington, USA", website="https://pnnl.gov",
               timespan=Between(start=date(2017, 1, 1), end=date(2017, 5, 31)),
               subtitle="Pacific Northwest National Laboratory", supervisor=Person(name="Dr. Enoch Yeung", pronouns="He/Him",
                                                                                   contact_info=ContactInfo(email="eyeung@ucsb.edu")),

               headline="ML Research and Data Engineering intern",
               achievements=[
                   Achievement(headline="Designed and tested novel Neural Network algorithms, architectures, and error formulations for NLP,"+
                                        " image classification, and time-series data classification.",
                               skills=my_resume.Skills("Python", "TensorFlow", "Machine Learning", "Deep Learning", "Neural Network Architectures",
                                                       "Algorithms", "Algebra", "NLP", "Data", "Curious", "Research", "Frameworks", "Computer Science",
                                                       "MatLab", "NumPy", "Software Engineering")),

                   Achievement(headline="Demonstrated increased test accuracy (15% ⇾ 90% detection with higher Bayesian Confidence)"+
                                        " on unbalanced (>1000:1) datasets, without duplication, augmentation, or batch filtering.",
                               skills=my_resume.Skills("Machine Learning", "Algebra", "Statistics", "Testing", "Research", "Algorithms",
                                                       "Computer Science", "Analysis")),

                   Achievement(headline="Worked independently, balancing multiple projects and deliverables with minimal mentor supervision,"+
                                        " often meeting every two weeks.",
                               skills=my_resume.Skills("Project Management", "Communication", "Collaboration"))])
]

In [5]:
# Personal projects: a flat list of Efforts, each with a headline, an optional
# repository link, and Achievements tagged with skills via my_resume.Skills(...).
my_resume.projects = [
    Effort(title="Neural Cellular Segmentation",
           headline="Exploring neural cellular automata and attention (NCA+A) for medical image segmentation",
           website="https://github.com/FractalMachinist/NeuralCellularAutomataAttn",
           achievements=[
               Achievement(headline="Developed, tested, and iterated NCA+A models, balancing system resources and model size.",
                           skills=my_resume.Skills("Python", "TensorFlow", "Machine Learning", "Neural Network Architectures", "Research", "Algebra", 
                                                   "Optimization", "Statistics", "NumPy", "Analysis")),
               Achievement(headline="Created multiple tf.Data pipelines with preprocessing and data augmentation steps.",
                           skills=my_resume.Skills("Python", "Data", "Software Engineering", "TensorFlow", "Git", "Frameworks", "Computer Science")),
           ]),
    Effort(title="Interplan",
           headline="Task dependency management from a Graph Database",
           website="https://github.com/FractalMachinist/Interplan",
           achievements=[
               Achievement(headline="Developed a Neo4J+React dependency resolution and task status tracking web app.",
                           skills=my_resume.Skills("Web Development","HTML","CSS","JavaScript", "Testing", "Project Management", "Data Driven", "API Design",
                                                   "Software Engineering")),
               Achievement(headline="Packaged React app and Neo4J database in Docker & Kubernetes for easy migration.",
                           skills=my_resume.Skills("Containerization", "Linux", "Bash", "Neo4J","Apache TinkerPop", "Git", "Frameworks", "Infrastructure"))
           ]),
    
    Effort(title="MarkNotes",
           headline="Intuitive journaling tool designed to encourage long-term review and introspection",
           website="https://github.com/FractalMachinist/MarkNotes",
           achievements=[
               Achievement(headline="Implemented MongoDB and Node API for destructuring, storing, and querying Markdown entries as semi-structured data.",
                           skills=my_resume.Skills("MongoDB", "Data", "Data Driven", "NoSQL", "API Design", "Software Engineering")),
               Achievement(headline="Streamlined interface for usability.",
                            skills=my_resume.Skills("Web Development","HTML","CSS","JavaScript", "Usability", "Graphic Design")),
               Achievement(headline="Packaged React app and MongoDB in Docker & Kubernetes.",
                           skills=my_resume.Skills("Containerization", "Linux", "Bash", "Web Development","HTML","CSS","JavaScript", 
                                                   "Git", "Frameworks", "Infrastructure"))
           ]),
    
    # This Effort is the only project without a website: its link is commented out below.
    Effort(title="NetTimeLog",
           headline="Minimalist, accurate time tracking",
           # website="https://fractalmachini.st/demos/nettimelog",
           achievements=[
               Achievement(headline="Created time-tracking web app which records what you just completed, so you never estimate"+
                                    " what you will do or how long it will take.",
                           skills=my_resume.Skills("Python", "Web Development","HTML","CSS","JavaScript"))])
]

# Validate Resume Configuration

In [6]:
# Reload the synonym tables before checking the resume against them.
synonyms._refresh()

# Resume skills whose lowercased name is either a non-root synonym or has no
# entry in the skill-to-category map. The saved output for this cell is an
# empty list.
# NOTE(review): skill_cat is read here without a skill_cat._refresh() call;
# confirm its tables are already loaded on import.
[
    skill
    for skill, skill_key in ((entry, entry.name.lower()) for entry in my_resume.skills)
    if skill_key in synonyms.non_root_synonyms
    or skill_key not in skill_cat.skill_to_categories
]

[]

# Conditional Resume Construction

In [7]:
# Fetch the job list. bookmarked=False is passed straight to get_jobs
# (NOTE(review): presumably disables a bookmarked-only filter - confirm in
# jobs_skills_weights).
jobs = get_jobs(bookmarked=False)
# Raw per-job details; this is the input to every skill_weights call that follows.
raw_job_details = get_raw_job_details(jobs)
# Job descriptions derived from the raw details. Only referenced by a
# commented-out display(...) call in the per-job review loop.
job_descriptions = skill_weights.get_job_descriptions(raw_job_details)

In [8]:
# Reload the category and synonym tables before extracting skills
# (NOTE(review): presumably so recent edits to those tables are picked up - confirm).
skill_cat._refresh()
synonyms._refresh()
# Per-job skill data with a "count" column; later cells index it by job id and
# group it by the "skill" index level.
job_skills_data = skill_weights.get_job_skills_data(raw_job_details)
# Per-job weights with a "share of job" column, requested without collapsing categories.
job_skill_weights = skill_weights.get_job_skill_weights(raw_job_details, collapse_categories=False)

## Investigation & Review

In [9]:
# Each row's count as a fraction of the grand total of all counts.
job_skills_shares = (
    job_skills_data[["count"]]
    .div(job_skills_data[["count"]].sum())
    .rename(columns={"count": "share of total"})
)

# Shares totalled per skill (summing over every other index level).
skill_shares = job_skills_shares.groupby(level="skill").sum()

# One row per resume skill, keyed by lowercased name, with its instance count.
resume_skills = pd.DataFrame(
    [
        dict(skill=skill.name.lower(), instances=skill.get_num_instances())
        for skill in my_resume.skills
    ]
).set_index("skill")

### Investigating Resume Efficiency

This section seems to be revealing significant limitations in the data I'm pulling from teal. Skills like 'C++' and 'Bash' appear to not be present in any job listing.  
  
To handle this, I'm creating a `set` of skills that I'm excluding from analysis.

In [10]:
# Lowercased resume skills left out of the density analysis because they
# do not appear in any job listing in the pulled data.
manually_excluded_skills = {
    'apache tinkerpop',
    'bash',
    'c++',
    'genetics',
    'javafx',
    'jdbc',
}

In [11]:
# Share of job-listing demand per resume instance of each skill.
# Rows are restricted to resume skills, sorted ascending with NaN first,
# and the manually excluded skills are dropped at the end.
resume_skill_density = (
    skill_shares["share of total"]
    .div(resume_skills["instances"])
    .loc[resume_skills.index]
    .sort_values(na_position='first')
    .loc[lambda density: ~density.index.isin(manually_excluded_skills)]
)

In [12]:
# Lowest-density resume skills. NaN rows come first (na_position='first') and
# mark skills for which the share/instances division had no usable value.
resume_skill_density.head()

skill
graphic design          NaN
css                0.000020
html               0.000039
web development    0.000059
new tools          0.000089
dtype: float64

In [13]:
# skill_shares.loc["iteration", :]

### Investigating Missing Skills

In [14]:
# Job-listing skill rows whose skill does not appear anywhere in the resume.
# The index level is selected by name rather than by position (2), so the
# filter keeps working if the index levels are reordered and matches the
# name-based groupby(level="skill") calls used in this notebook.
missing_job_skills_shares = job_skills_shares[
    ~job_skills_shares.index.get_level_values("skill").isin(resume_skills.index)
]

# The same rows with the 'emphasis' teal category removed.
missing_non_emphasis_shares = missing_job_skills_shares.query("`teal category` != 'emphasis'")

# Each entry pairs a report label with the frame whose "share of total" column
# is summed. The "@ > 0.1%" variants first total shares per skill and keep
# only skills whose total exceeds 0.001.
for label, shares in [
    ("missing skills                         share of total:",
     missing_job_skills_shares),
    ("missing skills (non-emphasis)          share of total:",
     missing_non_emphasis_shares),
    ("missing skills                @ > 0.1% share of total:",
     missing_job_skills_shares.groupby(level="skill").sum().query("`share of total` > 0.001")),
    ("missing skills (non-emphasis) @ > 0.1% share of total:",
     missing_non_emphasis_shares.groupby(level="skill").sum().query("`share of total` > 0.001")),
]:
    print(label, f"{shares.sum().values[0]*100:.1f}%")

missing skills                         share of total: 55.2%
missing skills (non-emphasis)          share of total: 36.8%
missing skills                @ > 0.1% share of total: 27.4%
missing skills (non-emphasis) @ > 0.1% share of total: 10.0%


In [15]:
# Ten largest non-emphasis skills that are absent from the resume,
# ranked by their summed share of total.
(
    missing_job_skills_shares
    .query("`teal category` != 'emphasis'")
    .groupby(level="skill")
    .sum()
    .sort_values("share of total", ascending=False)
    .head(10)
)

Unnamed: 0_level_0,share of total
skill,Unnamed: 1_level_1
white space,0.006267
global,0.004966
cutting edge,0.00402
computer vision,0.003547
partner,0.003547
paid,0.003311
programs,0.002956
monitoring,0.00272
spark,0.00272
best practices,0.002483


### Review against Job Descriptions

In [16]:
# Spot-check the extracted skills for one hard-coded job id, sorted by ascending count.
job_skills_data.loc["0124e218-2d33-41e8-907b-5228ea386455", :].sort_values("count")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
teal category,skill,skill text,Unnamed: 3_level_1
general abilities,end to end,End to End,1
functional abilities,containerization,Kubernetes,1
functional abilities,tensorflow,TensorFlow,1
functional abilities,electronics,Electronics,1
functional abilities,forecasting,Forecasting,1
general abilities,recognition,Recognition,1
functional abilities,azure devops,Azure DevOps,1
functional abilities,data,Data Science,1
general abilities,software engineering,Implementation,1
functional abilities,mechatronics,Mechatronics,1


In [17]:
# Total the weights per (job id, skill, skill text), then group by job id.
shares_by_job = (
    job_skill_weights
    .groupby(level=["id", "skill", "skill text"])
    .sum()
    .groupby(level="id")
)

# Review loop: the trailing `break` means only the first job is processed.
for job_id, job_rows in shares_by_job:
    # "share of job" for this job, keyed by (skill, skill text).
    skill_text_shares = job_rows.droplevel("id")["share of job"]  # .query("`share of job` > 0")

    print(job_id)

    # display(HTML(job_descriptions.loc[job_id, "job description"]))
    # Rows under this job's "Webdev" key, then the 20 largest skill-text shares.
    display(job_skill_weights.loc[(job_id, "Webdev"), :])
    display(skill_text_shares.sort_values(ascending=False).head(20))

    # Render the resume against this job's skill shares into a scratch file.
    my_resume.write_html_to_file(
        filepath="docs/test.html",
        skill_text_shares=skill_text_shares,
        alt_template_prefixes={"*": "pdf"},
    )

    break

0124e218-2d33-41e8-907b-5228ea386455


Unnamed: 0_level_0,Unnamed: 1_level_0,share of job
skill,skill text,Unnamed: 2_level_1
css,CSS,0.00125
html,HTML5,0.02125
javascript,JavaScript,0.00125
web development,Web Development,0.00125


skill                 skill text          
machine learning      AI                      0.083182
data                  Data                    0.064028
communication         Communication Skills    0.048214
containerization      DevOps                  0.042829
python                Python                  0.031875
java                  Java                    0.030833
software engineering  Implementation          0.030625
                      Software Engineering    0.030625
innovation            Innovation              0.028214
data                  Data Science            0.024028
scale                 Scalable                0.023246
big data              Pipelines               0.023194
tensorflow            TensorFlow              0.023182
machine learning      Machine Learning        0.023182
feature engineering   Feature Engineering     0.023182
containerization      Docker                  0.022829
git                   GitHub                  0.022829
containerization      

In [18]:
# JobListing(name="index", skill_weights={
#         "python": 5,
#         "tensorflow":4,
#         "neural networks":3.9,
#         "data engineering":3.8,
#         "machine learning":3.7,
#         "docker":3,"kubernetes":3,
#         "software engineering":2
#     }).export(my_resume, should_render_all=True)

In [19]:
# my_resume.write_html_to_file(
#     filepath="docs/index.html",
#     stylesheet="chalkboard",
#     skill_weights={
#         "python": 5,
#         "tensorflow":4,
#         "neural networks":3.9,
#         "data engineering":3.8,
#         "machine learning":3.7,
#         "docker":3,"kubernetes":3,
#         "software engineering":2
#     },
#     should_render_all=True
# )