In [1]:
# (Re)load IPython's autoreload extension and use mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import sys
import os
from pathlib import Path

# A notebook has no __file__, so the parent of the current working directory is
# used as the directory to put on the import path.
parent_dir = str(Path.cwd().parent)

# Append it only when missing, so re-running this cell never duplicates the entry.
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

print(f"Added to path: {parent_dir}")

Added to path: /Users/sawale/Documents/learning/resumer


In [3]:
import os
import instructor
from pydantic import BaseModel
from google import genai
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) before they are read below.
load_dotenv()

# Native google-genai client in Vertex AI mode; project and location are read
# from the GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_LOCATION environment variables.
native_client = genai.Client(
    vertexai=True,
    project=os.getenv("GOOGLE_CLOUD_PROJECT"),
    location=os.getenv("GOOGLE_CLOUD_LOCATION"),
)

# Wrap the native client with Instructor (structured-outputs mode, async variant).
aclient = instructor.from_genai(
    native_client,
    mode=instructor.Mode.GENAI_STRUCTURED_OUTPUTS,
    use_async=True,
)


In [4]:
# Sanity check: display the patched client (the recorded output is an AsyncInstructor).
aclient

<instructor.core.client.AsyncInstructor at 0x12deebdd0>

In [5]:
from resumer import ResumeTailorPipeline

Consider using the pymupdf_layout package for a greatly improved page layout analysis.


In [6]:
# Build the tailoring pipeline.
# The resume PDF is located relative to `parent_dir` (the directory added to
# sys.path in the setup cell) instead of a user-specific absolute path, so the
# notebook is not tied to one machine's home directory.
# NOTE(review): model_name is None when GOOGLE_GEMINI_MODEL_NAME is unset —
# confirm how ResumeTailorPipeline handles that.
pp = ResumeTailorPipeline(
    aclient=aclient,
    model_name=os.environ.get("GOOGLE_GEMINI_MODEL_NAME"),
    resume_path=os.path.join(parent_dir, "resumer", "demo", "Sajil_Awale_CV_2025.pdf"),
    output_dir="./output/",
)


In [7]:
# Run the pipeline for one job posting (top-level `await` requires an IPython/Jupyter kernel).
# NOTE(review): the recorded run ends with
#   "Error in json_to_latex_pdf: 'NoneType' object is not iterable"
# right after the "References" section is added. The recorded dumps show References
# entries with description=None, which may be the value being iterated — confirm in
# resumer.utils.latex_ops.
await pp.generate_tailored_resume(job_url="https://lifeattiktok.com/search/7527589557336869138")

--- Scraping job details from: https://lifeattiktok.com/search/7527589557336869138 ---
--- Extracting job info via LLM ---
--- Loading resume info from disk cache ---
--- Successfully extracted both Resume and Job data ---
--- Adding section: summary ---
--- Adding section: work_experience ---
--- Adding section: education ---
--- Adding section: skill_sections ---
--- Adding section: projects ---
--- Adding section: certifications ---
--- Adding section: achievements ---
--- Adding section: research_works ---
--- Adding section: Exchange Program and Fellowship ---
--- Adding section: Volunteering and Teaching experience ---
--- Adding section: References ---
Error in json_to_latex_pdf: 'NoneType' object is not iterable


In [8]:
from resumer.utils.latex_ops import json_to_latex_pdf
# Call json_to_latex_pdf directly on the pipeline's resume dict, passing
# <output_dir>/tailored_resume.pdf as the second argument (presumably the output path).
tailored_pdf_path = os.path.join(pp.output_dir, "tailored_resume.pdf")
x = json_to_latex_pdf(pp.resume_details, tailored_pdf_path)

Error in json_to_latex_pdf: 'NoneType' object is not iterable


In [24]:
# Inspect the resume dict that is handed to json_to_latex_pdf.
pp.resume_details

{'personal_info': {'name': {'segments': [{'type': 'text',
     'content': 'Sajil Awale'}]},
  'location': {'segments': [{'type': 'text',
     'content': 'Huntsville, Alabama'}]},
  'phone': {'segments': [{'type': 'text', 'content': '+1-256-417-3690'}]},
  'email': {'segments': [{'type': 'link',
     'content': 'sajilawale@gmail.com',
     'url': 'mailto:sajilawale@gmail.com'}]},
  'media': {'portfolio': 'https://www.sajilawale.com.np',
   'linkedin': 'https://www.linkedin.com/in/sajilawale/',
   'github': 'https://github.com/AwaleSajil',
   'medium': None,
   'devpost': None}},
 'summary': {'segments': [{'type': 'text',
    'content': "Machine Learning Engineer and Master's candidate with 4+ years of experience specializing in NLP, Large Language Models, and multimodal AI for content understanding and risk identification. Proficient in end-to-end algorithm development, including fine-tuning large models, distributed training, computer vision, and agentic AI evaluation. Eager to contrib

In [22]:
# List the names of the custom sections present in the resume dict.
pp.resume_details["custom_sections"].keys()

dict_keys(['Exchange Program and Fellowship', 'Volunteering and Teaching experience', 'References'])

In [23]:
# Inspect the "References" entries; in the recorded output both entries have
# date_description=None and description=None.
pp.resume_details["custom_sections"]["References"]

[{'title': {'segments': [{'type': 'text',
     'content': 'Tathagata Mukharjee, Professor at University of Alabama in Huntsville'}]},
  'subtitle': {'segments': [{'type': 'text', 'content': 'tm0130@uh.edu'}]},
  'date_description': None,
  'description': None},
 {'title': {'segments': [{'type': 'text',
     'content': 'Stacey Finn, Director of Data Science and Analytics at CedarGate'}]},
  'subtitle': {'segments': [{'type': 'text', 'content': 'safinn5@gmail.com'}]},
  'date_description': None,
  'description': None}]

In [11]:
# Dump every custom section of the resume dict (a mapping of section name -> list of entries).
pp.resume_details["custom_sections"]

{'Exchange Program and Fellowship': [{'title': {'segments': [{'type': 'link',
      'content': 'First Nepal Winter School in AI',
      'url': 'https://photos.app.goo.gl/kBatEMLzQqRJKU37'}]},
   'subtitle': {'segments': [{'type': 'link',
      'content': 'Nepal Applied Mathematics and Informatics Institute for Research (NAAMII)',
      'url': 'https://www.naamii.org.np/'}]},
   'date_description': {'segments': [{'type': 'text',
      'content': '20th - 30th Dec, 2018'}]},
   'description': [{'segments': [{'type': 'text',
       'content': 'Gained foundational knowledge in Deep Learning, probability, statistics, and linear algebra from esteemed professors, crucial for advanced algorithm development.'}]},
    {'segments': [{'type': 'text',
       'content': 'Completed hands-on lab assignments directly applying concepts in computer vision and natural language processing (NLP), aligning with key model development areas for content safety.'}]}]},
  {'title': {'segments': [{'type': 'link',
 

In [12]:
# Dump the resume_info model as a plain dict, for comparison with pp.resume_details.
pp.resume_info.model_dump()

{'personal_info': {'name': {'segments': [{'type': 'text',
     'content': 'Sajil Awale'}]},
  'location': {'segments': [{'type': 'text',
     'content': 'Huntsville, Alabama'}]},
  'phone': {'segments': [{'type': 'text', 'content': '+1-256-417-3690'}]},
  'email': {'segments': [{'type': 'link',
     'content': 'sajilawale@gmail.com',
     'url': 'mailto:sajilawale@gmail.com'}]},
  'media': {'portfolio': 'https://www.sajilawale.com.np',
   'linkedin': 'https://www.linkedin.com/in/sajilawale/',
   'github': 'https://github.com/AwaleSajil',
   'medium': None,
   'devpost': None}},
 'summary': {'segments': [{'type': 'text',
    'content': 'Machine Learning Engineer with 4+ years of experience specializing in NLP, Large Language Models, and Agentic AI evaluation. Currently a CS Master’s student (4.0 GPA) at NASA-IMPACT, developing foundational scientific embedding models and benchmarking autonomous research agents. Expert in deploying production-grade ML pipelines for healthcare analytics a

In [13]:
# Show the JobInfo object the pipeline holds for the job posting.
pp.job_info

JobInfo(job_title='Algorithm Engineer Intern (Content Safety)', job_purpose='Develop state-of-the-art computer vision, NLP, and multimodality models and algorithms to protect the platform and users from content and behaviors that violate community guidelines, ultimately enhancing user experience and bringing joy to users worldwide. The role involves participating in the development of cutting-edge content understanding models and optimizing distributed model training frameworks.', keywords=['computer vision', 'NLP', 'multimodality models', 'algorithms', 'content understanding', 'distributed model training', 'multimodal large models', 'few-shot learning', 'zero-shot learning', 'content safety', 'moderation models', 'reinforcement learning', 'data mining', 'Chain-of-Thought (CoT) annotation frameworks', 'risk ranking', 'recall systems', 'algorithm development', 'data processing', 'modeling', 'evaluation', 'PyTorch', 'TensorFlow', 'machine learning', 'deep learning'], job_duties_and_respo

In [14]:
# List the top-level fields of the resume_info model.
pp.resume_info.model_dump().keys()

dict_keys(['personal_info', 'summary', 'work_experience', 'education', 'skill_sections', 'projects', 'certifications', 'achievements', 'research_works', 'custom_sections', 'keywords'])

In [15]:
# Print the plain-text name of every custom section on the resume model.
# Uses direct attribute access rather than getattr() with a constant name, and
# no throwaway intermediate variable.
for section in pp.resume_info.custom_sections:
    print(section.section_name.plain_text)


Exchange Program and Fellowship
Volunteering and Teaching experience
References


In [16]:
# Dump one custom section as a plain dict.
# NOTE(review): index 2 is "References" only in the recorded run's section order;
# a positional index selects a different section if that order changes.
pp.resume_info.custom_sections[2].model_dump()

{'section_name': {'segments': [{'type': 'text', 'content': 'References'}]},
 'section_detail': [{'title': {'segments': [{'type': 'text',
      'content': 'Tathagata Mukharjee, Professor at University of Alabama in Huntsville'}]},
   'subtitle': {'segments': [{'type': 'text', 'content': 'tm0130@uh.edu'}]},
   'date_description': None,
   'description': None},
  {'title': {'segments': [{'type': 'text',
      'content': 'Stacey Finn, Director of Data Science and Analytics at CedarGate'}]},
   'subtitle': {'segments': [{'type': 'text',
      'content': 'safinn5@gmail.com'}]},
   'date_description': None,
   'description': None}]}

In [17]:
# Show the custom sections as model objects (GenericSection instances in the recorded output).
pp.resume_info.custom_sections

[GenericSection(section_name=RichText(segments=[TextSegment(type='text', content='Exchange Program and Fellowship')]), section_detail=[GenericElement(title=RichText(segments=[LinkSegment(type='link', content='Sakura Science Exchange Program', url='https://photos.app.goo.gl/P8gFatguLP5F1kmM9')]), subtitle=RichText(segments=[LinkSegment(type='link', content='Japan Science and Technology Agency', url='https://www.jst.go.jp/EN/')]), date_description=RichText(segments=[TextSegment(type='text', content='16th - 23th Dec, 2019')]), description=[RichText(segments=[TextSegment(type='text', content='Selected as one of the top 3 students for a program at Japan’s National Institute of Technology, Kisarazu. We presented our poster, visited industries, and exchanged ideas and solutions with international peers.')]), RichText(segments=[TextSegment(type='text', content='Participated in sessions covering Japan’s cutting-edge technologies, including Artificial Intelligence and the Internet of Things (IoT

In [18]:
# Reshape the custom sections into a {section name: list of entries} mapping,
# i.e. the same structure the other (normal) sections use.
custom_output = {}

for csection in pp.resume_info.custom_sections:
    # The section's plain-text name becomes the dictionary key.
    key_name = csection.section_name.plain_text
    section_entries = csection.model_dump()["section_detail"]
    custom_output[key_name] = section_entries
    print(type(section_entries))

<class 'list'>
<class 'list'>
<class 'list'>


In [19]:
# Check what model_dump_json returns: the recorded output is `str` (a JSON string, not a dict).
type(pp.resume_info.model_dump_json(include={"summary"}))

str

In [20]:
# Serialize only the work_experience field to a JSON string.
pp.resume_info.model_dump_json(include={"work_experience"})

'{"work_experience":[{"role":{"segments":[{"type":"text","content":"Graduate Research Assistant for LLM team"}]},"company":{"segments":[{"type":"link","content":"NASA-IMPACT @ UAH","url":"https://www.earthdata.nasa.gov/about/impact"}]},"location":{"segments":[]},"date_description":{"segments":[{"type":"text","content":"August 2024 - Present"}]},"description":[{"segments":[{"type":"text","content":"Science Keyword Recommender: Built an extreme multi-label classifier for NASA CMR, scaling from 430 to 3,240 science keywords. Used Focal Loss and custom stratified sampling to improve F1 to 0.55, enhancing metadata accuracy and dataset discoverability."}]},{"segments":[{"type":"text","content":"Pre-training Science Embedding Model (Indus-SDE): Pretrained a RoBERTa-based model on 520K NASA documents with extended 1024-token input and Weighted Keyword Based Dynamic Masking. Achieved 78.1% top-1 MLM accuracy, outperforming baselines on keyword tagging, astrophysics, and EJ tasks."}]},{"segments

In [21]:
# Serialize only the skill_sections field to a JSON string.
pp.resume_info.model_dump_json(include={"skill_sections"})

'{"skill_sections":[{"name":{"segments":[{"type":"text","content":"Languages"}]},"skills":[{"segments":[{"type":"text","content":"Python"}]},{"segments":[{"type":"text","content":"C++"}]},{"segments":[{"type":"text","content":"C"}]},{"segments":[{"type":"text","content":"C#"}]},{"segments":[{"type":"text","content":"MATLAB"}]},{"segments":[{"type":"text","content":"SQL"}]}]},{"name":{"segments":[{"type":"text","content":"Machine Learning"}]},"skills":[{"segments":[{"type":"text","content":"Pytorch"}]},{"segments":[{"type":"text","content":"Transformers"}]},{"segments":[{"type":"text","content":"Scikit-Learn"}]},{"segments":[{"type":"text","content":"W&B"}]},{"segments":[{"type":"text","content":"Spacy"}]},{"segments":[{"type":"text","content":"Keras"}]},{"segments":[{"type":"text","content":"OpenCV"}]},{"segments":[{"type":"text","content":"Imbalanced-Learn"}]},{"segments":[{"type":"text","content":"Hyperopt"}]}]},{"name":{"segments":[{"type":"text","content":"Data Analysis Packages"}]