In [1]:
%load_ext autoreload
%autoreload 2

import os, sys
# Project root is overridable through the RESUME_PROJECT_ROOT environment variable so
# the notebook is not tied to one machine; the default keeps the original location.
PROJECT_ROOT = os.environ.get("RESUME_PROJECT_ROOT", "/mnt/arrakis/sietch/Resume")
os.chdir(PROJECT_ROOT)

# Make the project's `src` directory importable. The path is resolved against the
# working directory set above, and the membership check keeps re-running this cell
# from appending the same entry to sys.path again.
SRC_DIR = os.path.join(os.getcwd(), "src")
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [3]:
from jinja2 import Environment, FileSystemLoader
from instructor import OpenAISchema
from pydantic import Field, BaseModel
import openai as ai

from typing import List, Optional, Union
from rich import print
import os

from resumeai.models import Bio
from resumeai.openai import patch

# Run the project's `patch` helper (imported above from resumeai.openai) once, right
# after the imports.
# NOTE(review): what it patches is not visible in this notebook — presumably it adapts
# the `openai` client for the schema helpers; confirm in resumeai/openai.py.
patch()

In [7]:
"""Data extraction"""

# Maps a character to its backslash-escaped replacement (currently only "&").
# NOTE(review): nothing in the visible notebook reads this table; Resume.serialize_model
# hard-codes the same "&" replacement instead of looking it up here.
LATEX_ESCAPE = {
    "&": "\\&",
}

def extract_object(prompt, cls, text=None, model="gpt-3.5-turbo-1106"):
    """
    Extract a structured object from free text through an OpenAI function call.

    Parameters
    ----------
    prompt : str
        Instruction sent as the system message.
    cls : type
        Schema class. It must expose an `openai_schema` mapping containing a
        "name" key and a `from_response(completion)` constructor.
    text : str, optional
        Source text sent as the user message. When omitted, the notebook-level
        `TEXT` variable is used, so existing `extract_object(prompt, cls)` calls
        behave exactly as before.
    model : str, optional
        Chat model name passed to the API.

    Returns
    -------
    object
        Whatever `cls.from_response` builds from the completion.

    Raises
    ------
    NameError
        If `text` is omitted and no global `TEXT` has been defined yet.
    """
    if text is None:
        # Backward-compatible fallback to the notebook global. Passing `text`
        # explicitly avoids depending on which cell last assigned TEXT.
        text = TEXT

    schema = cls.openai_schema
    completion = ai.ChatCompletion.create(
        model=model,
        functions=[schema],
        # Force the model to answer by calling the schema's function.
        function_call={"name": schema["name"]},
        temperature=0.0,
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": text},
        ],
    )

    return cls.from_response(completion)

In [9]:
# Sample "Bio" section of the resume. extract_object reads the module-level TEXT as the
# user message, so this assignment has to run before the extraction call below.
TEXT="""\
Bio
---
Tony Kabilan Okeke
Philadelphia, PA
Email: tonykabilanokeke@gmail.com
Phone: 646-659-4319
Location: Philadelphia, PA
LinkedIn: [Tony Kabilan Okeke](https://linkedin.com/in/t-k-o)
GitHub: [Kabilan108](https://github.com/Kabilan108)
Portfolio: kabilan108.github.io
Role: Data Scientist/Bioinformatician
"""

# The first argument of extract_object is the system prompt, not the input text: the
# function already sends the global TEXT as the user message. Passing TEXT here sent
# the resume text twice and gave the model no instruction.
bio = extract_object(
    prompt="Extract the user's personal information.",
    cls=Bio
)
print(bio)

In [5]:
"""Jinja2 templating"""

# Anchor the working directory at the project root so the relative template and output
# paths below resolve. RESUME_PROJECT_ROOT overrides the location on other machines;
# the default keeps the original path.
os.chdir(os.environ.get("RESUME_PROJECT_ROOT", "/mnt/arrakis/sietch/Resume"))

# Templates are loaded from the TLCresume directory under ./templates.
env = Environment(loader=FileSystemLoader("./templates/TLCresume/"))

# Create the output directory if needed; exist_ok makes re-running this cell safe.
rendered_dir = "./rendered"
os.makedirs(rendered_dir, exist_ok=True)

In [70]:
"""Models"""

from pydantic import EmailStr, model_serializer
from typing import Any, Dict


class Bio(OpenAISchema):
    """
    A user's personal information.
    """
    # NOTE(review): this definition rebinds the name `Bio`, which an earlier cell imports
    # from resumeai.models; once this cell runs, `Bio` refers to the class below.
    # NOTE(review): the class docstring and the Field descriptions are presumably used as
    # the schema text read through `cls.openai_schema` — confirm before rewording them.

    # Required fields.
    name: str = Field(..., description="The user's name")
    email: EmailStr = Field(..., description="The user's email")
    # Optional contact details; they default to None when absent.
    phone: Optional[str] = Field(None, description="The user's phone number")
    location: Optional[str] = Field(None, description="The user's location as a city, state pair")
    # NOTE(review): this description first says "profile URL" and then asks for the
    # username only; the two sentences pull in different directions.
    linkedin_user: Optional[str] = Field(None, description="The user's LinkedIn profile URL. Only include the username, not the full URL.")
    github_user: Optional[str] = Field(None, description="The user's GitHub username.")
    portfolio: Optional[str] = Field(None, description="The user's portfolio or personal website URL.")
    # Unlike the other optional fields, this one defaults to "" rather than None.
    role: Optional[str] = Field("", description="The user's current role or position.")
    
    def get_items(self):
        """Return the result of `model_dump()` for this model."""
        return self.model_dump()


class Degree(OpenAISchema):
    """
    A single degree held by a user.
    """
    # All three fields are required strings.
    title: str = Field(..., description="The title of the degree")
    university: str = Field(..., description="The university that awarded the degree")
    # Stored as plain text; the "Month YYYY" format is only requested in the description
    # and is not validated here.
    graduation: str = Field(..., description="The date of graduation, in Month YYYY format")


class Education(OpenAISchema):
    """
    A list of degrees and associated information from a user's resume.
    """
    degrees: List[Degree] = Field(..., description="A list of degrees held by the user")
    specializations: Optional[List[str]] = Field(None, description="A list of specializations or minors held by the user")
    gpa: Optional[Union[int, float]] = Field(None, description="The user's GPA")

    def get_items(self):
        """
        Build the mapping passed to the education template as `items`.

        Returns
        -------
        dict
            `degrees` (the Degree objects as stored), `specializations` (list or None)
            and `gpa` as text. A missing GPA stays None instead of being converted
            to the literal string "None", which is truthy and would be rendered as
            if it were a real value.
        """
        gpa_text = None if self.gpa is None else str(self.gpa)
        return {
            "degrees": self.degrees,
            "specializations": self.specializations,
            "gpa": gpa_text
        }


class Experience(OpenAISchema):
    """
    A single job or internship from a user's resume.
    """
    # Every field is required.
    company: str = Field(..., description="The name of the company")
    position: str = Field(..., description="The position held at the company")
    # Dates are kept as plain strings; no date format is enforced by this model.
    start_date: str = Field(..., description="The start date of the position")
    end_date: str = Field(..., description="The end date of the position")
    location: str = Field(..., description="The location of the company")
    # One list entry per bullet point.
    description: List[str] = Field(..., description="A list of bullet points describing the position")


class Experiences(OpenAISchema):
    """
    A list of all jobs and internships from the user's resume.
    """
    # Container model: the individual Experience entries live under `items`.
    items: List[Experience]

    def get_items(self):
        """Return the `items` entry of this model's `model_dump()` output."""
        dumped = self.model_dump()
        return dumped["items"]


class Activity(OpenAISchema):
    """
    A single club or organization item from a resume.
    """
    # Every field is required.
    organization: str = Field(..., description="The name of the organization")
    position: str = Field(..., description="The position held at the organization")
    # NOTE(review): these two use camelCase while Experience uses start_date / end_date.
    # The names become keys in the dumped output, so renaming them would presumably
    # require matching changes in the activities template — verify before changing.
    startDate: str = Field(..., description="The start date of the position")
    endDate: str = Field(..., description="The end date of the position")


class Activities(OpenAISchema):
    """
    A list of clubs or organizations from user's resume. Ordered by start date.
    """
    # Container model: the individual Activity entries live under `items`.
    items: List[Activity]

    def get_items(self):
        """Return the `items` entry of this model's `model_dump()` output."""
        dumped = self.model_dump()
        return dumped["items"]


class SkillList(OpenAISchema):
    """
    A list of skills of a specific type.
    """
    type: str = Field(..., description="The type of skill", examples=["Programming Languages", "Frameworks", "Databases", "Tools", "Other"])
    skills: List[str] = Field(..., description="A list of skills of the given type")

    @model_serializer
    def serialize_skill_list(self) -> Dict[str, Any]:
        """Serialize with `skills` collapsed into a single comma-separated string."""
        joined_skills = ', '.join(self.skills)
        return dict(type=self.type, skills=joined_skills)


class Skills(OpenAISchema):
    """
    A list of all skills from a user's resume, organized by type.
    """
    # Container model: one SkillList per skill type, stored under `items`.
    items: List[SkillList]

    def get_items(self):
        """Return the `items` entry of this model's `model_dump()` output."""
        dumped = self.model_dump()
        return dumped["items"]


class Resume(OpenAISchema):
    """
    A user's resume.
    """
    bio: Bio = Field(..., description="The user's personal information")
    education: Education = Field(..., description="The user's education")
    experiences: Experiences = Field(..., description="The user's work experience")
    activities: Activities = Field(..., description="The user's extracurricular activities")
    skills: Skills = Field(..., description="The user's skills")

    def get_items(self):
        """
        Collect each section's `get_items()` result under its section name.

        NOTE(review): this path calls the sections' own `get_items()` and never goes
        through `serialize_model`, so the "&" escaping below is not applied here.
        """
        return {
            "bio": self.bio.get_items(),
            "education": self.education.get_items(),
            "experiences": self.experiences.get_items(),
            "activities": self.activities.get_items(),
            "skills": self.skills.get_items()
        }

    @model_serializer(mode="wrap")
    def serialize_model(self, handler):
        """
        Serialize the resume with every "&" in every string value backslash-escaped.

        Every attribute of this model is itself a nested model, so checking only the
        top-level attribute values for `str` never matched anything. The default
        serialization is produced first (through `handler`) and the escaping is then
        applied recursively to the resulting dicts, lists and strings.
        """
        def escape(value):
            # Strings are escaped; containers are rebuilt with escaped contents;
            # anything else (numbers, None, ...) passes through untouched.
            if isinstance(value, str):
                return value.replace('&', '\\&')
            if isinstance(value, dict):
                return {key: escape(item) for key, item in value.items()}
            if isinstance(value, list):
                return [escape(item) for item in value]
            return value

        return escape(handler(self))

In [7]:
# "Bio" section of the resume. extract_object reads the module-level TEXT as the user
# message, so this assignment has to run before the extraction call below.
TEXT = """\
Bio
---
Tony Kabilan Okeke
Philadelphia, PA
Email: tonykabilanokeke@gmail.com
Phone: (646) 659-4319
Location: Philadelphia, PA
LinkedIn: [Tony Kabilan Okeke](https://linkedin.com/in/t-k-o)
GitHub: [Kabilan108](https://github.com/Kabilan108)
Portfolio: kabilan108.github.io
Role: Data Scientist/Bioinformatician
"""

# Extract the Bio fields from the current TEXT and show the parsed object.
bio = extract_object(prompt="Extract the user's personal information.", cls=Bio)
print(bio)

# Render the main resume template with the extracted fields and print the result
# between two 80-dash rules.
template = env.get_template("resume.tex.j2")
text = template.render(items=bio.get_items())
print("\n".join(["-" * 80, text, "-" * 80]))

In [8]:
# "Service and Leadership" section of the resume. extract_object reads the module-level
# TEXT as the user message, so this assignment has to run before the call below.
TEXT = """\
Service and Leadership
----------------------
- **Drexel Computational Design**
  - Co-founder & Vice President, March 2021 - Present
  - Project Manager, May 2022 - Present
- **Tau Beta Pi Honor Society**
  - Member, December 2021 - Present
- **Drexel Society of Artificial Intelligence**
  - Member, September 2022 - Present
"""

# Extract the activities from the current TEXT and show the parsed object.
activities = extract_object(
    cls=Activities,
    prompt="Extract the user's extracurricular activities. This includes clubs, organizations, leadership positions, etc.",
)
print(activities)

# Render the activities section and print it between two 80-dash rules.
template = env.get_template("sections/activities.tex.j2")
text = template.render(items=activities.get_items())
print("\n".join(["-" * 80, text, "-" * 80]))

In [9]:
# "Education" section of the resume. extract_object reads the module-level TEXT as the
# user message, so this assignment has to run before the extraction call below.
TEXT = """\
Education
---------
Drexel University, Philadelphia, PA
M.Sc. in Biomedical Engineering, Anticipated Graduation: June 2024
B.Sc. in Biomedical Engineering, Anticipated Graduation: June 2024
- Cumulative GPA: 4.0/4.0
- Specializations: Bioinformatics, Neuroengineering, Bioimaging
"""

# Extract the education details from the current TEXT and show the parsed object.
education = extract_object(prompt="Extract the user's education information.", cls=Education)
print(education)

# Render the education section and print it between two 80-dash rules.
template = env.get_template("sections/education.tex.j2")
text = template.render(items=education.get_items())
print("\n".join(["-" * 80, text, "-" * 80]))

In [47]:
# "Professional Experience" section of the resume. extract_object reads the module-level
# TEXT as the user message, so this assignment has to run before the call below.
TEXT = """\
Professional Experience
-----------------------
### IVIVT-Non-Clinical Safety, GlaxoSmithKline (GSK), Computational Biology Co-op, Collegeville, PA
- **Upcoming Term:** April 2023 - September 2023
    Elevated spatial transcriptomics data analysis by integrating advanced machine learning models and 10X Visium technology, enhancing quality control and cellular annotation.
    - Developed software solutions to seamlessly integrate internally generated spatial transcriptomics data with publicly available single-cell data, advancing comprehensive cellular analysis.
    - Orchestrated the internal deployment of PFRED software on GSK's high-performance computing system, streamlining RNA-seq investigations into species-specific cellular responses.
    - Engineered a Python-based bioinformatic tool for automating the import and analysis of tissue-specific gene expression data, accelerating the oval cell hyperplasia tox study.
    - Partnered with the eSTAR working group to build a neural network model that predicts molecular initiating events in liver carcinogenicity, leveraging rat transcriptomics studies.
    - Employed UMAP and t-SNE techniques to cluster HESI compounds, and developed machine learning models for biomarker identification within each cluster, enhancing targeted assessments in carcinogenicity.
    - Spearheaded the development of an interactive spatial transcriptomics analysis platform for GeoMx technology, supporting future acquisition plans and facilitating in-depth, interactive reporting.
- **Previous Term:** April 2022 - September 2022
    - Engineered an interactive web app for statistical analysis in high-content imaging toxicology via Python's `Dash` and `Flask`; scaled computations using `Dask` and deployed on `RStudio Connect`.
    - Authored a Python package encapsulating statistical, visualization, and machine learning tools for high-content imaging.
    - Implemented machine learning algorithms (Decision Trees, Random Forests, SVM) for biomarker discovery in high-content imaging datasets.
    - Enhanced image quality using `Noise2Void` deep learning techniques, contributing to more precise image segmentation pipelines in `CellProfiler`.
    - Implemented feature extraction pipelines for high-content imaging data using `CellProfiler` and `Columbus`.

### Invenio Lab, Hospital of the University of Pennsylvania, Immunology Research Assistant, Philadelphia, PA
- **Term:** March 2021 - August 2022
    - Standardized protocols and performed assays for DNA, RNA, and protein isolation; prepared NGS libraries.
    - Employed unsupervised machine learning to interpret clinical and multi-omic data, presented via `Jupyter` notebooks.
    - Created Python and R scripts for DNA methylation analysis from Illumina microarray data.
    - Conducted comprehensive data analysis on post-cardiopulmonary bypass patients, including differential methylation and pathway enrichment.

### Zhou Lab, Children's Hospital of Philadelphia, Undergraduate Research Intern, Philadelphia, PA
- **Term:** May 2020 - June 2021
    - Collaborated in the development of open-source R packages for DNA methylation analysis.
    - Validated package functionalities using publicly available GEO datasets.
"""

# Extract the work experience from the current TEXT and show the parsed object.
experiences = extract_object(prompt="Extract all of the user's professional experiences.", cls=Experiences)
print(experiences)

# Render the experience section and print it between two 80-dash rules.
template = env.get_template("sections/experience.tex.j2")
text = template.render(items=experiences.get_items())
print("\n".join(["-" * 80, text, "-" * 80]))

In [67]:
# "Technical Skills" section of the resume. extract_object reads the module-level TEXT
# as the user message, so this assignment has to run before the extraction call below.
# The backslash in front of "&" is written as "\\" because "\&" is not a recognised
# escape sequence in a regular string literal; the resulting text is unchanged
# (one backslash followed by "&").
TEXT = """\
Technical Skills
----------------
- **Programming Languages:** Python, R, Bash, C++, MATLAB, SQL, AWK, Git, PHP, JavaScript
- **Libraries \\& Frameworks:** TensorFlow, Keras, PyTorch, Scikit-learn, Flask, FastAPI, Shiny, Dash, Streamlit
- **Bioinformatics Tools:** Seurat, SquidPy, CellProfiler, Columbus, Loupe Browser
- **Wet Lab Assays:** PCR, qPCR, ELISA, Western Blot, DNA/RNA/Protein Extraction, NGS Library Preparation
"""

# Extract the skills from the current TEXT and show the parsed object.
skills = extract_object(prompt="Extract all of the user's skills.", cls=Skills)
print(skills)

# Render the skills section and print it between two 80-dash rules.
template = env.get_template("sections/skills.tex.j2")
text = template.render(items=skills.get_items())
print("\n".join(["-" * 80, text, "-" * 80]))

In [77]:
# Read the whole profile in a single call. Joining readlines() with "\n" doubled every
# line break, because each line returned by readlines() already ends with its newline.
# The encoding is stated explicitly so the result does not depend on the platform default.
with open('./profile.md', 'r', encoding='utf-8') as f:
    TEXT = f.read()

# Extract every resume section in one request; extract_object sends the TEXT assigned
# above as the user message.
resume = extract_object(
    prompt="Extract the user's resume from the provided markdown.",
    cls=Resume
)


# templates = ['resume', 'sections/activities', 'sections/education', 'sections/experience', 'sections/skills']
# for template in templates:

# template = env.get_template(f"sections/skills.tex.j2")
# text = template.render(items=skills.get_items())
# print(f"{'-'*80}\n" + text + f"\n{'-'*80}")

In [78]:
# Display the extracted Resume object. `print` here is rich's print, imported at the top
# of the notebook, not the builtin.
print(resume)