In [1]:
# !pip install -r requirements.txt -q

In [2]:
# !pip install -U datasets transformers tokenizers pydantic auto_gptq gradio -q

In [3]:
# !pip install git+https://github.com/huggingface/transformers -q

In [4]:
# Import relevant libraries
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
torch.cuda.empty_cache()
import time
import pandas as pd

import sys
# adding UtilityFuncts to the system path
sys.path.insert(0, '/notebooks/Utility')
import UtilityFuncts as uf

In [5]:
# !pip install huggingface_hub

In [6]:
# !huggingface-cli login

In [7]:


print("Loading LLM into GPU memory")
# Pin the revision commit hash for reproducibility: the uploader might change the
# model afterwards. The commit history of llamav2-7b-chat is at
# https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/commits/main
model = "meta-llama/Llama-2-7b-chat-hf"
revision = "0ede8dd71e923db6258295621d817ca8714516d4"

# Never hardcode the access token in the notebook. Read it from the environment;
# when it is not set, pass None so the credentials cached by
# `huggingface-cli login` are used instead of an empty string.
import os
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None

# The tokenizer is pinned to the same commit as the model weights so both come
# from one consistent snapshot of the repository.
tokenizer = AutoTokenizer.from_pretrained(
    model,
    padding_side="left",
    revision=revision,
    use_auth_token=token,
)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    # NOTE(review): trust_remote_code allows code shipped in the model repo to run
    # locally; confirm it is actually needed for this model.
    trust_remote_code=True,
    device_map="auto",
    revision=revision,
    return_full_text=False,
    # use_auth_token = token
)

# Required tokenizer setting for batch inference
pipeline.tokenizer.pad_token_id = tokenizer.eos_token_id



Loading LLM into GPU memory




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [8]:
# Text generation helper built on the module-level `pipeline` and `tokenizer`
def gen_text(prompts, use_template=False, **kwargs):
    """Generate one completion per prompt using the module-level `pipeline`.

    Args:
        prompts: A list of prompt strings. A single string is also accepted and
            is treated as a one-element list (otherwise it would be iterated
            character by character).
        use_template: When True, each prompt is substituted into
            PROMPT_FOR_GENERATION_FORMAT as the `advertisement` field.
        **kwargs: Extra arguments forwarded to the pipeline call. Defaults that
            are applied only when the caller did not supply the key:
            batch_size=1, max_new_tokens=512, and pad_token_id / eos_token_id
            set to tokenizer.eos_token_id.

    Returns:
        A list with the first generated text for each prompt, in input order.
    """
    if isinstance(prompts, str):
        prompts = [prompts]

    if use_template:
        full_prompts = [
            PROMPT_FOR_GENERATION_FORMAT.format(advertisement=prompt)
            for prompt in prompts
        ]
    else:
        full_prompts = prompts

    kwargs.setdefault("batch_size", 1)

    # the default max length is pretty small (20), which would cut the generated
    # output in the middle, so raise the threshold to fit the complete response
    kwargs.setdefault("max_new_tokens", 512)

    # configure other text generation arguments, see common configurable args here:
    # https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
    # pad_token_id is set explicitly to avoid the redundant Hugging Face message.
    # setdefault keeps any value the caller passed, consistent with the two
    # defaults above, instead of silently overwriting it.
    kwargs.setdefault("pad_token_id", tokenizer.eos_token_id)
    kwargs.setdefault("eos_token_id", tokenizer.eos_token_id)

    outputs = pipeline(full_prompts, **kwargs)

    # each entry of `outputs` is a list of candidates; keep the first one
    return [out[0]["generated_text"] for out in outputs]


# Load the work-history inputs from CSV: one file with the roles and one with
# the achievements.
wr_path = "/notebooks/Inputs/WorkHistory_Roles.csv"
wa_path = "/notebooks/Inputs/WorkHistory_Achievments.csv"

# wr_df is the roles dataframe, wa_df the achievements dataframe
wr_df, wa_df = map(pd.read_csv, (wr_path, wa_path))

In [9]:
# Assemble the work history as a flat list of text lines: for every role a
# header (role, company, location, duration) followed by its achievements.
work_history_list = []

for _, role in wr_df.iterrows():
    # Header lines for this role
    work_history_list.extend(
        [
            f"Role: {role['Role']}",
            f"Company: {role['Company']}",
            f"Location: {role['Location']}",
            f"Duration: {role['Period']}",
            "Achievements:",
        ]
    )

    # One bullet per achievement whose role_id matches this role
    role_achievements = wa_df[wa_df['role_id'] == role['role_id']]
    work_history_list.extend(
        f"- {achievement['achievement']}"
        for _, achievement in role_achievements.iterrows()
    )

    # An empty entry separates consecutive roles
    work_history_list.append("")

# Collapse the lines into a single newline-separated string
work_history = "\n".join(work_history_list)

# COMMAND ----------

# Get job ads dictionary from the project-local UtilityFuncts module (imported as `uf`).
# NOTE(review): presumably maps each advertisement to its folder — confirm against
# UtilityFuncts.obtain_ad_folder_dict, whose implementation is not visible here.
job_ads = uf.obtain_ad_folder_dict()

# COMMAND ----------



In [10]:
# Generate a cover letter for a single advertisement (ad1).
# The loop over every advertisement is currently disabled:
# for ad, folder in job_ads.items():
print("Generating cover letter for ad1")

# Read the advertisement text from disk
job_ad_path = "/notebooks/Job Ads/New Ad/ad1.txt"

with open(job_ad_path, mode="r", encoding="utf-8") as ad_file:
    job_ad = ad_file.read()
    
# System prompt: describes the persona and embeds the work history text built
# from the CSV inputs.
DEFAULT_SYSTEM_PROMPT = f"""\
You are an intelligent, respectful and honest job seeker. This is your work history: {work_history}. You are applying for a job. Always answer as professionally as possible, while being friendly. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. Below is an advertisment that describes a job you are interested in applying for. Write a short, succinct, professional cover letter in response to the job, making sure to highlight relevant details from your work history.
"""

# The template is formatted in two stages: the system prompt is filled in here,
# while `{advertisement}` is kept as a placeholder that gen_text fills in later
# with a second `.format(advertisement=...)` call.
# Because the result is formatted again, any literal brace coming from the work
# history must be doubled here; otherwise the second `.format` would treat it as
# a replacement field and raise. Text without braces is unaffected.
PROMPT_FOR_GENERATION_FORMAT = """
<s>[INST]<<SYS>>
{system_prompt}
<</SYS>>

{advertisement}
[/INST]
""".format(
    system_prompt=DEFAULT_SYSTEM_PROMPT.replace("{", "{{").replace("}", "}}"),
    advertisement="{advertisement}",
)



Generating cover letter for ad1


In [11]:
    # Generate the text
# Generate the cover letter: one advertisement is passed, wrapped in the prompt
# template, so `results` is a list with a single generated string.
# NOTE(review): the recorded output of this cell warns that the call exceeds the
# model's predefined maximum length (2048) — the prompt plus max_new_tokens may
# need to be shortened.
results = gen_text([job_ad], use_template = True)

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (2048). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


In [14]:
# Display the generated cover letter text
results

["Dear Hiring Manager,\n\nI am writing to express my strong interest in the Data Scientist position at the NSW Department of Education. As a seasoned data scientist with a proven track record of delivering data-driven insights and solutions, I am confident that I can make a valuable contribution to your team.\n\nI am particularly impressed by the Department's commitment to leveraging data and insights to drive its vision, and I am excited about the opportunity to play a role in this effort. I am also drawn to the Department's focus on creating a culture of data-driven decision-making, and I believe that my experience and skills align well with this goal.\n\nIn my current role as a Data Science Manager at Company XYZ, I have led a team of data scientists and analysts in developing cutting-edge machine learning models for predictive maintenance, resulting in a 30% reduction in maintenance costs. I have also implemented a data-driven recommendation system that increased user engagement by

In [16]:
# Save output: write the first (and only) generated cover letter to disk
output_path = '/notebooks/Resume Components/ad1_2023-09-09_06-10-41/cover_letter_text.txt'

# Write explicitly as UTF-8, matching how the job ad is read, so the result does
# not depend on the platform default encoding and non-ASCII characters in the
# generated text cannot make the write fail.
with open(output_path, 'w', encoding='utf-8') as f:
    f.write(results[0])
