In [1]:

import os
import re

import pandas as pd
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the Llama-2 chat tokenizer and model weights from the Hugging Face hub id
model_id = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half-precision weights
    device_map="auto",  # automatic device placement
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.42s/it]


In [3]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,  # FP16, the same dtype the model was loaded with
    device_map="auto",  # automatic device placement
)

Device set to use cuda:0


In [5]:
# Read the bios table; the generation loop later uses its "hard_text" column
bios_df = pd.read_csv(filepath_or_buffer="accountant_samples.csv")


In [17]:
# Load job ads from individual files
job_ads_dir = "job_ads/jobADs_accountant"


def _natural_sort_key(filename):
    """Sort key that compares digit runs numerically.

    Plain string sorting puts "ad_10.txt" before "ad_2.txt"; with this key the
    numeric parts are compared as integers, so "ad_2.txt" comes first.
    Zero-padded or digit-free names keep the order plain sorting would give.
    """
    # re.split with a capturing group alternates text, digits, text, ...,
    # so every odd position is a digit run.
    parts = re.split(r"(\d+)", filename)
    return [int(part) if pos % 2 else part for pos, part in enumerate(parts)]


# The generation loop pairs job ads with bios by position, so the file order
# must follow the numbers in the filenames rather than character order.
job_ads_files = sorted(
    (name for name in os.listdir(job_ads_dir) if name.endswith(".txt")),
    key=_natural_sort_key,
)

# Read every ad into memory, trimming leading/trailing whitespace.
job_ads = []
for filename in job_ads_files:
    with open(os.path.join(job_ads_dir, filename), "r", encoding="utf-8") as f:
        job_ads.append(f.read().strip())


In [23]:
# --- Prompt Templates ---
def _read_text(path):
    """Return the full contents of the UTF-8 text file at ``path``."""
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()


# Both templates are later filled through str.format(bio=..., job_ad=...)
# inside format_prompt.
cv_template = _read_text("prompt_CV.txt")
cover_letter_template = _read_text("prompt_CoverLetter.txt")

In [7]:
def format_prompt(template, bio, job_ad):
    """Fill a prompt template and wrap it in ``<s>[INST] ... [/INST]`` markers.

    Args:
        template: Format string; ``bio`` and ``job_ad`` are supplied to
            ``str.format`` as keyword arguments.
        bio: Text substituted for ``{bio}``.
        job_ad: Text substituted for ``{job_ad}``.

    Returns:
        The filled template surrounded by the instruction markers.

    NOTE(review): the literal "<s>" may duplicate the BOS token if the
    tokenizer also prepends one -- confirm against the tokenizer settings.
    """
    instruction = template.format(bio=bio, job_ad=job_ad)
    return "<s>[INST] " + instruction + " [/INST]"

In [19]:
# Ensure the folder for generated files exists (no error if it already does)
os.makedirs(name="generated_CV_CoverLetter", exist_ok=True)

In [None]:
# --- Generate Outputs ---
# For every bio, build a CV prompt and a cover-letter prompt against the job ad
# at the same position, generate both texts, print them, and save each one to
# its own file under generated_CV_CoverLetter/.

# Fail fast: bios and job ads are paired by position, so a missing job ad would
# otherwise surface as an IndexError only after earlier rows were generated.
if len(job_ads) < len(bios_df):
    raise ValueError(
        f"Need one job ad per bio, but found {len(job_ads)} job ads "
        f"for {len(bios_df)} bios"
    )


def _generate(prompt):
    """Return only the newly generated text for ``prompt``."""
    outputs = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=False,  # deterministic greedy decoding (no sampling)
        return_full_text=False,  # keep the echoed prompt out of the result
    )
    return outputs[0]["generated_text"]


for number, (bio, job_ad) in enumerate(zip(bios_df["hard_text"], job_ads), start=1):
    cv_prompt = format_prompt(cv_template, bio, job_ad)
    cover_prompt = format_prompt(cover_letter_template, bio, job_ad)

    cv_response = _generate(cv_prompt)
    cover_response = _generate(cover_prompt)

    print(f"\n--- CV and Cover Letter for Bio #{number}, Job Ad #{number} ---")
    print(f"\nCV:\n{cv_response}\n")
    print(f"Cover Letter:\n{cover_response}\n")

    # Save each CV and Cover Letter in separate files
    with open(f"generated_CV_CoverLetter/cv_{number}.txt", "w", encoding="utf-8") as f:
        f.write(cv_response)

    with open(f"generated_CV_CoverLetter/cover_letter_{number}.txt", "w", encoding="utf-8") as f:
        f.write(cover_response)

    print(f"Saved CV and Cover Letter for Bio #{number}")