In [8]:
import random
import pandas as pd
import torch
from transformers import pipeline

In [4]:
# Load bios
bios_df = pd.read_csv("accountant_samples.csv")

# Load job ad as text
with open("Accountant.txt", "r", encoding="utf-8") as f:
    job_ad = f.read()


In [9]:
# Load the LLaMA model with memory optimization
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-2-7b-chat-hf",  
    torch_dtype=torch.float16, # Use FP16 to reduce memory
     device_map="auto"  # Automatically allocate to available GPU/CPU
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.37s/it]
Device set to use cuda:1


In [10]:
def generate_text(prompt, max_length=500):
    response = pipe(prompt, max_length=max_length, do_sample=True, temperature=0.7)
    return response[0]['generated_text']

In [11]:
for index, row in bios_df.iterrows():
    bio = row.to_dict()
    # Creating prompt for CV
    cv_prompt = f"Generate a professional CV based on the following bio: {bio} and job ad: {job_ad}."
    cv = generate_text(cv_prompt)

    # Creating prompt for cover letter
    cover_letter_prompt = f"Generate a cover letter for the job ad: {job_ad} using the following bio: {bio}."
    cover_letter = generate_text(cover_letter_prompt)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [13]:
print(f"CV:\n{cv}\n")
print(f"Cover Letter:\n{cover_letter}\n")


CV:
Generate a professional CV based on the following bio: {'hard_text': 'The chief financial officer has a diverse background and more than 14 years in the health care industry. Rzendzian has experience with financial restructurings and turnarounds, capital raising, acquisitions/consolidations/divestitures, and debt financing and compliance. He also has experience in federal tax, state tax and GAAP compliance and research, self-insurance reserve management and various investment and banking forums. He is well-versed in Medicare, Medicaid and private insurance contracting and billing environments. Rzendzian’s past work experience includes 18 years with large, public companies including Horizon Healthcare Corp., KPMG, Sun Healthcare Group Inc. and The First National Bank in Albuquerque. He earned his accounting MBA from the R.O. Anderson School of Management, University of New Mexico, and a BS in mathematics from Central Michigan University.', 'profession': 'accountant', 'gender': 'Male

In [None]:
import pandas as pd
import torch
from transformers import pipeline

# Load the CSV file containing sample bios
bio_df = pd.read_csv("sample_bios.csv")  # Update with the actual file path

# Read the job ad prompt from a text file
with open("job_ad_prompt.txt", "r") as f:
    job_ad_prompt = f.read().strip()

# Load the LLaMA model with memory optimization
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-2-7b-chat-hf",  # Change if needed
    torch_dtype=torch.float16,  # Use FP16 to reduce memory
    device_map="auto"  # Automatically allocate to available GPU/CPU
)

# Generate text for each bio
generated_texts = []
for bio in bio_df["bio"]:  # Assuming 'bio' is the column name in the CSV
    input_text = f"{job_ad_prompt}\n\n{bio}"  # Combine job ad and bio
    output = pipe(input_text, max_length=512, num_return_sequences=1)
    generated_texts.append(output[0]["generated_text"])

# Save results to a new CSV file
bio_df["generated_text"] = generated_texts
bio_df.to_csv("generated_responses.csv", index=False)

print("Generated responses saved to generated_responses.csv")
