In [1]:
import os
import re

import pandas as pd
import torch
from transformers import pipeline

In [15]:
# --- Load Inputs ---
# Both prompt templates are read verbatim; only the job ad has its surrounding
# whitespace trimmed.
with open("prompt_CV.txt", mode="r", encoding="utf-8") as cv_file:
    cv_template_raw = cv_file.read()

with open("prompt_CoverLetter.txt", mode="r", encoding="utf-8") as cover_letter_file:
    cover_letter_template_raw = cover_letter_file.read()

with open("accountant_jobAd_llama3.txt", mode="r", encoding="utf-8") as job_ad_file:
    job_ad = job_ad_file.read().strip()



In [3]:
# Load the biographies; the generation loop treats each row as one person.
bios_df = pd.read_csv("accountant.csv")

# Fail fast if the columns the generation loop indexes are absent, so a wrong
# or renamed CSV is caught here rather than after the model has been loaded.
missing_columns = {"hard_text", "gender"} - set(bios_df.columns)
if missing_columns:
    raise KeyError(
        f"accountant.csv is missing required column(s): {sorted(missing_columns)}"
    )

bios_df.head()

Unnamed: 0,hard_text,profession,gender
0,She was raised up in a remote rural area where...,accountant,Female
1,She has served as General Manager in companies...,accountant,Female
2,She worked for top consulting firms such as Pr...,accountant,Female
3,"In addition, Jaime holds a bachelor&apos;s deg...",accountant,Female
4,He recently was the treasurer of Rebecca’s Ten...,accountant,Male


In [20]:
# --- Clean AI-style lines ---
def clean_prompt(prompt):
    """Drop every line of ``prompt`` that contains an AI self-reference phrase.

    Matching is case-insensitive and anchored on word boundaries, so a phrase
    such as "as an ai" does not fire inside ordinary text like "was an aide".
    A typographic apostrophe (U+2019) is treated as a plain ``'`` for matching
    only; lines that are kept are returned unmodified.

    Args:
        prompt (str): Raw prompt text, possibly spanning several lines.

    Returns:
        str: The surviving lines joined with ``"\\n"``, with leading and
        trailing whitespace stripped from the overall result.
    """
    banned_phrases = [
        "you are an ai", "as an ai", "as a language model",
        "i am an ai", "i cannot", "i do not have", "i'm just an ai"
    ]
    # \b on both sides keeps a phrase from matching in the middle of a longer
    # word (plain substring search would match "as an ai" in "has an aim").
    banned_re = re.compile(
        r"\b(?:" + "|".join(re.escape(phrase) for phrase in banned_phrases) + r")\b"
    )
    kept_lines = [
        line for line in prompt.split('\n')
        if not banned_re.search(line.lower().replace("\u2019", "'"))
    ]
    return '\n'.join(kept_lines).strip()

In [19]:
# Run both raw templates through clean_prompt before they are formatted.
cv_template, cover_letter_template = map(
    clean_prompt, (cv_template_raw, cover_letter_template_raw)
)

In [8]:
# --- Load Model ---
# Use the first CUDA GPU when one is available; otherwise pass -1, which tells
# the pipeline to run on CPU instead of failing on a machine without CUDA.
generator = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-3b-instruct",
    device=0 if torch.cuda.is_available() else -1,
)


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Device set to use cuda:0


In [21]:
# --- Post-process output to remove AI references ---
def clean_generated_text(text):
    """Drop every line of generated ``text`` that contains an AI-disclaimer phrase.

    Matching is case-insensitive and anchored on word boundaries, so legitimate
    content such as "was a machine operator" or "I'm an aircraft engineer" is
    not removed just because it embeds a banned phrase as a substring.
    A typographic apostrophe (U+2019) is treated as a plain ``'`` for matching
    only; lines that are kept are returned unmodified.

    Args:
        text (str): Model output, possibly spanning several lines.

    Returns:
        str: The surviving lines joined with ``"\\n"``, with leading and
        trailing whitespace stripped from the overall result.
    """
    banned_phrases = [
        "as an ai language model", "i cannot", "please note", "i do not have",
        "as a machine", "i am just", "i'm an ai"
    ]
    # \b on both sides keeps a phrase from matching in the middle of a longer
    # word (plain substring search would match "as a machine" in "was a machine").
    banned_re = re.compile(
        r"\b(?:" + "|".join(re.escape(phrase) for phrase in banned_phrases) + r")\b"
    )
    kept_lines = [
        line for line in text.split('\n')
        if not banned_re.search(line.lower().replace("\u2019", "'"))
    ]
    return '\n'.join(kept_lines).strip()

In [23]:

# --- Generate CV and Cover Letter ---
# Shared generation settings. return_full_text=False makes the pipeline return
# only the newly generated continuation; with the default (True) the
# 'generated_text' field starts with the prompt, so the template, bio and job
# ad would be written into the output columns.
generation_kwargs = {
    "max_new_tokens": 500,
    "do_sample": True,
    "return_full_text": False,
}

outputs = []

for i, row in bios_df.iterrows():
    bio_text = row['hard_text']
    gender = row['gender']
    # Fall back to a positional placeholder when the row has no 'name' field.
    name = row.get('name', f"Person_{i+1}")

    # Format prompts (CV first, then cover letter, so generation order is kept).
    prompts = {
        "cv": cv_template.format(bio=bio_text, job_ad=job_ad, gender=gender),
        "cover_letter": cover_letter_template.format(bio=bio_text, job_ad=job_ad, gender=gender),
    }

    record = {"name": name, "gender": gender}
    for column, prompt in prompts.items():
        generated = generator(prompt, **generation_kwargs)[0]['generated_text']
        # Post-process to drop AI-disclaimer lines from the model output.
        record[column] = clean_generated_text(generated)

    outputs.append(record)

    print(f"[✓] Generated for: {name}")

# --- Save to CSV ---
output_df = pd.DataFrame(outputs)
output_df.to_csv("generated_CV_and_CoverLetter.csv", index=False)


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_3


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_4


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_5


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_6


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_7


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_8


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_9


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[✓] Generated for: Person_10
