In [20]:
import os
import glob
import re
import pandas as pd
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TextStreamer

In [48]:
# Hugging Face checkpoint used for every generation step in this notebook.
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Tokenizer and causal-LM weights come from the same checkpoint; weights are
# loaded in half precision and device placement is delegated to `device_map`.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map='auto',
)

# Streamer handed to `generate`; configured to skip the prompt and special tokens.
streamer = TextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.43it/s]


In [49]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# No explicit device is passed; the pipeline works with the model as loaded.
pipe = pipeline("text-generation", tokenizer=tokenizer, model=model)

Device set to use cpu


In [50]:
# Folder containing the job ads as .txt files.
FOLDER_PATH = "job_ads/jobADs_Llama3.2"

# Text file the generated summary is written to.
OUTPUT_PATH = "job_ads/accountant_summary_llama3.txt"

# Token budget for generation.
MAX_NEW_TOKENS = 2048

In [54]:
def load_job_ads(folder_path):
    """Read every ``.txt`` file in ``folder_path`` and return their contents.

    Args:
        folder_path: Directory containing one job ad per ``.txt`` file.

    Returns:
        A list of strings, one per file, each stripped of leading/trailing
        whitespace. Files are read in sorted filename order so the result is
        the same on every run and every platform.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist (from ``os.listdir``).
    """
    job_ads = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        # Skip other extensions and anything that is not a regular file
        # (e.g. a directory whose name happens to end in ".txt").
        if not file_name.endswith(".txt") or not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8') as f:
            job_ads.append(f.read().strip())
    return job_ads

In [59]:
# Path to the job ad prompt text file.
PROMPT_PATH = "prompts/prompt_summarizedJobAD_Accountant.txt"

# Decode explicitly as UTF-8 (matching the other file reads/writes in this
# notebook) so the result does not depend on the platform's default encoding.
with open(PROMPT_PATH, "r", encoding="utf-8") as f:
    job_ad_prompt = f.read()

In [60]:
def build_prompt(job_ads, job_title="Accountant", template=None):
    """Build the generation prompt from the template and the loaded job ads.

    Previously the joined ads and ``job_title`` were computed/accepted but never
    used, so the model only ever saw the bare template. Now:

    * every ``{job_title}`` in the template is replaced by ``job_title``;
    * every ``{job_ads}`` in the template is replaced by the ads joined with
      a ``---`` separator;
    * if the template has no ``{job_ads}`` placeholder, the joined ads are
      appended after the template, separated by a blank line.

    Plain ``str.replace`` is used (not ``str.format``) so other braces in the
    template or in the ads are left untouched.

    Args:
        job_ads: List of job ad texts.
        job_title: Title substituted for ``{job_title}``.
        template: Prompt template; defaults to the notebook-level
            ``job_ad_prompt`` read from the prompt file.

    Returns:
        The final prompt string. With no ads and no placeholders the template
        is returned unchanged.
    """
    if template is None:
        template = job_ad_prompt

    combined = "\n\n---\n\n".join(job_ads)

    # Fill the title first so a literal "{job_title}" inside an ad is not rewritten.
    prompt = template.replace("{job_title}", job_title)

    if "{job_ads}" in prompt:
        return prompt.replace("{job_ads}", combined)
    if not combined:
        return prompt
    # No placeholder: fall back to appending the ads after the instructions.
    return f"{prompt.rstrip()}\n\n{combined}"

In [62]:

# Run Inference ===
def generate_summary(prompt, max_tokens=1024):
    """Sample a completion for ``prompt`` and return only the generated text.

    Uses the notebook-level ``tokenizer``, ``model`` and ``streamer``. The
    streamer is passed to ``generate`` alongside the sampling settings.

    Args:
        prompt: Full prompt text to feed the model.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded completion (prompt tokens excluded, special tokens
        skipped), stripped of surrounding whitespace.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Number of prompt tokens; used below to slice the prompt off the output.
    prompt_length = encoded['input_ids'].shape[-1]

    sampling_options = dict(
        max_new_tokens=max_tokens,
        streamer=streamer,
        do_sample=True,
        temperature=0.3,
        top_p=1.0,
    )
    sequences = model.generate(**encoded, **sampling_options)

    # Keep only the tokens that come after the prompt in the first sequence.
    completion_ids = sequences[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
   

# ===  Main Execution ===
# Load the ads, build the prompt, and generate the text.
job_ads = load_job_ads(FOLDER_PATH)
prompt = build_prompt(job_ads)
# Pass the configured budget explicitly; otherwise generate_summary silently
# falls back to its own default and MAX_NEW_TOKENS has no effect.
summary = generate_summary(prompt, max_tokens=MAX_NEW_TOKENS)

# Persist the generated text.
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    f.write(summary)

print(f"\nJob ad saved to: {OUTPUT_PATH}")



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




# Accountant
We are seeking a highly skilled and detail-oriented Accountant to join our team. The successful candidate will be responsible for managing financial records, preparing financial statements, and ensuring compliance with accounting regulations.

# Key Responsibilities
- Prepare and review financial statements, including balance sheets and income statements
- Manage and reconcile financial records, including accounts payable and accounts receivable
- Ensure compliance with accounting regulations and standards
- Analyze financial data to identify trends and areas for improvement
- Provide financial guidance and support to management and staff

# Required Qualifications
- Bachelor's degree in Accounting or related field
- Certified Public Accountant (CPA) certification preferred
- 2+ years of experience in accounting and financial management
- Strong analytical and problem-solving skills
- Excellent communication and interpersonal skills

# Benefits
- Competitive salary and b