<a href="https://colab.research.google.com/github/Kevin091295/Finetune_Resume_Q-A/blob/main/Inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

def load_model_and_tokenizer(hf_model_id, token=None,
                             base_model_name="meta-llama/Llama-3.2-3B-Instruct"):
    """
    Load the tokenizer and the base model with the fine-tuned PEFT adapter attached.

    Args:
        hf_model_id (str): Hugging Face repo ID of the fine-tuned PEFT adapter.
            The tokenizer is loaded from this same repo.
        token (str | None): Hugging Face access token, forwarded to every
            ``from_pretrained`` call. Use None for public repos.
        base_model_name (str): Repo ID of the base model the adapter is applied
            to. Defaults to the value that used to be hard-coded here, so
            existing two-argument calls behave as before.

    Returns:
        tuple: ``(model, tokenizer)``; ``model`` is the PEFT-wrapped model
        after ``eval()`` has been called on it.
    """
    # Load tokenizer; the EOS token doubles as the pad token, padding on the right.
    tokenizer = AutoTokenizer.from_pretrained(hf_model_id, token=token)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Load base model without quantization
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",  # Will use GPU if available, CPU otherwise
        torch_dtype=torch.float16,
        token=token
    )

    # Load the fine-tuned PEFT model from Hugging Face on top of the base model
    model = PeftModel.from_pretrained(base_model, hf_model_id, token=token)
    model.eval()

    return model, tokenizer

def generate_answer(model, tokenizer, resume_text, question, max_length=100):
    """
    Generate an answer given a resume and question.

    Args:
        model: Loaded fine-tuned model
        tokenizer: Loaded tokenizer
        resume_text (str): The resume text/context
        question (str): The question to answer
        max_length (int): Maximum number of newly generated tokens
            (forwarded to ``generate`` as ``max_new_tokens``)

    Returns:
        str: The generated answer only, with the prompt and any leading
        "<|assistant|>" marker removed and surrounding whitespace stripped.
    """
    # Prompt layout is unchanged (the original comment says it follows the training format).
    prompt = f"""<|begin_of_text|><|user|>
I have a resume:
{resume_text}

Question: {question}<|end_of_text|>"""

    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    # Generate the answer
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_length,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # The returned sequence is the prompt tokens followed by the new tokens, so
    # slice off the prompt by token count. Searching the decoded text for
    # "Question:" mis-slices whenever the resume itself contains that string or
    # the decoded text does not reproduce the prompt byte-for-byte.
    prompt_token_count = input_ids.shape[-1]
    new_tokens = outputs[0][prompt_token_count:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # The model emits a literal "<|assistant|>" role marker that
    # skip_special_tokens does not remove; drop it so only the answer remains.
    assistant_marker = "<|assistant|>"
    if answer.startswith(assistant_marker):
        answer = answer[len(assistant_marker):].strip()

    return answer

def main():
    """Load the fine-tuned model, ask one sample question about a sample resume, and print the result."""
    import os  # local import keeps this cell self-contained

    # Replace with your Hugging Face model ID
    hf_model_id = "KevinPanchal/FineTune_Resume_Kaggle"  # e.g., "john_doe/resume-agent-model"

    # Read the access token from the HF_TOKEN environment variable instead of
    # hard-coding it in the notebook. Unset or empty becomes None, which leaves
    # credential lookup to the Hugging Face libraries.
    hf_token = os.environ.get("HF_TOKEN") or None

    # Load model and tokenizer from Hugging Face
    print("Loading model and tokenizer from Hugging Face...")
    model, tokenizer = load_model_and_tokenizer(hf_model_id, token=hf_token)

    # Example resume and question
    sample_resume = """JT_Walkin Data Entry Operator (night Shift),CO_MM Media Pvt Ltd,JR_Fresher Keyskills English Typing Part Time Data Entry Selection Process Desired Candidate Profile ,JL_Bangalore,ED_Postgraduate,EX_0 - 1 yrs,SK_ITES,IN_Media , Entertainment , Internet,PR_1,50,000 - 2,25,000 PA"""
    sample_question = "What is job provided by MM Media Pvt Ltd company?"

    # Generate answer
    print("\nGenerating answer...")
    answer = generate_answer(model, tokenizer, sample_resume, sample_question)

    print(f"\nResume: {sample_resume}")
    print(f"Question: {sample_question}")
    print(f"Answer: {answer}")

if __name__ == "__main__":
    main()

Loading model and tokenizer from Hugging Face...


tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/325 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/865 [00:00<?, ?B/s]



adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]


Generating answer...

Resume: JT_Walkin Data Entry Operator (night Shift),CO_MM Media Pvt Ltd,JR_Fresher Keyskills English Typing Part Time Data Entry Selection Process Desired Candidate Profile ,JL_Bangalore,ED_Postgraduate,EX_0 - 1 yrs,SK_ITES,IN_Media , Entertainment , Internet,PR_1,50,000 - 2,25,000 PA
Question: What is job provided by MM Media Pvt Ltd company?
Answer: <|assistant|>
Walkin Data Entry Operator (night Shift)


In [None]:
def main():
    """Load the fine-tuned model, ask about required qualifications in a sample job record, and print the result."""
    import os  # local import keeps this cell self-contained

    # Replace with your Hugging Face model ID
    hf_model_id = "KevinPanchal/FineTune_Resume_Kaggle"  # e.g., "john_doe/resume-agent-model"

    # Read the access token from the HF_TOKEN environment variable instead of
    # hard-coding it in the notebook. Unset or empty becomes None, which leaves
    # credential lookup to the Hugging Face libraries.
    hf_token = os.environ.get("HF_TOKEN") or None

    # Load model and tokenizer from Hugging Face (this happens on every call)
    print("Loading model and tokenizer from Hugging Face...")
    model, tokenizer = load_model_and_tokenizer(hf_model_id, token=hf_token)

    # Example resume and question
    sample_resume = """JT_Data Analyst,CO_ASI Analytics,JR_Entry Level Keyskills SQL Python Power BI Selection Process Desired Candidate Profile ,JL_Ahmedabad,ED_Bachelor's Degree,EX_1 - 2 yrs,SK_Data Analytics,IN_IT , Software,PR_3,50,000 - 6,00,000 PA,Name_Kevin Panchal"""
    sample_question = "What qualifications are necessary for a CO_ASI Analytics?"

    # Generate answer
    print("\nGenerating answer...")
    answer = generate_answer(model, tokenizer, sample_resume, sample_question)

    print(f"\nResume: {sample_resume}")
    print(f"Question: {sample_question}")
    print(f"Answer: {answer}")

if __name__ == "__main__":
    main()

Loading model and tokenizer from Hugging Face...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Generating answer...

Resume: JT_Data Analyst,CO_ASI Analytics,JR_Entry Level Keyskills SQL Python Power BI Selection Process Desired Candidate Profile ,JL_Ahmedabad,ED_Bachelor's Degree,EX_1 - 2 yrs,SK_Data Analytics,IN_IT , Software,PR_3,50,000 - 6,00,000 PA,Name_Kevin Panchal
Question: What qualifications are necessary for a CO_ASI Analytics?
Answer: <|assistant|>
Bachelors Degree,


In [None]:
def main():
    """Load the fine-tuned model, ask about the salary in a sample job record, and print the result."""
    import os  # local import keeps this cell self-contained

    # Replace with your Hugging Face model ID
    hf_model_id = "KevinPanchal/FineTune_Resume_Kaggle"  # e.g., "john_doe/resume-agent-model"

    # Read the access token from the HF_TOKEN environment variable instead of
    # hard-coding it in the notebook. Unset or empty becomes None, which leaves
    # credential lookup to the Hugging Face libraries.
    hf_token = os.environ.get("HF_TOKEN") or None

    # Load model and tokenizer from Hugging Face (this happens on every call)
    print("Loading model and tokenizer from Hugging Face...")
    model, tokenizer = load_model_and_tokenizer(hf_model_id, token=hf_token)

    # Example resume and question
    sample_resume = """JT_Data Analyst,CO_ASI Analytics,JR_Entry Level Keyskills SQL Python Power BI Selection Process Desired Candidate Profile ,JL_Ahmedabad,ED_Bachelor's Degree,EX_1 - 2 yrs,SK_Data Analytics,IN_IT , Software,PR_3,50,000 - 6,00,000 PA,Name_Kevin Panchal"""
    sample_question = "What is salary provided by CO_ASI Analytics?"

    # Generate answer
    print("\nGenerating answer...")
    answer = generate_answer(model, tokenizer, sample_resume, sample_question)

    print(f"\nResume: {sample_resume}")
    print(f"Question: {sample_question}")
    print(f"Answer: {answer}")

if __name__ == "__main__":
    main()

Loading model and tokenizer from Hugging Face...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Generating answer...

Resume: JT_Data Analyst,CO_ASI Analytics,JR_Entry Level Keyskills SQL Python Power BI Selection Process Desired Candidate Profile ,JL_Ahmedabad,ED_Bachelor's Degree,EX_1 - 2 yrs,SK_Data Analytics,IN_IT , Software,PR_3,50,000 - 6,00,000 PA,Name_Kevin Panchal
Question: What is salary provided by CO_ASI Analytics?
Answer: <|assistant|>
3,50,000 - 6,00,000 PA


In [None]:
def main():
    """Load the fine-tuned model, ask which university appears in a free-text bio, and print the result."""
    import os  # local import keeps this cell self-contained

    # Replace with your Hugging Face model ID
    hf_model_id = "KevinPanchal/FineTune_Resume_Kaggle"  # e.g., "john_doe/resume-agent-model"

    # Read the access token from the HF_TOKEN environment variable instead of
    # hard-coding it in the notebook. Unset or empty becomes None, which leaves
    # credential lookup to the Hugging Face libraries.
    hf_token = os.environ.get("HF_TOKEN") or None

    # Load model and tokenizer from Hugging Face (this happens on every call)
    print("Loading model and tokenizer from Hugging Face...")
    model, tokenizer = load_model_and_tokenizer(hf_model_id, token=hf_token)

    # Example resume and question (question text is model input, kept verbatim)
    sample_resume = "My name is Kevin Panchal. I have done my masters from Cleveland State University. Right now I am learning LLM but I am finding very hard to learn this"
    sample_question = "From which university Kevin has complted masters?"

    # Generate answer
    print("\nGenerating answer...")
    answer = generate_answer(model, tokenizer, sample_resume, sample_question)

    print(f"\nResume: {sample_resume}")
    print(f"Question: {sample_question}")
    print(f"Answer: {answer}")

if __name__ == "__main__":
    main()

Loading model and tokenizer from Hugging Face...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Generating answer...

Resume: My name is Kevin Panchal. I have done my masters from Cleveland State University. Right now I am learning LLM but I am finding very hard to learn this
Question: From which university Kevin has complted masters?
Answer: <|assistant|>
Cleveland State University


In [None]:
def main():
    """Load the fine-tuned model, ask what the person in a free-text bio finds hard, and print the result."""
    import os  # local import keeps this cell self-contained

    # Replace with your Hugging Face model ID
    hf_model_id = "KevinPanchal/FineTune_Resume_Kaggle"  # e.g., "john_doe/resume-agent-model"

    # Read the access token from the HF_TOKEN environment variable instead of
    # hard-coding it in the notebook. Unset or empty becomes None, which leaves
    # credential lookup to the Hugging Face libraries.
    hf_token = os.environ.get("HF_TOKEN") or None

    # Load model and tokenizer from Hugging Face (this happens on every call)
    print("Loading model and tokenizer from Hugging Face...")
    model, tokenizer = load_model_and_tokenizer(hf_model_id, token=hf_token)

    # Example resume and question
    sample_resume = "My name is Kevin Panchal. I have done my masters from Cleveland State University. Right now I am learning LLM but I am finding very hard to learn this"
    sample_question = "Which thing is hard for Kevin?"

    # Generate answer
    print("\nGenerating answer...")
    answer = generate_answer(model, tokenizer, sample_resume, sample_question)

    print(f"\nResume: {sample_resume}")
    print(f"Question: {sample_question}")
    print(f"Answer: {answer}")

if __name__ == "__main__":
    main()

Loading model and tokenizer from Hugging Face...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Generating answer...

Resume: My name is Kevin Panchal. I have done my masters from Cleveland State University. Right now I am learning LLM but I am finding very hard to learn this
Question: Which thing is hard for Kevin?
Answer: <|assistant|>
learning LLM


In [None]:
def main():
    """Load the fine-tuned model, ask for the email address in a multi-line sample resume, and print the result."""
    import os  # local import keeps this cell self-contained

    # Replace with your Hugging Face model ID
    hf_model_id = "KevinPanchal/FineTune_Resume_Kaggle"  # e.g., "john_doe/resume-agent-model"

    # Read the access token from the HF_TOKEN environment variable instead of
    # hard-coding it in the notebook. Unset or empty becomes None, which leaves
    # credential lookup to the Hugging Face libraries.
    hf_token = os.environ.get("HF_TOKEN") or None

    # Load model and tokenizer from Hugging Face (this happens on every call)
    print("Loading model and tokenizer from Hugging Face...")
    model, tokenizer = load_model_and_tokenizer(hf_model_id, token=hf_token)

    # Example resume and question. The continuation lines' leading spaces are
    # part of the string and are sent to the model as-is.
    sample_resume = """Name: Alice Johnson
                    Email: alice.johnson@example.com
                    Phone: (555) 123-4567
                    Education: BS Computer Science, University of Tech, 2019
                    Work Experience:
                    - Software Developer, TechCorp, Jan 2020 - Present
                      - Built scalable web applications using Python and Django
                      - Optimized database queries, reducing load time by 30%
                    - Intern, CodeLabs, Summer 2018
                      - Assisted in debugging software and writing unit tests
                    Skills: Python, Django, JavaScript, Git, SQL"""

    sample_question = "Can You please give me email id of Alice?"

    # Generate answer
    print("\nGenerating answer...")
    answer = generate_answer(model, tokenizer, sample_resume, sample_question)

    print(f"\nResume: {sample_resume}")
    print(f"Question: {sample_question}")
    print(f"Answer: {answer}")

if __name__ == "__main__":
    main()

Loading model and tokenizer from Hugging Face...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Generating answer...

Resume: Name: Alice Johnson
                    Email: alice.johnson@example.com
                    Phone: (555) 123-4567
                    Education: BS Computer Science, University of Tech, 2019
                    Work Experience:
                    - Software Developer, TechCorp, Jan 2020 - Present
                      - Built scalable web applications using Python and Django
                      - Optimized database queries, reducing load time by 30%
                    - Intern, CodeLabs, Summer 2018
                      - Assisted in debugging software and writing unit tests
                    Skills: Python, Django, JavaScript, Git, SQL
Question: Can You please give me email id of Alice?
Answer: <|assistant|>
alice.johnson@example.com
