In [3]:
import pandas as pd
from llama_cpp import Llama

# Load Llama/Mistral model
llm = Llama(model_path="/Users/jameelamer/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.1-GGUF/snapshots/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/mistral-7b-instruct-v0.1.Q4_K_M.gguf", n_ctx=4096)

# Sample text
text = """OrganiZo is a digital asset management platform. It helps businesses organize files efficiently.
Users can reset their password by clicking 'Forgot Password' on the login page.
The platform supports multiple languages for better accessibility."""

# Create prompt
prompt = f"""
Extract at least 3 important questions from the following text:

{text}

Format the output as a list of questions only, without additional text.
"""

# Generate response
response = llm(prompt)
questions = response["choices"][0]["text"].strip().split("\n")

# Create DataFrame
df = pd.DataFrame({"question": questions, "answer": [text] * len(questions)})

# Save to CSV
#df.to_csv("questions.csv", index=False)
df.head(10)
print("Questions generated and saved to questions.csv")


llama_model_load_from_file_impl: using device Metal (Apple M1 Pro) - 10922 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/jameelamer/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.1-GGUF/snapshots/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv  

Questions generated and saved to questions.csv


In [4]:
df.head()

Unnamed: 0,question,answer
0,1. What is OrganiZo?,OrganiZo is a digital asset management platfor...
1,2. How does Organ,OrganiZo is a digital asset management platfor...


In [5]:
text = """
Users can reset their password by clicking 'Forgot Password' on the login page.
The platform supports multiple languages for better accessibility."""

# Create prompt
prompt = f"""
Extract at least 3 important questions from the following text:

{text}

Format the output as a list of questions only, without additional text.
"""

# Generate response
response = llm(prompt)
questions = response["choices"][0]["text"].strip().split("\n")

# Create DataFrame
df = pd.DataFrame({"question": questions, "answer": [text] * len(questions)})

# Save to CSV
#df.to_csv("questions.csv", index=False)
df.head(10)

Llama.generate: 17 prefix-match hit, remaining 48 prompt tokens to eval
llama_perf_context_print:        load time =    5545.78 ms
llama_perf_context_print: prompt eval time =    4528.06 ms /    48 tokens (   94.33 ms per token,    10.60 tokens per second)
llama_perf_context_print:        eval time =    1130.95 ms /    15 runs   (   75.40 ms per token,    13.26 tokens per second)
llama_perf_context_print:       total time =    5662.39 ms /    63 tokens


Unnamed: 0,question,answer
0,1. How can users reset their password?,\nUsers can reset their password by clicking '...
1,2. What languages does,\nUsers can reset their password by clicking '...


In [7]:
from transformers import pipeline

qa_pipeline = pipeline("question-answering")

text = """Python is a programming language. What is Python used for? It is used for web development, AI, and more. Can I use Python for machine learning? Yes, it is widely used in ML. How do I install Python?"""

def extract_questions(text):
    sentences = text.split(". ")
    return [sent for sent in sentences if "?" in sent]

questions = extract_questions(text)
print(questions)

No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0


[]


In [12]:
from transformers import pipeline

# Load the question generation model
qg_pipeline = pipeline("text2text-generation", model="iarfmoose/t5-base-question-generator")

def generate_questions(text, num_questions=5):
    """
    Generates questions from input text.
    
    :param text: Input text for generating questions
    :param num_questions: Number of questions to generate
    :return: List of generated questions
    """
    questions = qg_pipeline(
        text,
        max_length=128,
        num_return_sequences=num_questions,
        num_beams=num_questions,  # Enables beam search
        early_stopping=True
    )

    return [q["generated_text"] for q in questions]

# Example text (Replace this with any document content)
text = """
Python is a widely-used programming language for web development, AI, data science, and automation. 
It was created by Guido van Rossum and first released in 1991. 
Python is known for its simplicity, readability, and large community support.
"""

# Generate questions
questions = generate_questions(text)

# Print the generated questions
print("\nGenerated Questions:")
for idx, q in enumerate(questions, 1):
    print(f"{idx}. {q}")


Device set to use mps:0



Generated Questions:
1. What is Python? It is a popular programming language for web development, AI, and data science.
2. What is Python? It is a popular programming language for web development, data science, and automation.
3. What is a popular programming language for web development, AI, and automation?
4. What is a popular programming language for web development, data science, and automation?
5. What is the most popular programming language in the world?


In [15]:
from transformers import pipeline

# Load models
qg_pipeline = pipeline("text2text-generation", model="iarfmoose/t5-base-question-generator")
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")

def generate_questions(text, num_questions=10):
    """
    Generates meaningful questions from input text.
    """
    questions = qg_pipeline(
        text,
        max_length=128,
        num_return_sequences=num_questions,
        num_beams=num_questions,
        early_stopping=True
    )
    return [q["generated_text"] for q in questions]

def answer_questions(text, questions):
    """
    Answers generated questions based on input text.
    """
    answers = []
    for q in questions:
        answer = qa_pipeline(question=q, context=text)
        answers.append((q, answer["answer"]))
    return answers

# Example text (Replace with document content)
text = """
Python is a widely-used programming language for web development, AI, data science, and automation. 
It was created by Guido van Rossum and first released in 1991. 
Python is known for its simplicity, readability, and large community support.
"""
text = """OrganiZo is a digital asset management platform. It helps businesses organize files efficiently.
Users can reset their password by clicking 'Forgot Password' on the login page.
The platform supports multiple languages for better accessibility."""

# Generate questions
questions = generate_questions(text)

# Get answers
qa_pairs = answer_questions(text, questions)

# Print results
print("\nGenerated Questions & Answers:")
for idx, (q, a) in enumerate(qa_pairs, 1):
    print(f"{idx}. Q: {q}\n   A: {a}\n")


Device set to use mps:0
Device set to use mps:0



Generated Questions & Answers:
1. Q: How can I reset my password? ? OrganiZo is a digital asset management platform.
   A: by clicking 'Forgot Password' on the login page

2. Q: What is OrganiZo? ? OrganiZo is a digital asset management platform.
   A: It helps businesses organize files efficiently

3. Q: How can I reset my password? ? OrganiZo helps businesses organize files efficiently.
   A: by clicking 'Forgot Password' on the login page

4. Q: What is OrganiZo? It is a digital asset management platform for businesses.
   A: helps businesses organize files efficiently

5. Q: is a digital asset management platform that helps businesses organize files efficiently.
   A: OrganiZo

6. Q: is a digital asset management platform that helps businesses organize files efficiently?
   A: OrganiZo

7. Q: What is OrganiZo? It is a digital asset management platform.
   A: It helps businesses organize files efficiently

8. Q: is a digital asset management platform that helps businesses organize 