In [None]:
!pip install transformers
!pip install sentencepiece
!pip install scikit-learn



In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Checkpoint identifier shared by both from_pretrained calls below, so the
# tokenizer and the seq2seq model always come from the same checkpoint.
model_name = "google/flan-t5-base"
# Module-level objects: the question-generation function defined later in the
# notebook reads `tokenizer` and `model` as globals.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


In [None]:
import torch

def remove_similar_questions(questions, threshold=0.7):
    """Drop questions that are near-duplicates of an earlier kept question.

    Questions are compared pairwise via the cosine similarity of their TF-IDF
    vectors. The list is scanned in order: every question that has not been
    discarded yet is kept, and each later question whose similarity to it is
    strictly greater than ``threshold`` is discarded.

    Args:
        questions: Sequence of question strings.
        threshold: Similarity above which a later question is treated as a
            duplicate of a kept one. Defaults to 0.7.

    Returns:
        A new list holding the surviving questions in their original order.
    """
    # With fewer than two questions there is nothing to compare, and fitting
    # TfidfVectorizer on an empty list raises ValueError (empty vocabulary).
    if len(questions) < 2:
        return list(questions)

    tfidf_matrix = TfidfVectorizer().fit_transform(questions)
    similarity_matrix = cosine_similarity(tfidf_matrix)

    unique_questions = []
    discarded = set()  # indices already judged too similar to a kept question

    for i, question in enumerate(questions):
        if i in discarded:
            continue
        unique_questions.append(question)
        # Only later questions can be dropped; earlier ones were already decided.
        for j in range(i + 1, len(questions)):
            if similarity_matrix[i, j] > threshold:
                discarded.add(j)
    return unique_questions

def generate_unique_questions(device_name, num_attempts=15, top_n=5):
    """Sample candidate buying questions for a device and return distinct ones.

    Builds a prompt from ``device_name``, samples ``num_attempts`` sequences
    from the module-level ``model``, keeps only candidates that have more than
    four whitespace-separated words and contain a ``?``, removes
    near-duplicates with ``remove_similar_questions`` and returns the first
    ``top_n`` survivors.

    Args:
        device_name: Name of the electronic device to ask about.
        num_attempts: Number of sequences to sample from the model.
        top_n: Maximum number of questions to return.

    Returns:
        A list of at most ``top_n`` question strings. The list is empty when
        no sampled candidate passes the filter.
    """
    prompt = f"Generate buying-related questions for the electronic device: {device_name}"
    # Keep the encoded prompt on the same device as the model so generation
    # also works after the model has been moved to an accelerator.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_length=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.8,
        num_return_sequences=num_attempts
    )

    raw_questions = [tokenizer.decode(out, skip_special_tokens=True).strip() for out in outputs]

    # Keep only candidates that look like real questions: more than four
    # words and at least one question mark.
    filtered_questions = [q for q in raw_questions if len(q.split()) > 4 and '?' in q]
    if not filtered_questions:
        # Sampling produced no usable candidate, so there is nothing to
        # de-duplicate; skip the similarity pass entirely.
        return []

    unique_questions = remove_similar_questions(filtered_questions)

    return unique_questions[:top_n]


In [None]:
# Interactive driver: keep prompting for a device name and print the generated
# questions until the user types 'exit'.
while True:
    # Strip surrounding whitespace so inputs such as "exit " still quit.
    device = input("Enter an electronic device (or 'exit' to quit): ").strip()
    if device.lower() == 'exit':
        break
    if not device:
        # Blank entry: ask again instead of generating for an empty name.
        continue

    questions = generate_unique_questions(device)
    print("\nGenerated Questions:")
    if not questions:
        print("(no suitable questions were generated - please try again)")
    for i, q in enumerate(questions, 1):
        print(f"{i}. {q}")
    print("-" * 50 + "\n")


Generated Questions:
1. What is the name of the wireless headphones that you purchase?
2. What is the most common accessory for a headphone?
3. What type of device would you need in order to play the song?
4. What are the features of a Headphone?
5. How many inches is the maximum width?
--------------------------------------------------

