<a href="https://colab.research.google.com/github/DishaKushwah/custom-quiz-generator/blob/main/short_answer_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
## SHORT ANSWERS
%pip install --upgrade transformers
import torch
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForQuestionAnswering,
    pipeline, T5ForConditionalGeneration, T5Tokenizer
)
import spacy
import numpy as np
from sentence_transformers import SentenceTransformer
import re
import nltk
from nltk.tokenize import sent_tokenize
from typing import List, Dict, Tuple, Optional
import random
from dataclasses import dataclass
import json




In [7]:
@dataclass
class ShortAnswerQuestion:
    """Data class for short answer questions.

    Holds one generated question, its reference answer, and the metadata
    that AdvancedShortAnswerGenerator attaches when it builds the record.
    """
    # Text of the question.
    question: str
    # Reference answer the question was generated for (a fixed placeholder
    # string for fallback questions).
    answer: str
    # Leading part of the source context (truncated to 200 chars plus "...")
    # or, for fallback questions, the sentence the question was built from.
    context_sentence: str
    # Category label produced by determine_question_type(), e.g. "factual".
    question_type: str
    # One of "easy", "medium" or "hard".
    difficulty: str
    # Score taken from the QA validation step (0.1 for unvalidated fallbacks);
    # generate_comprehensive_questions() sorts on this field.
    confidence: float
    # Up to five keywords extracted from the question/answer text.
    keywords: List[str]
    # Human-readable length label, e.g. "brief (few words)".
    expected_length: str

class AdvancedShortAnswerGenerator:
    def __init__(self):
        """Load every model and resource the generator relies on.

        Sets up, in order: the torch device, two seq2seq question
        generators, an extractive QA model used for validation, a sentence
        embedding model, the spaCy English pipeline (``self.nlp`` is set to
        ``None`` when the model is not installed) and the NLTK resources.
        Finally registers the static question templates.

        Loading the checkpoints may download them, so construction can be
        slow and needs network access on first use.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Question-generation checkpoint (a T5-base "qg-hl" model, per its name)
        self.qg_model_name = "valhalla/t5-base-qg-hl"
        # Use AutoTokenizer and AutoModelForSeq2SeqLM for broader compatibility
        self.qg_tokenizer = AutoTokenizer.from_pretrained(self.qg_model_name)
        self.qg_model = AutoModelForSeq2SeqLM.from_pretrained(self.qg_model_name).to(self.device)

        # Load FLAN-T5 as a second, instruction-following question generator
        self.flan_model_name = "google/flan-t5-base"
        self.flan_tokenizer = AutoTokenizer.from_pretrained(self.flan_model_name)
        self.flan_model = AutoModelForSeq2SeqLM.from_pretrained(self.flan_model_name).to(self.device)

        # Extractive QA checkpoint used to validate question/answer pairs
        # (a RoBERTa-base SQuAD2 model, per its name)
        self.qa_model_name = "deepset/roberta-base-squad2"
        self.qa_tokenizer = AutoTokenizer.from_pretrained(self.qa_model_name)
        self.qa_model = AutoModelForQuestionAnswering.from_pretrained(self.qa_model_name).to(self.device)

        # Load sentence transformer for semantic analysis
        print("Loading sentence transformer...")
        self.sentence_model = SentenceTransformer('all-mpnet-base-v2')

        # Load spaCy for advanced NLP; methods that need it degrade to empty
        # results when self.nlp is None.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Please install spaCy English model: python -m spacy download en_core_web_sm")
            self.nlp = None

        # NLTK resources are fetched best-effort: a failure here must not
        # abort construction, but it is reported instead of being hidden.
        try:
            nltk.download('punkt', quiet=True)
            nltk.download('stopwords', quiet=True)
            nltk.download('averaged_perceptron_tagger', quiet=True)
            nltk.download('punkt_tab', quiet=True)
        except Exception as exc:
            print(f"Warning: could not download NLTK resources: {exc}")

        # Question type templates (can be used as fallback or for prompt engineering)
        self.question_templates = {
            'factual': [
                "What is {}?",
                "What does {} mean?",
                "What are the characteristics of {}?",
                "Define {}.",
                "Explain {}."
            ],
            'analytical': [
                "How does {} work?",
                "Why is {} important?",
                "What is the significance of {}?",
                "How does {} relate to {}?",
                "What are the implications of {}?"
            ],
            'comparative': [
                "Compare {} and {}.",
                "What are the differences between {} and {}?",
                "How does {} differ from {}?",
                "What are the similarities between {} and {}?"
            ],
            'causal': [
                "What caused {}?",
                "What are the effects of {}?",
                "How did {} lead to {}?",
                "What resulted from {}?"
            ],
            'procedural': [
                "How do you {}?",
                "What are the steps to {}?",
                "Describe the process of {}.",
                "What is the procedure for {}?"
            ]
        }

    def extract_key_concepts(self, text: str) -> Dict:
        """Extract key concepts and entities from text."""
        if not self.nlp:
            return {"entities": [], "concepts": [], "sentences": []}

        doc = self.nlp(text)

        entities = []
        for ent in doc.ents:
            # Include more entity types for broader question generation
            if ent.label_ in ['PERSON', 'ORG', 'GPE', 'EVENT', 'WORK_OF_ART', 'LAW', 'LANGUAGE', 'DATE', 'CARDINAL', 'ORDINAL', 'NORP', 'LOC', 'PRODUCT']:
                entities.append({
                    'text': ent.text,
                    'label': ent.label_,
                    'start': ent.start_char,
                    'end': ent.end_char
                })

        # Extract key concepts (noun phrases)
        concepts = []
        for chunk in doc.noun_chunks:
            # Adjust length for slightly longer concepts
            if 2 <= len(chunk.text.split()) <= 5:
                concepts.append({
                    'text': chunk.text,
                    'pos': chunk.root.pos_,
                    'start': chunk.start_char,
                    'end': chunk.end_char
                })

        # Extract sentences
        sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.split()) >= 10] # Increased minimum sentence length

        return {
            "entities": entities,
            "concepts": concepts,
            "sentences": sentences
        }

    def generate_question_with_t5(self, context: str, answer: str, difficulty: str = "medium", question_type: str = "factual") -> str:
        """Generate question using T5 model with prepend approach, considering difficulty."""
        # Incorporate difficulty into the prompt
        prompt_prefix = f"generate {difficulty} {question_type} question:"
        input_text = f"{prompt_prefix} context: {context} \\n {answer}"

        inputs = self.qg_tokenizer.encode_plus(
            input_text,
            max_length=512,
            truncation=True,
            padding=True,
            return_tensors="pt"
        ).to(self.device)

        # Adjust generation parameters based on difficulty (simple heuristic)
        max_length = 100
        num_beams = 5
        temperature = 0.7
        if difficulty == "easy":
            max_length = 80
            temperature = 0.6
        elif difficulty == "hard":
            max_length = 120
            temperature = 0.9
            num_beams = 8

        with torch.no_grad():
            outputs = self.qg_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=max_length,
                num_beams=num_beams,
                temperature=temperature,
                do_sample=True,
                early_stopping=True,
                no_repeat_ngram_size=2
            )

        question = self.qg_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return question.strip()

    def generate_question_with_flan(self, context: str, answer: str, difficulty: str = "medium", question_type: str = "factual") -> str:
        """Generate question using FLAN-T5 model, considering difficulty."""
        # Incorporate difficulty into the prompt
        prompt = f"""Given the following context, generate a concise {difficulty}-level short answer question where the answer is '{answer}':

Context: {context}

Question:"""

        inputs = self.flan_tokenizer(
            prompt,
            max_length=512,
            truncation=True,
            padding=True,
            return_tensors="pt"
        ).to(self.device)

        # Adjust generation parameters based on difficulty (simple heuristic)
        max_length = 150
        num_beams = 4
        temperature = 0.8
        if difficulty == "easy":
            max_length = 120
            temperature = 0.7
        elif difficulty == "hard":
            max_length = 180
            temperature = 0.9
            num_beams = 6


        with torch.no_grad():
            outputs = self.flan_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=max_length,
                num_beams=num_beams,
                temperature=temperature,
                do_sample=True,
                early_stopping=True
            )

        question = self.flan_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return question.strip()

    def classify_question_difficulty(self, question: str, answer: str, context: str) -> str:
        """Classify question difficulty based on complexity and context."""
        question_lower = question.lower()
        question_words = question_lower.split()
        answer_words = answer.lower().split()

        # Keyword indicators
        easy_keywords = ['what', 'who', 'when', 'where', 'name', 'list', 'define']
        medium_keywords = ['how', 'why', 'explain', 'describe', 'role', 'purpose']
        hard_keywords = ['analyze', 'evaluate', 'synthesize', 'impact', 'implication', 'relationship']

        easy_score = sum(1 for word in easy_keywords if word in question_words)
        medium_score = sum(1 for word in medium_keywords if word in question_words)
        hard_score = sum(1 for word in hard_keywords if word in question_words)

        # Answer length
        answer_length_score = 0
        if len(answer_words) > 15:
            answer_length_score = 3
        elif len(answer_words) > 8:
            answer_length_score = 2
        elif len(answer_words) > 3:
            answer_length_score = 1

        # Context complexity (simple measure: average sentence length)
        sentences = sent_tokenize(context)
        avg_sentence_length = np.mean([len(s.split()) for s in sentences]) if sentences else 0

        context_complexity_score = 0
        if avg_sentence_length > 25:
            context_complexity_score = 2
        elif avg_sentence_length > 18:
            context_complexity_score = 1

        # --- Enhanced Linguistic Features ---
        if self.nlp:
            doc_question = self.nlp(question)
            doc_answer = self.nlp(answer)


            # 1. Part-of-speech tagging (weights based on complexity)
            # Corrected way to get POS counts
            pos_counts_question = {}
            for token in doc_question:
                pos_counts_question[token.pos_] = pos_counts_question.get(token.pos_, 0) + 1

            pos_counts_answer = {}
            for token in doc_answer:
                 pos_counts_answer[token.pos_] = pos_counts_answer.get(token.pos_, 0) + 1


            pos_score = (
                pos_counts_question.get(spacy.parts_of_speech.ADJ, 0) * 0.6 + # Further Increased weight for Adjectives
                pos_counts_question.get(spacy.parts_of_speech.ADV, 0) * 0.7 + # Further Increased weight for Adverbs
                pos_counts_question.get(spacy.parts_of_speech.VERB, 0) * 0.5 + # Further Increased weight for Verbs
                pos_counts_answer.get(spacy.parts_of_speech.ADJ, 0) * 0.5 +
                pos_counts_answer.get(spacy.parts_of_speech.NOUN, 0) * 0.4 # Further Increased weight for Nouns in answer
            )


            # 2. Dependency parsing complexity (simple measure: average dependency depth) - Higher depth means more complex syntax
            dep_depths_question = [len(list(token.ancestors)) for token in doc_question]
            avg_dep_depth_question = np.mean(dep_depths_question) if dep_depths_question else 0
            dep_score = avg_dep_depth_question * 1.2 # Significantly increased weight


            # 3. Named entity recognition - More entities can indicate more specific/complex questions
            num_entities_question = len(doc_question.ents)
            num_entities_answer = len(doc_answer.ents)
            entity_score = (num_entities_question * 1.2 + num_entities_answer * 1.5) # Significantly increased weight for entities


            # 4. Lexical diversity (Type-Token Ratio) - Lower TTR might indicate simpler language, higher TTR more complex
            question_tokens = [token.text.lower() for token in doc_question if token.is_alpha]
            answer_tokens = [token.text.lower() for token in doc_answer if token.is_alpha]

            question_ttr = len(set(question_tokens)) / len(question_tokens) if question_tokens else 0
            answer_ttr = len(set(answer_tokens)) / len(answer_tokens) if answer_tokens else 0

            # Inverse TTR for scoring (lower TTR = higher score for easy, higher TTR = higher score for hard)
            # Let's use TTR directly and adjust weights
            ttr_score = (question_ttr * 3.0 + answer_ttr * 2.5) # Significantly increased weight


            # Combine linguistic features into a single score
            linguistic_score = pos_score + dep_score + entity_score + ttr_score

        else:
            linguistic_score = 0

        # Combine all scores with adjusted weights
        # Increased weights for hard keywords, answer length, context complexity, and linguistic features
        total_score = (
            hard_score * 7 + # Further Higher weight for hard keywords
            medium_score * 4 + # Increased weight for medium keywords
            easy_score * 1.5 + # Slightly increased weight for easy keywords
            answer_length_score * 3.0 + # Further Higher weight for answer length
            context_complexity_score * 3.0 + # Further Higher weight for context complexity
            linguistic_score * 2.5 # Significantly increased weight for combined linguistic features
        )

        # Refined thresholds based on adjusted scoring
        # These thresholds will likely need tuning based on testing
        if total_score > 28: # Adjusted thresholds slightly down
            return "hard"
        elif total_score > 14: # Adjusted thresholds slightly down
            return "medium"
        else:
            return "easy"


    def determine_question_type(self, question: str) -> str:
        """Determine the type of question based on its content."""
        question_lower = question.lower()

        if any(word in question_lower for word in ['what is', 'what are', 'define', 'who is', 'who are', 'when is', 'when did', 'where is', 'where are']):
            return "factual"
        elif any(word in question_lower for word in ['how does', 'how to', 'why is', 'why do', 'explain', 'describe']):
            return "analytical"
        elif any(word in question_lower for word in ['compare', 'contrast', 'differ', 'similarities', 'differences']):
            return "comparative"
        elif any(word in question_lower for word in ['cause', 'effect', 'result', 'lead to', 'consequence']):
            return "causal"
        elif any(word in question_lower for word in ['steps', 'process', 'procedure', 'how to']):
            return "procedural"
        else:
            # Use a classifier for better accuracy if available, otherwise default
            # try:
            #     classification = self.question_classifier(question)[0]
            #     # Map classifier labels to our types (requires mapping based on chosen classifier)
            #     # This is a placeholder; actual mapping depends on the chosen classifier
            #     return 'factual' # Default for now
            # except:
                 return "factual"

    def extract_keywords(self, text: str) -> List[str]:
        """Extract keywords from text using NLP."""
        if not self.nlp:
            return []

        doc = self.nlp(text)
        keywords = []
        stopwords = set(nltk.corpus.stopwords.words('english'))

        for token in doc:
            if (token.pos_ in ['NOUN', 'PROPN', 'ADJ', 'VERB'] and # Include verbs
                token.text.lower() not in stopwords and
                not token.is_punct and
                len(token.text) > 2):
                keywords.append(token.text)

        # Prioritize multi-word concepts if they exist
        multi_word_keywords = [chunk.text for chunk in doc.noun_chunks if len(chunk.text.split()) > 1 and len(chunk.text.split()) <= 3]
        keywords = multi_word_keywords + keywords

        return list(set(keywords))

    def validate_question_answer_pair(self, question: str, expected_answer: str, context: str) -> Dict:
        """Validate if the question can be answered correctly from the context using the QA model."""
        try:
            # Use QA model and tokenizer explicitly
            inputs = self.qa_tokenizer(question, context, add_special_tokens=True, return_tensors="pt", truncation=True, max_length=512)
            input_ids = inputs["input_ids"].to(self.device)
            attention_mask = inputs["attention_mask"].to(self.device)

            with torch.no_grad():
                outputs = self.qa_model(input_ids=input_ids, attention_mask=attention_mask)

            answer_start_scores = outputs.start_logits
            answer_end_scores = outputs.end_logits

            # Get the most likely answer span
            answer_start = torch.argmax(answer_start_scores)
            answer_end = torch.argmax(answer_end_scores) + 1

            # Convert tokens to predicted answer string
            predicted_answer = self.qa_tokenizer.decode(input_ids[0, answer_start:answer_end], skip_special_tokens=True)

            # Calculate a confidence score (using max of start and end logits)
            confidence = torch.max(torch.softmax(answer_start_scores, dim=-1)) + torch.max(torch.softmax(answer_end_scores, dim=-1)) / 2.0


            # Calculate semantic similarity between expected and predicted answers
            # Handle potential errors if encoding fails
            try:
                expected_embedding = self.sentence_model.encode([expected_answer])
                predicted_embedding = self.sentence_model.encode([predicted_answer])
                similarity = np.dot(expected_embedding[0], predicted_embedding[0]) / (
                    np.linalg.norm(expected_embedding[0]) * np.linalg.norm(predicted_embedding[0])
                )
            except Exception as e:
                print(f"Error encoding answers for similarity: {e}")
                similarity = 0.0 # Default to 0 similarity on error


            # Check if answers are semantically similar or one contains the other
            contains_check = (
                expected_answer.lower().strip() in predicted_answer.lower().strip() or
                predicted_answer.lower().strip() in expected_answer.lower().strip()
            )

            # Consider similarity and containment for validation
            is_valid = (similarity > 0.7 and confidence > 0.4) or (contains_check and confidence > 0.5)

            return {
                "is_valid": is_valid,
                "confidence": confidence.item(), # Convert tensor to float
                "similarity": similarity,
                "predicted_answer": predicted_answer,
                "expected_answer": expected_answer
            }

        except Exception as e:
            # Catch specific errors from pipeline if possible
            print(f"Error during QA validation: {e}")
            return {
                "is_valid": False,
                "confidence": 0.0,
                "similarity": 0.0,
                "predicted_answer": "",
                "expected_answer": expected_answer,
                "error": str(e)
            }

    def determine_expected_length(self, answer: str) -> str:
        """Determine expected answer length category based on word count."""
        word_count = len(answer.split())

        if word_count <= 5:
            return "brief (few words)"
        elif word_count <= 15:
            return "short (1-2 sentences)"
        elif word_count <= 30:
            return "medium (2-4 sentences)"
        else:
            return "long (paragraph+)"


    def generate_comprehensive_questions(self, context: str, num_questions: int = 8, difficulty: str = "medium") -> List[ShortAnswerQuestion]:
        """Generate comprehensive set of short answer questions, considering difficulty."""
        questions = []
        generated_pairs = set() # To avoid duplicate question-answer pairs

        # Extract key information
        key_info = self.extract_key_concepts(context)

        # Combine potential answers from entities and concepts
        potential_answers = [e['text'] for e in key_info['entities']] + [c['text'] for c in key_info['concepts']]
        random.shuffle(potential_answers) # Shuffle to mix entity and concept based questions

        # Determine number of attempts per answer based on difficulty
        attempts_per_answer_map = {"easy": 4, "medium": 8, "hard": 12} # Increased attempts for all difficulties again
        attempts_per_answer = attempts_per_answer_map.get(difficulty, 8)

        answers_processed = 0

        for answer in potential_answers:
            if len(questions) >= num_questions:
                break

            # Skip if answer is too short or too long
            if len(answer.split()) < 2 or len(answer.split()) > 10:
                continue

            answers_processed += 1
            if answers_processed > num_questions * 20: # Further increased limit to try more answers
                 print(f"Reached maximum answer processing attempts ({num_questions * 20}). Stopping.")
                 break
            for attempt in range(attempts_per_answer):
                if len(questions) >= num_questions:
                    break

                # Choose which model to use (can alternate or use both)
                if attempt % 2 == 0:
                    question = self.generate_question_with_t5(context, answer, difficulty=difficulty)
                else:
                    question = self.generate_question_with_flan(context, answer, difficulty=difficulty)

                # Basic cleaning and validation before full QA check
                question = question.strip()
                if not question or not question.endswith('?') or len(question.split()) < 5:
                    continue

                # Ensure question is unique
                q_a_pair = (question, answer)
                if q_a_pair in generated_pairs:
                    continue

                # Validate question-answer pair
                validation = self.validate_question_answer_pair(question, answer, context)

                # Filtering based on difficulty and validation confidence
                confidence_threshold = 0.3 # Base threshold

                # Adjust confidence threshold based on requested difficulty
                if difficulty == "easy":
                    confidence_threshold = 0.25 # Slightly lower threshold for easy questions
                elif difficulty == "hard":
                     confidence_threshold = 0.35 # Slightly higher threshold for hard questions


                if validation["is_valid"] and validation["confidence"] > confidence_threshold: # Apply confidence threshold
                    # Determine question type and classified difficulty
                    question_type = self.determine_question_type(question)
                    classified_difficulty = self.classify_question_difficulty(question, answer, context) # Classify generated question's actual difficulty

                    # Add the question if its classified difficulty is the requested one or one level below
                    # This allows some flexibility while aiming for the target difficulty
                    difficulty_levels = ["easy", "medium", "hard"]
                    requested_index = difficulty_levels.index(difficulty)
                    classified_index = difficulty_levels.index(classified_difficulty)

                    # Accept if classified difficulty is at or one level below requested difficulty
                    if classified_index >= requested_index or (requested_index > 0 and classified_index == requested_index - 1):

                        keywords = self.extract_keywords(f"{question} {answer}")
                        expected_length = self.determine_expected_length(answer)

                        saq = ShortAnswerQuestion(
                            question=question,
                            answer=answer,
                            context_sentence=context[:200] + "..." if len(context) > 200 else context,
                            question_type=question_type,
                            difficulty=classified_difficulty, # Use classified difficulty
                            confidence=validation["confidence"],
                            keywords=keywords[:5],
                            expected_length=expected_length
                        )
                        questions.append(saq)
                        generated_pairs.add(q_a_pair) # Add to history
                        # If we found a question of the requested difficulty, move to the next answer
                        if classified_difficulty == difficulty:
                            break


        # Fallback: Generate questions directly from sentences if not enough generated
        if len(questions) < num_questions:
             print(f"Warning: Could not generate {num_questions} questions of the requested difficulty. Adding fallback questions.")
             for sentence in key_info["sentences"]:
                 if len(questions) >= num_questions:
                     break

                 # Generate a question based on the sentence (can use template or model)
                 # Simple template fallback
                 question = f"What is discussed in the sentence: \"{sentence[:50]}...\"?"
                 answer = sentence # The sentence itself is the "answer" in this case

                 # Validate (less strict for fallback)
                 validation = self.validate_question_answer_pair(question, answer, context)

                 # Even if not perfectly valid, added as a fallback if needed and unique
                 q_a_pair = (question, answer)
                 if q_a_pair not in generated_pairs:
                     difficulty = "easy" # Fallback questions are usually easy
                     question_type = "factual"
                     keywords = self.extract_keywords(sentence)[:5]
                     expected_length = self.determine_expected_length(answer)

                     saq = ShortAnswerQuestion(
                         question=question,
                         answer="Key points from the sentence.", # Placeholder answer
                         context_sentence=sentence,
                         question_type=question_type,
                         difficulty=difficulty,
                         confidence=validation["confidence"] if validation["is_valid"] else 0.1, # Low confidence for fallback
                         keywords=keywords,
                         expected_length="short (1-2 sentences)"
                     )
                     questions.append(saq)
                     generated_pairs.add(q_a_pair)


        # Sort by confidence (or potentially by classified difficulty later) and return
        questions.sort(key=lambda x: x.confidence, reverse=True)
        return questions[:num_questions]

def main():
    """Run an interactive console demo of the short answer generator.

    Builds the generator, asks for a context (falling back to a built-in
    passage) and a question count (falling back to 6), generates the
    questions with the generator's default difficulty and prints them.
    """
    generator = AdvancedShortAnswerGenerator()

    # Built-in passage used when the user supplies no context.
    sample_context = """
    Machine learning is a subset of artificial intelligence that enables computers to learn and improve
    from experience without being explicitly programmed. It involves algorithms that can identify patterns
    in data and make predictions or decisions based on those patterns. There are three main types of machine
    learning: supervised learning, unsupervised learning, and reinforcement learning. Supervised learning
    uses labeled training data to learn a mapping from inputs to outputs. Popular supervised learning
    algorithms include linear regression, decision trees, and neural networks. Unsupervised learning finds
    hidden patterns in data without labeled examples, using techniques like clustering and dimensionality
    reduction. Reinforcement learning involves an agent learning to make decisions by receiving rewards
    or penalties for actions taken in an environment. Deep learning, a subset of machine learning, uses
    artificial neural networks with multiple layers to model complex patterns in data. Applications of
    machine learning include image recognition, natural language processing, recommendation systems,
    and autonomous vehicles. The field has seen rapid growth due to increased computational power,
    large datasets, and improved algorithms.
    """

    print("Advanced Short Answer Question Generator")
    print("=" * 60)

    # Context: whatever the user typed, else the sample passage.
    typed_context = input("Enter your context (or press Enter to use sample): ").strip()
    if typed_context:
        context_text = typed_context
    else:
        context_text = sample_context
        print("Using sample context about machine learning...")

    # Question count: empty or non-numeric input means 6.
    try:
        question_count = int(input("Number of questions to generate (default 6): ") or "6")
    except ValueError:
        question_count = 6
    print(f"\nGenerating {question_count} short answer questions...")

    # No difficulty is passed, so the generator's default applies.
    results = generator.generate_comprehensive_questions(context_text, question_count)

    # Display results
    if not results:
        print("No high-quality questions could be generated from the provided context.")
        print("Try providing a longer, more detailed context with specific information.")
    else:
        for number, item in enumerate(results, start=1):
            print(f"\nQuestion {number}: [CLASSIFIED: {item.difficulty.upper()}] ({item.question_type})")
            print(f"Q: {item.question}")
            print(f"A: {item.answer}")
            print(f"Expected Length: {item.expected_length}")

    print("\nGeneration complete!")

if __name__ == "__main__":
    main()

Loading sentence transformer...
Advanced Short Answer Question Generator
Enter your context (or press Enter to use sample): 
Using sample context about machine learning...
Number of questions to generate (default 6): 4

Generating 4 short answer questions...

Question 1: [CLASSIFIED: MEDIUM] (factual)
Q: What type of data is deep learning used to model?
A: complex patterns
Expected Length: brief (few words)

Question 2: [CLASSIFIED: MEDIUM] (factual)
Q: what is the first type of machine learning?
A: reinforcement learning
Expected Length: brief (few words)

Question 3: [CLASSIFIED: MEDIUM] (factual)
Q: Machine learning is a subset of AI that allows computers to learn and improve from experience without being explicitly programmed?
A: decision trees
Expected Length: brief (few words)

Question 4: [CLASSIFIED: MEDIUM] (factual)
Q: Machine learning is a subset of artificial intelligence that enables computers to learn without being explicitly programmed?
A: a subset
Expected Length: brief