# In [28]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy import displacy
from spacy.matcher import PhraseMatcher

# Course outcomes (COs) that the questions below will be mapped onto.
course_outcomes = [
    (
        "By the end of this course, students should be able to design and develop "
        "software applications using modern software engineering techniques"
    ),
    "Students should be able to analyze and evaluate algorithms and data structures",
    "Students should be able to communicate effectively and collaborate in team settings",
]

# Assessment questions to be tagged with the outcome(s) they relate to.
questions = [
    "Write a Python program to implement a linked list data structure",
    "What is the time complexity of the quicksort algorithm?",
    "How would you communicate a technical concept to a non-technical audience?",
]

# Load the small English spaCy pipeline and append a sentencizer component to it.
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("sentencizer")

# Define a function to preprocess the text
def preprocess(text):
    """Return *text* reduced to its lower-cased content lemmas.

    The text is run through the module-level ``nlp`` pipeline; stop words,
    punctuation and whitespace tokens are dropped, and the lemmas of the
    remaining tokens are lower-cased and joined with single spaces.
    """
    kept_lemmas = []
    for tok in nlp(text):
        if tok.is_stop or tok.is_punct or tok.is_space:
            continue
        kept_lemmas.append(tok.lemma_.lower())
    return ' '.join(kept_lemmas)

# Normalise the raw outcome and question texts in place (the names are rebound
# to the preprocessed versions).
# NOTE(review): map_questions_to_outcomes() preprocesses its inputs again, so
# the texts end up normalised twice -- confirm this is intended.
course_outcomes = list(map(preprocess, course_outcomes))
questions = list(map(preprocess, questions))

# Define a function to extract keywords
def extract_keywords(text, nlp, max_keywords=10, min_token_freq=2, min_keyword_len=2):
    """Return the most frequent word forms of *text* as a list of keywords.

    Args:
        text: The string to analyse.
        nlp: Callable that turns *text* into an iterable of tokens exposing
            ``is_stop``, ``is_punct``, ``is_space``, ``lemma_`` and ``text``.
        max_keywords: Upper bound on the number of keywords returned.
        min_token_freq: Minimum count a form needs to qualify as a keyword.
        min_keyword_len: Forms shorter than this many characters are skipped.

    Returns:
        Keywords ordered by descending count; forms with equal counts keep
        the order in which they were first seen.
    """
    counts = {}
    for tok in nlp(text):
        if tok.is_stop or tok.is_punct or tok.is_space:
            continue
        # Both the lemma and the surface text are tallied, so a token whose
        # lemma equals its text contributes 2 to that form's count.
        for form in (tok.lemma_, tok.text):
            counts[form] = counts.get(form, 0) + 1

    # sorted() is stable, so ties stay in first-seen (insertion) order.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    selected = []
    for form, count in ranked:
        if len(form) < min_keyword_len:
            continue
        # Counts only decrease from here on, so the first miss ends the scan.
        if count < min_token_freq or len(selected) >= max_keywords:
            break
        selected.append(form)
    return selected


def map_questions_to_outcomes(course_outcomes, questions, nlp, max_keywords=10, min_token_freq=2, min_keyword_len=2):
    """Tag each question with every course outcome whose keywords it contains.

    Outcomes and questions are first normalised with ``preprocess``. Keywords
    are then extracted from each outcome with ``extract_keywords`` and matched
    case-insensitively against each question.

    Args:
        course_outcomes: Iterable of course-outcome texts. They are labelled
            ``CO1``, ``CO2``, ... in input order.
        questions: Iterable of question texts.
        nlp: spaCy pipeline used for keyword extraction and matching.
        max_keywords: Forwarded to ``extract_keywords``.
        min_token_freq: Forwarded to ``extract_keywords``.
        min_keyword_len: Forwarded to ``extract_keywords``.

    Returns:
        A list of ``"<CO label>:<preprocessed question>"`` strings, one entry
        per (question, matching outcome) pair, ordered by question and then by
        outcome. Questions that match no outcome are omitted. Note that the
        preprocessed question text is returned, not the original wording.
    """
    # Normalise both sides the same way before comparing them.
    course_outcomes = [preprocess(outcome) for outcome in course_outcomes]
    questions = [preprocess(question) for question in questions]

    # Build one PhraseMatcher per outcome, keyed by its CO label.
    matchers = {}
    for i, outcome in enumerate(course_outcomes, start=1):
        key = f'CO{i}'
        keywords = extract_keywords(outcome, nlp, max_keywords, min_token_freq, min_keyword_len)
        matcher = PhraseMatcher(nlp.vocab, attr='LOWER')
        # spaCy v3 call form: patterns are passed as a single list (the old
        # ``add(key, None, *patterns)`` form is the legacy v2 signature).
        # Matching on LOWER only needs tokenization, so make_doc is enough.
        matcher.add(key, [nlp.make_doc(keyword) for keyword in keywords])
        matchers[key] = matcher

    # Record every outcome whose matcher fires on a question.
    mapped_questions = []
    for question in questions:
        question_doc = nlp(question)
        for key, matcher in matchers.items():
            if matcher(question_doc):
                mapped_questions.append(f"{key}:{question}")
    return mapped_questions

    

# Run the mapping with the default keyword settings and show the result.
mapped_questions = map_questions_to_outcomes(
    course_outcomes=course_outcomes,
    questions=questions,
    nlp=nlp,
)

print(mapped_questions)


# Output:
# ['CO2:write python program implement link list datum structure', 'CO2:time complexity quicksort algorithm', 'CO3:communicate technical concept non technical audience']
