In [1]:
import re
from collections import defaultdict

In [None]:
class IRSystem:
    """A tiny in-memory Boolean retrieval system backed by an inverted index.

    Typical use: call ``add_document`` for every document, call
    ``build_index`` once, then run ``boolean_query``.

    Attributes:
        documents: dict mapping doc_id -> raw document text.
        dictionary: set of every distinct token seen by the last
            ``build_index`` call.
        inverted_index: ``defaultdict(set)`` mapping token -> postings. While
            indexing the values are sets; once ``build_index`` finishes each
            value is a sorted list of doc_ids.

    Note:
        Documents added after ``build_index`` are not searchable until the
        index is rebuilt, but they are already part of the universe used to
        evaluate ``NOT``.
    """

    def __init__(self):
        self.documents = {}
        self.dictionary = set()
        self.inverted_index = defaultdict(set)

    @staticmethod
    def _tokenize(text):
        """Lower-case ``text``, blank out non-alphanumerics, split on whitespace."""
        return re.sub(r"[^a-z0-9\s]", " ", text.lower()).split()

    @staticmethod
    def _split_on(tokens, operator):
        """Split ``tokens`` into sub-lists at every occurrence of ``operator``."""
        groups, current = [], []
        for token in tokens:
            if token == operator:
                groups.append(current)
                current = []
            else:
                current.append(token)
        groups.append(current)
        return groups

    def _evaluate_conjunction(self, tokens):
        """Intersect the postings of every term in ``tokens``.

        ``not`` negates the term that follows it; ``and`` is only a separator.
        Returns a set of doc_ids, or ``None`` when ``tokens`` holds no term.
        """
        all_docs = set(self.documents)
        result = None
        negate = False
        for token in tokens:
            if token == "and":
                # A NOT with no operand before the next AND is ignored.
                negate = False
                continue
            if token == "not":
                negate = not negate
                continue
            # .get() never triggers the defaultdict factory, so looking up an
            # unknown term does not pollute the index with empty entries.
            postings = set(self.inverted_index.get(token, ()))
            if negate:
                postings = all_docs - postings
                negate = False
            result = postings if result is None else result & postings
        return result

    def add_document(self, doc_id, text):
        """Store ``text`` under ``doc_id``, replacing any previous document.

        The index is not updated; call ``build_index`` afterwards.
        """
        self.documents[doc_id] = text

    def build_index(self):
        """Rebuild the dictionary and inverted index from all stored documents.

        Returns:
            ``self.inverted_index``, with every postings value converted to a
            sorted list of doc_ids.
        """
        self.dictionary.clear()
        self.inverted_index.clear()
        for doc_id, content in self.documents.items():
            for token in self._tokenize(content):
                self.dictionary.add(token)
                self.inverted_index[token].add(doc_id)
        # Snapshot the items so values can be replaced while looping.
        for term, postings in list(self.inverted_index.items()):
            self.inverted_index[term] = sorted(postings)
        return self.inverted_index

    def boolean_query(self, query):
        """Evaluate a Boolean query and return the sorted matching doc_ids.

        The query is normalized with the same tokenizer used for indexing, so
        case, punctuation and surrounding whitespace do not matter. Operators
        are the words AND, OR and NOT (any case) with precedence
        NOT > AND > OR; parentheses are not supported. Adjacent terms with no
        operator between them are ANDed together.

        Malformed input is handled leniently: operators without an operand are
        ignored, and a query that contains no term at all returns ``[]``.
        Because "and", "or" and "not" are reserved as operators, they cannot
        be searched for as terms.

        Args:
            query: the query string, e.g. ``"online AND NOT cloud"``.

        Returns:
            A sorted list of doc_ids; empty when nothing matches.
        """
        tokens = self._tokenize(query)
        if not tokens:
            return []

        result = set()
        # OR binds loosest: evaluate each AND-group, then union the groups.
        for group in self._split_on(tokens, "or"):
            matches = self._evaluate_conjunction(group)
            if matches is not None:
                result |= matches
        return sorted(result)

In [3]:
# Sample corpus: ten one-sentence documents, keyed by consecutive integer
# IDs starting at 1 in the order listed below.
text_docs = dict(enumerate(
    [
        "Online learning platforms have transformed modern education by providing flexible access to digital study materials.",
        "Artificial Intelligence enables personalized learning experiences and adaptive tutoring systems for students.",
        "Cybersecurity awareness programs are essential to protect educational institutions from data breaches.",
        "Machine learning algorithms help researchers analyze large educational datasets to improve academic outcomes.",
        "Virtual classrooms support remote learning and increase student participation in online discussions.",
        "Cloud computing allows schools to store learning resources securely and access them from any location.",
        "Educational technology tools help teachers create interactive lessons and improve classroom engagement.",
        "Robotics education encourages problem-solving skills and prepares students for future technological careers.",
        "Data analytics is used by universities to track student performance and identify learners who need support.",
        "Digital libraries provide access to vast academic resources and support self-paced learning for students.",
    ],
    start=1,
))

In [4]:
# Load every sample document into a fresh retrieval system.
ir = IRSystem()
for doc_id in text_docs:
    text = text_docs[doc_id]
    ir.add_document(doc_id, text)

# Left as the cell's trailing expression so the notebook displays the index.
ir.build_index()

defaultdict(set,
            {'online': [1, 5],
             'learning': [1, 2, 4, 5, 6, 10],
             'platforms': [1],
             'have': [1],
             'transformed': [1],
             'modern': [1],
             'education': [1, 8],
             'by': [1, 9],
             'providing': [1],
             'flexible': [1],
             'access': [1, 6, 10],
             'to': [1, 3, 4, 6, 9, 10],
             'digital': [1, 10],
             'study': [1],
             'materials': [1],
             'artificial': [2],
             'intelligence': [2],
             'enables': [2],
             'personalized': [2],
             'experiences': [2],
             'and': [2, 5, 6, 7, 8, 9, 10],
             'adaptive': [2],
             'tutoring': [2],
             'systems': [2],
             'for': [2, 8, 10],
             'students': [2, 8, 10],
             'cybersecurity': [3],
             'awareness': [3],
             'programs': [3],
             'are': [3],
             'e

In [None]:
# Boolean queries to run against the indexed corpus; operators are written in
# upper case for readability only.
queries = [
    "online AND learning",
    "education OR technology",
    "NOT cybersecurity",
    "research AND data",
    "cloud OR digital",
    "machine AND learning",
    "student OR performance",
    "robotics AND careers",
    "analytics OR datasets",
    "virtual AND classrooms"
]

print("Query Results:\n")
for q in queries:
    # boolean_query already returns the matching document IDs, so they are
    # printed directly without any positional lookup into text_docs.
    result = ir.boolean_query(q)
    print(f"{q} -> Documents: {result}")

Query Results:

online AND learning -> Documents: [1, 5]
education OR technology -> Documents: [1, 7, 8]
NOT cybersecurity -> Documents: [1, 2, 4, 5, 6, 7, 8, 9, 10]
research AND data -> Documents: []
cloud OR digital -> Documents: [1, 6, 10]
machine AND learning -> Documents: [4]
student OR performance -> Documents: [5, 9]
robotics AND careers -> Documents: [8]
analytics OR datasets -> Documents: [4, 9]
virtual AND classrooms -> Documents: [5]
