In [36]:
import numpy as np
import pandas as pd

class BooleanRetrieval:
    """In-memory Boolean retrieval over whitespace-tokenised documents.

    Attributes are plain containers so they can be inspected directly:

    * ``index`` maps each lower-cased term to the set of doc ids containing it.
    * ``documents`` maps each doc id to its original text.
    * ``documents_matrix`` is a binary document-term matrix (``None`` until
      ``create_documents_matrix`` is called).
    """

    def __init__(self):
        self.index = {}               # term -> set of doc ids (posting sets)
        self.documents_matrix = None  # built on demand by create_documents_matrix
        self.documents = {}           # doc id -> raw text

    def index_document(self, doc_id, text):
        """Store ``text`` under ``doc_id`` and add its terms to the index.

        Terms are produced by lower-casing the text and splitting on
        whitespace. Re-indexing an existing ``doc_id`` first removes the
        postings of the previous text so the index never reports terms the
        document no longer contains.
        """
        stale_text = self.documents.get(doc_id)
        if stale_text is not None:
            self._remove_postings(doc_id, stale_text)

        self.documents[doc_id] = text
        terms = text.lower().split()
        print("Document -", doc_id, terms)

        for term in terms:
            self.index.setdefault(term, set()).add(doc_id)

    def _remove_postings(self, doc_id, text):
        """Drop ``doc_id`` from the posting set of every term in ``text``."""
        for term in set(text.lower().split()):
            postings = self.index.get(term)
            if postings is None:
                continue
            postings.discard(doc_id)
            if not postings:
                # No document contains the term any more; forget it entirely.
                del self.index[term]

    def create_documents_matrix(self):
        """Build the binary document-term matrix into ``documents_matrix``.

        Rows follow the insertion order of ``documents``; columns follow the
        insertion order of ``index``. A cell is 1 when the document contains
        the term and 0 otherwise.
        """
        # One dict lookup per token instead of a linear list.index() scan.
        column_of = {term: col for col, term in enumerate(self.index)}
        matrix = np.zeros((len(self.documents), len(column_of)), dtype=int)

        for row, text in enumerate(self.documents.values()):
            for term in set(text.lower().split()):
                col = column_of.get(term)
                if col is not None:
                    matrix[row, col] = 1

        self.documents_matrix = matrix

    def print_documents_matrix_table(self):
        """Print the document-term matrix as a table with terms as columns."""
        df = pd.DataFrame(self.documents_matrix, columns=list(self.index))
        print("\nDocument-Term Matrix:\n")
        print(df)

    def print_all_terms(self):
        """Print every indexed term in insertion order."""
        print("\nAll terms in the documents:")
        print(list(self.index))

    def _postings(self, term):
        """Return a *copy* of the posting set for ``term`` (empty if unknown)."""
        return set(self.index.get(term, ()))

    def _read_operand(self, tokens, pos, universe):
        """Read the operand starting at ``tokens[pos]``.

        Any number of leading ``not`` tokens are consumed and applied as a
        complement against ``universe``. Returns ``(doc_ids, next_pos)``.

        Raises:
            ValueError: if the tokens run out before a term is found.
        """
        negate = False
        while pos < len(tokens) and tokens[pos] == "not":
            negate = not negate
            pos += 1

        if pos >= len(tokens):
            raise ValueError(
                "boolean query ends with an operator; expected a term after it"
            )

        matches = self._postings(tokens[pos])
        if negate:
            matches = universe - matches
        return matches, pos + 1

    def boolean_search(self, query):
        """Evaluate a Boolean query and return the matching doc ids as a set.

        The query is lower-cased, split on whitespace and evaluated strictly
        left to right (no precedence, no parentheses):

        * ``and X`` intersects the running result with X's documents.
        * ``or X`` unions the running result with X's documents.
        * ``not X`` removes X's documents from the running result.
        * ``and not X`` / ``or not X`` combine with the complement of X.
        * a bare term replaces the running result with that term's documents.

        The running result starts as all documents, so an empty query returns
        every doc id and a leading ``not X`` returns everything except X.
        The returned set is always a fresh object; mutating it does not
        affect the index.

        Raises:
            ValueError: if an operator is not followed by a term.
        """
        tokens = query.lower().split()
        universe = set(self.documents)
        result = set(universe)

        pos = 0
        while pos < len(tokens):
            token = tokens[pos]
            if token in ("and", "or", "not"):
                operand, pos = self._read_operand(tokens, pos + 1, universe)
                if token == "and":
                    result &= operand
                elif token == "or":
                    result |= operand
                else:
                    result -= operand
            else:
                result = self._postings(token)
                pos += 1
        return result


# Main Program
if __name__ == "__main__":

    indexer = BooleanRetrieval()

    # Small demo corpus: doc id -> text.
    documents = {
        1: "Python is a programming language",
        2: "Information retrieval deals with finding information",
        3: "Boolean models are used in information retrieval",
    }

    # Indexing documents
    for doc_id, text in documents.items():
        indexer.index_document(doc_id, text)

    # Create document-term matrix
    indexer.create_documents_matrix()
    indexer.print_documents_matrix_table()
    indexer.print_all_terms()

    # Boolean Search
    query = input("\nEnter your boolean query: ")
    try:
        results = indexer.boolean_search(query)
    except (IndexError, ValueError):
        # An operator with no term after it (e.g. "python and") makes
        # boolean_search raise; report it instead of dying with a traceback.
        print("\nInvalid query: every AND/OR/NOT must be followed by a term.")
    else:
        if results:
            print(f"\nResults for '{query}': {results}")
        else:
            print("\nNo results found for the query.")

Document - 1 ['python', 'is', 'a', 'programming', 'language']
Document - 2 ['information', 'retrieval', 'deals', 'with', 'finding', 'information']
Document - 3 ['boolean', 'models', 'are', 'used', 'in', 'information', 'retrieval']

Document-Term Matrix:

   python  is  a  programming  language  information  retrieval  deals  with  \
0       1   1  1            1         1            0          0      0     0   
1       0   0  0            0         0            1          1      1     1   
2       0   0  0            0         0            1          1      0     0   

   finding  boolean  models  are  used  in  
0        0        0       0    0     0   0  
1        1        0       0    0     0   0  
2        0        1       1    1     1   1  

All terms in the documents:
['python', 'is', 'a', 'programming', 'language', 'information', 'retrieval', 'deals', 'with', 'finding', 'boolean', 'models', 'are', 'used', 'in']

Enter your boolean query: python or information 

Results for 'pyth