### 1. a) Create a **_Term Document Incidence Matrix_** for the given documents

In [9]:
import os

In [19]:
class TermDocIndex:
    """Term-document incidence matrix over a small document collection.

    After buildIndex() has run:
      terms -- sorted list of the distinct lower-cased, whitespace-separated
               tokens found in the indexed documents
      docs  -- lower-cased full text of each indexed document
      mat   -- mat[i][j] is 1 when terms[i] occurs as a token in docs[j],
               otherwise 0
    """

    def __init__(self):
        self.mat = []
        self.terms = []
        self.docs = []

    def buildIndex(self, data_dir='Cranfield Data Set'):
        """Read the documents from data_dir and (re)build the matrix.

        Args:
            data_dir: directory holding one document per file. Defaults to
                the folder the notebook originally hard-coded.
        """
        # Start clean so that calling buildIndex() twice does not duplicate
        # documents or terms.
        self.docs = []
        doc_tokens = []

        # NOTE(review): directory entries 1..20 are used and entry 0 is
        # skipped, exactly as before. os.listdir() returns entries in
        # arbitrary order, so confirm whether skipping the first entry is
        # intentional.
        files = os.listdir(data_dir)
        for name in files[1:21]:
            with open(os.path.join(data_dir, name)) as f:
                text = f.read().lower()
            self.docs.append(text)
            doc_tokens.append(set(text.split()))

        self.terms = sorted(set().union(*doc_tokens))

        # Membership is tested against each document's token set, not its raw
        # text: a substring test would mark "cat" as present in a document
        # that only contains "concatenate".
        self.mat = [
            [1 if term in tokens else 0 for tokens in doc_tokens]
            for term in self.terms
        ]

In [11]:
# defines the operations of stack data structure
class Stack:
    """Minimal LIFO container backed by a Python list.

    pop() and peek() return None instead of raising when the stack is empty.
    """

    def __init__(self):
        self._stack = []

    def push(self, item):
        """Place item on top of the stack."""
        self._stack.append(item)

    def isEmpty(self):
        """Return True when the stack holds no items."""
        return len(self._stack) == 0

    def pop(self):
        """Remove and return the top item, or None when the stack is empty."""
        return None if self.isEmpty() else self._stack.pop()

    def peek(self):
        """Return the top item without removing it, or None when empty."""
        return None if self.isEmpty() else self._stack[-1]

In [12]:
# infix to postfix converter
class InfixToPostfix:
    """Convert an infix boolean query into postfix (reverse Polish) order.

    Recognised operators are "or", "and" and "not" (lowest to highest
    precedence) plus parentheses for grouping. Every other token is treated
    as an operand and passed through unchanged.
    """

    def __init__(self, infix):
        self.infix = infix
        self.postfix = []
        self.stack = Stack()
        self.precedence = {"(": 0, "or": 1, "and": 2, "not": 3}
        self.operators = ["and", "or", "not", "(", ")"]

    def convert(self):
        """Return the postfix token list for self.infix.

        Returns:
            list of tokens in postfix order (also stored in self.postfix).

        Raises:
            ValueError: if the parentheses in the query are unbalanced.
        """
        # Reset state so that repeated calls do not accumulate output or
        # reuse operators left over from a call that raised.
        self.postfix = []
        while not self.stack.isEmpty():
            self.stack.pop()

        # Pad parentheses and split on any run of whitespace; splitting on a
        # single space would turn doubled, leading or trailing spaces into
        # empty-string operands.
        padded = self.infix.replace("(", " ( ").replace(")", " ) ")
        for token in padded.split():
            if token not in self.operators:
                self.postfix.append(token)
            elif token == "(":
                self.stack.push(token)
            elif token == ")":
                while not self.stack.isEmpty() and self.stack.peek() != "(":
                    self.postfix.append(self.stack.pop())
                # Without this check an unmatched ")" would loop forever,
                # because peek()/pop() return None on an empty stack.
                if self.stack.isEmpty():
                    raise ValueError("Unbalanced parentheses: unmatched ')'")
                self.stack.pop()
            else:
                incoming = self.precedence[token]
                while not self.stack.isEmpty():
                    top = self.precedence[self.stack.peek()]
                    # "not" is unary and right-associative: an earlier "not"
                    # must stay on the stack until its operand is emitted.
                    if top > incoming or (top == incoming and token != "not"):
                        self.postfix.append(self.stack.pop())
                    else:
                        break
                self.stack.push(token)

        while not self.stack.isEmpty():
            operator = self.stack.pop()
            if operator == "(":
                raise ValueError("Unbalanced parentheses: unmatched '('")
            self.postfix.append(operator)
        return self.postfix

-----

## 1. b) Process boolean queries using Term Document Incidence Matrix

In [52]:
class Query:
    """Evaluate a boolean query against a term-document incidence matrix.

    The query may combine terms with "and", "or", "not" and parentheses.
    """

    def __init__(self, query, termIndex=None):
        """Normalise the query text and attach a term index.

        Args:
            query: infix boolean query as typed by the user.
            termIndex: optional, already built index exposing `terms`, `mat`
                and `docs`. When omitted, a TermDocIndex is created and built
                from disk, as before.
        """
        # The index stores lower-cased text, so the query must be lower-cased
        # too; otherwise "Laminar" or "AND" would never match.
        padded = query.lower().replace('(', ' ( ').replace(')', ' ) ')
        # Collapse whitespace runs so that padding never yields empty tokens.
        self.query = ' '.join(padded.split())

        if termIndex is None:
            termIndex = TermDocIndex()
            termIndex.buildIndex()
        self.termIndex = termIndex

    def solve(self, vals, op):
        """Apply one boolean operator to incidence vector(s).

        Args:
            vals: a single 0/1 vector for 'not'; a pair of equally long 0/1
                vectors for 'and' / 'or'.
            op: 'not', 'and' or 'or'.

        Returns:
            The resulting 0/1 vector, or [] for an unrecognised operator.
        """
        ans = []
        if op == 'not':
            ans = [1 if v == 0 else 0 for v in vals]
        elif op == 'and':
            ans = [a & b for a, b in zip(vals[0], vals[1])]
        elif op == 'or':
            ans = [a | b for a, b in zip(vals[0], vals[1])]
        return ans

    def _evaluatePostfix(self, postfix):
        """Evaluate postfix tokens and return the resulting 0/1 vector."""
        index = self.termIndex
        numDocs = len(index.docs)
        operands = []

        for token in postfix:
            if token == 'not':
                operands.append(self.solve(operands.pop(), token))
            elif token in ('and', 'or'):
                # Operand order is irrelevant: both operators are commutative.
                pair = [operands.pop(), operands.pop()]
                operands.append(self.solve(pair, token))
            elif token in index.terms:
                operands.append(index.mat[index.terms.index(token)])
            else:
                # Unknown terms occur in no document.
                operands.append([0] * numDocs)

        # An empty query matches nothing instead of raising IndexError.
        return operands[0] if operands else [0] * numDocs

    def processQuery(self):
        """Print the text of every document that satisfies the query."""
        postfix = InfixToPostfix(self.query).convert()
        result = self._evaluatePostfix(postfix)

        print("Documents retrieved: ")
        for i, hit in enumerate(result):
            if hit != 0:
                print(self.termIndex.docs[i])
                print()

In [57]:
# Read a boolean query from the user (terms combined with "and", "or", "not"
# and parentheses), build the term index, and print the matching documents.
query = input("Enter your query:")
q = Query(query)
q.processQuery()

Enter your query: laminar and (turbulent and flow)


Documents retrieved: 
experiments were performed in the 12 in supersonic wind tunnel of the jet propulsion laboratory of the california institute of technology to investigate the effect of three dimensional roughness elements (spheres) on boundary layer transition on a tained at local mach numbers of 190 271 and 367 by varying trip size position spacing and reynolds number per inch  the results indicate that (1) transition from laminar to turbulent flow induced by three dimensional roughness elements begins when the double row of spiral vortices trailing each element contaminates and breaks down the surrounding field of vorticity (2) transition appears rather suddenly becoming more violent with increasing roughness height relative to the boundary layer thickness (3) after the breakdown of the vorticity field the strength of the spiral vortices may still persist in the sublayer of the ensuing turbulent flow (4) lateral spacing of roughness elements has little effect upon the initial bre