In [1]:
# Example: Building a simple inverted index and querying it

# Toy corpus for the example: doc_id -> raw text.
# doc_ids are the 1-based positions of the texts below.
documents = dict(
    enumerate(
        [
            "annual revenue growth in europe",
            "europe sales stable last quarter",
            "quarterly report on growth and revenue",
        ],
        start=1,
    )
)

def tokenize(text):
    """Return the lowercase, whitespace-delimited terms of *text*, in order."""
    lowered = text.lower()
    return lowered.split()

# Build inverted index: term -> sorted list of doc_ids.
# Each doc_id appears at most once in a term's postings list, even when the
# term occurs several times in the same document.
inverted_index = {}
# Visit documents in ascending doc_id order so every postings list comes out
# sorted regardless of the insertion order of `documents`.
for doc_id, text in sorted(documents.items()):
    terms = tokenize(text)
    # dict.fromkeys() drops repeated terms while keeping first-seen order,
    # which keeps the index's key order deterministic across runs.
    for term in dict.fromkeys(terms):
        inverted_index.setdefault(term, []).append(doc_id)

# Querying the inverted index: a simple AND query
query = "europe revenue"
query_terms = tokenize(query)

# Get one postings set per query term. A term that is absent from the index
# contributes an empty set, so the AND correctly matches no documents instead
# of silently ignoring the unknown term.
posting_lists = [set(inverted_index.get(term, ())) for term in query_terms]

# set.intersection() needs at least one operand, so an empty query (no terms)
# is handled separately and matches nothing.
candidate_docs = set.intersection(*posting_lists) if posting_lists else set()

print("Query:", query)
print("Candidate documents:", candidate_docs)  # Expected: doc_id 1 is a strong candidate


Query: europe revenue
Candidate documents: {1}
