In [1]:
import os
import re
import json
from collections import defaultdict

# Node class for linked list
class ListNode:
    def __init__(self, doc_name):
        self.doc_name = doc_name
        self.next = None

# Linked list class
class LinkedList:
    def __init__(self):
        self.head = None

    def append(self, doc_name):
        if not self.head:
            self.head = ListNode(doc_name)
        else:
            current = self.head
            while current.next:
                if current.doc_name == doc_name:  # Avoid duplicates
                    return
                current = current.next
            if current.doc_name != doc_name:
                current.next = ListNode(doc_name)

    def to_list(self):
        result = []
        current = self.head
        while current:
            result.append(current.doc_name)
            current = current.next
        return result

# Function to tokenize and normalize the text
def tokenize(text):
    tokens = re.findall(r'\b\w+\b', text.lower())
    return tokens

# Building the inverted index using linked lists
def build_inverted_index(directory):
    inverted_index = defaultdict(LinkedList)

    for filename in os.listdir(directory):
        if filename.endswith(".txt"):
            file_path = os.path.join(directory, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
                tokens = tokenize(content)

                for token in tokens:
                    inverted_index[token].append(filename)

    return inverted_index

# Convert linked lists in the index to a JSON serializable format
def convert_index_to_serializable(index):
    serializable_index = {}
    for token, linked_list in index.items():
        serializable_index[token] = linked_list.to_list()
    return serializable_index

# Save the inverted index to a JSON file
def save_inverted_index(index, output_file):
    serializable_index = convert_index_to_serializable(index)
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(serializable_index, file, indent=4)

# Main process
if __name__ == "__main__":
    docs_directory = "/docs"  # Update this to the correct path if needed
    output_file = "inverted_index_linked_list.json"
    
    index = build_inverted_index(docs_directory)
    save_inverted_index(index, output_file)
    print(f"Inverted index saved to {output_file}")


FileNotFoundError: [WinError 3] The system cannot find the path specified: '/docs'