<a href="https://colab.research.google.com/github/QueeneDelmarva/Thesis/blob/main/Thesis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Thesis Code**

## Importing Libraries

In [1]:
# Connect to Gdrive
from google.colab import drive

# Connect to Gsheets
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Preprocessing
import re
import nltk

import string
from google.colab import auth
from google.auth import default

## Connecting to Gdrive

In [2]:
# Mount Google Drive at /content/drive; the service-account key file used below is read from this mount.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Connecting to GSheets

In [3]:
# OAuth scopes passed along when the service-account credentials are built
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]

# The service-account key is a JSON file stored on the mounted Google Drive
keyfile_path = '/content/drive/MyDrive/skripsi-394804-a98ea9715aa4.json'
creds = ServiceAccountCredentials.from_json_keyfile_name(keyfile_path, scope)

# Hand the credentials to gspread to obtain an authorized client
client = gspread.authorize(creds)

# Open the spreadsheet by its key and keep a handle to its first worksheet
spreadsheet = client.open_by_key('1RlZ8-XwwEueuCAq4nXGwq3ZmPDE-_0gP3PsroKeNFGo')
sheet = spreadsheet.sheet1

# ## Read the data from the Gsheets
# # 1. Read individual cells
# cell_value = sheet.cell(3, 5).value
# print(cell_value)

# # 2. Read all values from the first worksheet
# data = sheet.get_all_values()

# # Print the data
# for row in data:
#     print(row)

## Preprocessing Data

In [4]:
def preprocess(word):
    """Split a piece of text into lowercase alphanumeric tokens.

    Every maximal run of letters and digits becomes one token; any other
    character (whitespace, punctuation, underscore, ...) acts as a separator.

    Args:
        word: The text to tokenize (in this notebook, the content of one
            spreadsheet cell). May contain several words.

    Returns:
        A list of lowercase tokens in order of appearance. The list is empty
        when the text contains no letters or digits.
    """
    # `[^\W_]` is "a word character that is not an underscore", i.e. a letter
    # or digit (Unicode-aware for str patterns). Deliberately no `\b` anchors
    # around an ASCII-only class: `\b` is Unicode-aware, so an ASCII run that
    # touches `_` or an accented letter would fail to match and the whole
    # word would silently disappear (e.g. "snake_case" or "café" -> []).
    tokens = re.findall(r'[^\W_]+', word)

    # Normalize case so that lookups are case-insensitive
    return [token.lower() for token in tokens]

## **Radix Trie**
---
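Classes:
*   `RadixTrieNode` – a single node of the trie
*   `RadixTrie` – the trie itself (insert, prefix search, word collection)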

### Radix Trie Node

In [5]:
class RadixTrieNode:
    """A single node of the trie: its outgoing edges plus an end-of-word flag."""

    def __init__(self):
        # Set to True on the node where an inserted word ends
        self.is_end_of_word = False
        # Child nodes, keyed by the label of the edge leading to them
        self.children = dict()

### Radix Trie Class

In [14]:
class RadixTrie:
    """Prefix tree for storing strings and listing those that share a prefix.

    Note: despite the name, edges are not compressed. ``insert`` creates one
    node per character, so a word of length n occupies a chain of n nodes
    below the root.
    """

    def __init__(self):
        self.root = RadixTrieNode()

    def insert(self, word):
        """Add ``word`` to the trie, creating missing nodes along its path.

        Inserting an empty string marks the root itself as a word.
        """
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = RadixTrieNode()
            node = node.children[char]
        node.is_end_of_word = True

    def search(self, prefix):
        """Return every stored word that starts with ``prefix``.

        Args:
            prefix: The leading characters to look for. An empty prefix
                matches every stored word.

        Returns:
            A list of matching words (each including the prefix), or an empty
            list when no stored word starts with ``prefix``.
        """
        node = self.root
        for char in prefix:
            if char not in node.children:
                return []  # Prefix not found, return an empty list
            node = node.children[char]
        return self.collect_words(node, prefix)

    def collect_words(self, node, current_prefix):
        """List all words stored in the subtree rooted at ``node``.

        Args:
            node: The node to start from.
            current_prefix: The string spelled by the path from the root to
                ``node``; it is prepended to every word found.

        Returns:
            The words in depth-first pre-order, visiting children in the
            order they were first inserted.
        """
        results = []
        # An explicit stack instead of recursion: the trie is one level deep
        # per character, so recursing would raise RecursionError for any word
        # longer than the interpreter's recursion limit.
        pending = [(node, current_prefix)]
        while pending:
            current, word = pending.pop()
            if current.is_end_of_word:
                results.append(word)
            # Push children in reverse so the first-inserted child is popped
            # (and therefore visited) first, which keeps pre-order output.
            for char, child in reversed(list(current.children.items())):
                pending.append((child, word + char))
        return results

    def get_all_words(self):
        """Return every word stored in the trie."""
        return self.collect_words(self.root, "")

### **Main Function**

In [21]:
def _build_trie(rows):
    """Create a RadixTrie holding every preprocessed token found in ``rows``.

    Args:
        rows: An iterable of rows, each an iterable of cell strings.

    Returns:
        The populated RadixTrie.
    """
    trie = RadixTrie()
    for row in rows:
        for cell in row:
            for token in preprocess(cell):
                # Guard: inserting "" would mark the trie root itself as a word
                if token:
                    trie.insert(token)
    return trie


def main(prefix="h"):
    """Load the worksheet, index its words in a trie, and print a prefix search.

    Args:
        prefix: The prefix to search for. Defaults to "h".
    """
    # Read all values from the first worksheet
    data = sheet.get_all_values()

    # Build the trie from every cell of the sheet
    trie = _build_trie(data)

    # Show all stored words that start with the requested prefix
    results = trie.search(prefix)
    print(results)


if __name__ == "__main__":
    main()

['has', 'her', 'hotel']
