<a href="https://colab.research.google.com/github/Monisha25a/Amd_contrnt/blob/main/Editor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from collections import defaultdict
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline, set_seed

# ---------- Autocompletion Classes ----------
class TrieNode:
    """One node of a character-keyed prefix tree."""

    def __init__(self):
        # True once some inserted word ends exactly at this node.
        self.is_end = False
        # Maps a single character to the child node reached through it.
        self.children = dict()

class Trie:
    """Prefix tree of strings supporting insertion and prefix completion."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Add ``word`` to the trie, creating any missing nodes on its path.

        Inserting the empty string marks the root itself as a word end.
        """
        node = self.root
        for ch in word:
            child = node.children.get(ch)
            if child is None:
                # Only allocate a node when the edge does not exist yet.
                child = node.children[ch] = TrieNode()
            node = child
        node.is_end = True

    def _autocomplete(self, node, prefix, results):
        """Append to ``results`` every stored word in the subtree of ``node``.

        ``prefix`` is the text spelled by the path from the root to ``node``.
        Words are appended in depth-first pre-order, visiting children in the
        order their edges were first inserted.

        The walk uses an explicit stack rather than recursion so that very
        long stored strings cannot exhaust the interpreter's recursion limit.
        """
        stack = [(node, prefix)]
        while stack:
            current, text = stack.pop()
            if current.is_end:
                results.append(text)
            # Push children reversed so they are popped in insertion order.
            for ch, child in reversed(list(current.children.items())):
                stack.append((child, text + ch))

    def autocomplete(self, prefix):
        """Return a list of all stored words that start with ``prefix``.

        Returns an empty list when no stored word has that prefix. An empty
        ``prefix`` matches every stored word.
        """
        node = self.root
        for ch in prefix:
            node = node.children.get(ch)
            if node is None:
                return []
        results = []
        self._autocomplete(node, prefix, results)
        return results

class NgramModel:
    """Next-word suggester backed by a table of observed n-gram continuations.

    ``ngrams`` maps a tuple of ``n - 1`` consecutive tokens to the list of
    tokens that followed that tuple in the training text (duplicates kept, in
    order of occurrence).
    """

    def __init__(self, n):
        """Create an untrained model of order ``n``.

        Raises:
            ValueError: if ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError("n must be at least 1")
        self.n = n
        self.ngrams = defaultdict(list)

    def train(self, corpus):
        """Record every n-gram found in the whitespace-split ``corpus``.

        May be called repeatedly; observations accumulate. Texts with fewer
        than ``n`` tokens contribute nothing.
        """
        tokens = corpus.split()
        width = self.n - 1
        # The last full window starts at len(tokens) - n, so the range must
        # extend one past that index to include the final n-gram.
        for start in range(len(tokens) - self.n + 1):
            key = tuple(tokens[start:start + width])
            self.ngrams[key].append(tokens[start + width])

    def predict(self, context):
        """Return the tokens seen after the last ``n - 1`` words of ``context``.

        The result is a new list (empty when the context was never observed),
        so callers may modify it without affecting the model.
        """
        width = self.n - 1
        words = context.split()
        # A slice of [-0:] would select the whole list, so the unigram case
        # (no context words at all) needs the empty key spelled out.
        key = tuple(words[-width:]) if width else ()
        return list(self.ngrams.get(key, ()))

class TFIDFAutocompleter:
    """Rank a fixed set of documents against a query by TF-IDF similarity."""

    def __init__(self, documents):
        """Fit a default ``TfidfVectorizer`` on ``documents`` and keep both.

        ``documents`` is stored as given and its items are what ``suggest``
        returns.
        """
        self.vectorizer = TfidfVectorizer()
        self.docs = documents
        self.X = self.vectorizer.fit_transform(documents)

    def suggest(self, query):
        """Return the documents with a positive score, best match first.

        The score of a document is the dot product of its TF-IDF row with the
        TF-IDF vector of ``query`` (presumably cosine similarity, given the
        vectorizer's default normalisation). Documents with equal scores are
        ordered by descending document value, and documents scoring zero are
        omitted.
        """
        query_vec = self.vectorizer.transform([query])
        # ``@`` is a matrix product for both sparse matrices and sparse
        # arrays, whereas ``*`` is element-wise on sparse arrays.
        product = self.X @ query_vec.T
        # Flatten the (n_docs, 1) result to plain floats so that sorting and
        # the ``> 0`` filter do not rely on truthiness of 1-element arrays.
        scores = product.toarray().ravel().tolist()
        ranked = sorted(zip(scores, self.docs), reverse=True)
        return [doc for score, doc in ranked if score > 0]

class GPT2Autocompleter:
    """Free-text completion via a ``transformers`` text-generation pipeline."""

    def __init__(self):
        """Build the ``'gpt2'`` text-generation pipeline, then seed with 42."""
        text_generator = pipeline('text-generation', model='gpt2')
        self.generator = text_generator
        # Seeded after the pipeline is built; presumably to make sampling
        # repeatable across runs.
        set_seed(42)

    def generate(self, prompt):
        """Return the generated text of the single sequence produced for ``prompt``.

        The pipeline is called with ``max_length=30`` and one return sequence.
        """
        sequences = self.generator(
            prompt,
            max_length=30,
            num_return_sequences=1,
        )
        best = sequences[0]
        return best['generated_text']

# ---------- CLI Fallback (since tkinter not supported) ----------
def main():
    """Run an interactive console loop over the four autocompletion methods.

    Builds the trie, n-gram and TF-IDF models from a small built-in corpus,
    then repeatedly reads a method choice and a query from standard input
    and prints the chosen model's suggestions. The loop ends when the user
    enters ``q`` or when standard input is closed.
    """
    corpus = [
        "hello world",
        "hi there",
        "how are you doing",
        "hello how are you",
        "house on the hill",
        "hover over the map",
        "helicopter is flying",
    ]

    trie = Trie()
    for sentence in corpus:
        for word in sentence.split():
            trie.insert(word)

    ngram = NgramModel(3)
    ngram.train(" ".join(corpus))

    tfidf = TFIDFAutocompleter(corpus)
    # Building the GPT-2 pipeline is by far the most expensive step, so it is
    # deferred until the user actually selects option 4.
    gpt2 = None

    print("\nChoose Autocompletion Method:\n1. Trie\n2. N-gram\n3. TF-IDF\n4. GPT-2\n")

    while True:
        try:
            choice = input("Enter method (1-4) or 'q' to quit: ").strip()
        except EOFError:
            # Input stream closed (e.g. piped input ran out): exit quietly.
            break
        if choice.lower() == 'q':
            break

        # Reject bad choices before asking for a query that would be ignored.
        if choice not in ('1', '2', '3', '4'):
            print("Invalid choice. Try again.")
            continue

        try:
            query = input("Enter your query: ").strip()
        except EOFError:
            break

        if choice == '1':
            print("Suggestions:", trie.autocomplete(query))
        elif choice == '2':
            print("Next word suggestions:", ngram.predict(query))
        elif choice == '3':
            print("Relevant completions:", tfidf.suggest(query))
        else:
            if gpt2 is None:
                gpt2 = GPT2Autocompleter()
            print("Generated completion:", gpt2.generate(query))

# Start the interactive loop only when this file is executed as the main
# module, not when it is imported.
if __name__ == '__main__':
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu



Choose Autocompletion Method:
1. Trie
2. N-gram
3. TF-IDF
4. GPT-2

Enter method (1-4) or 'q' to quit: 1
Enter your query: he
Suggestions: ['hello', 'helicopter']
Enter method (1-4) or 'q' to quit: 2
Enter your query: hello
Next word suggestions: []
Enter method (1-4) or 'q' to quit: 3
Enter your query: helicopter
Relevant completions: ['helicopter is flying']
Enter method (1-4) or 'q' to quit: 4
Enter your query: helicopter


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated completion: helicopteras. A few people suggested that the use of moths was an additional way to support food. The larvae were raised and were fed
Enter method (1-4) or 'q' to quit: q
