<a href="https://colab.research.google.com/github/Rajmanandhar206/SHADOW_FOX/blob/main/Autocorrection_Keyboard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install pyspellchecker

Collecting pyspellchecker
  Downloading pyspellchecker-0.8.2-py3-none-any.whl.metadata (9.4 kB)
Downloading pyspellchecker-0.8.2-py3-none-any.whl (7.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m46.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyspellchecker
Successfully installed pyspellchecker-0.8.2


In [4]:
import re
from collections import defaultdict, Counter
from spellchecker import SpellChecker
import math

In [5]:
class AutocorrectKeyboard:
    def __init__(self, ngram_n=3):
        self.ngram_n = ngram_n  # Using trigrams by default
        self.ngrams = defaultdict(Counter)
        self.vocab = set()
        self.spell = SpellChecker()
        self.total_words = 0

    def train(self, text_corpus):
        """Train the n-gram model on a text corpus"""
        # Preprocess the text
        sentences = self._preprocess_text(text_corpus)

        # Build n-grams
        for sentence in sentences:
            words = sentence.split()
            self.total_words += len(words)
            self.vocab.update(words)

            # Create n-grams of all orders up to ngram_n
            for n in range(1, self.ngram_n + 1):
                for i in range(len(words) - n + 1):
                    context = tuple(words[i:i+n-1])
                    next_word = words[i+n-1]
                    self.ngrams[context][next_word] += 1

    def _preprocess_text(self, text):
        """Clean and split text into sentences"""
        # Convert to lowercase
        text = text.lower()
        # Remove special characters except basic punctuation
        text = re.sub(r"[^a-zA-Z0-9\.,!?']", " ", text)
        # Split into sentences (very simple split)
        sentences = re.split(r"[.!?]", text)
        # Remove empty sentences and strip whitespace
        sentences = [s.strip() for s in sentences if s.strip()]
        return sentences


In [6]:
 def get_candidates(self, word):
        """Get spelling correction candidates for a word"""
        # Known word - no correction needed
        if word in self.vocab:
            return [word]

        # Get possible corrections
        candidates = self.spell.candidates(word)
        # Filter to only words in our vocabulary
        vocab_candidates = [w for w in candidates if w in self.vocab] if candidates else []

        # If no candidates found in vocab, try similar words
        if not vocab_candidates:
            similar_words = self.spell.correction(word)
            if similar_words and similar_words in self.vocab:
                vocab_candidates = [similar_words]

        # If still no candidates, return the original word
        return vocab_candidates if vocab_candidates else [word]


In [7]:
def predict_next_word(self, previous_words, num_suggestions=3):
        """Predict the next word given previous words"""
        if not previous_words:
            return []

        # Get the appropriate context (last n-1 words)
        context = tuple(previous_words[-(self.ngram_n-1):]) if len(previous_words) >= self.ngram_n-1 else tuple(previous_words)

        # Get possible next words and their counts
        next_words = self.ngrams.get(context, Counter())

        # Calculate probabilities (with smoothing)
        total = sum(next_words.values()) + len(self.vocab)  # Add-one smoothing
        suggestions = []

        for word, count in next_words.most_common(num_suggestions):
            prob = (count + 1) / total
            suggestions.append((word, prob))

        # If we don't have enough suggestions, back off to lower-order n-grams
        if len(suggestions) < num_suggestions and len(context) > 1:
            backoff_suggestions = self.predict_next_word(previous_words[1:], num_suggestions - len(suggestions))
            suggestions.extend(backoff_suggestions)

        # Remove duplicates and sort by probability
        unique_suggestions = {}
        for word, prob in suggestions:
            if word not in unique_suggestions or prob > unique_suggestions[word]:
                unique_suggestions[word] = prob

        # Sort by probability and return top suggestions
        sorted_suggestions = sorted(unique_suggestions.items(), key=lambda x: x[1], reverse=True)
        return [word for word, prob in sorted_suggestions[:num_suggestions]]


In [8]:
def process_input(self, text):
        """Process user input with autocorrect and predictions"""
        words = text.lower().split()
        if not words:
            return {"corrected_text": "", "next_word_predictions": []}

        # Autocorrect the last word
        last_word = words[-1]
        corrected_last_word = self.get_candidates(last_word)[0]

        corrected_words = words[:-1] + [corrected_last_word]
        corrected_text = " ".join(corrected_words)

        # Get next word predictions
        predictions = self.predict_next_word(corrected_words)

        return {
            "corrected_text": corrected_text,
            "next_word_predictions": predictions,
            "current_word_correction": corrected_last_word if corrected_last_word != last_word else None
        }


In [16]:
import re
from collections import defaultdict, Counter

class AutocorrectKeyboard:
    def __init__(self, ngram_n=2):
        self.ngram_n = ngram_n
        self.ngrams = defaultdict(Counter)
        self.vocab = set()
        self.word_counts = Counter()

    def train(self, text_corpus):
        sentences = self._preprocess_text(text_corpus)

        for sentence in sentences:
            words = sentence.split()
            self.vocab.update(words)
            self.word_counts.update(words)

            for i in range(len(words) - self.ngram_n + 1):
                context = tuple(words[i:i+self.ngram_n-1])
                next_word = words[i+self.ngram_n-1]
                self.ngrams[context][next_word] += 1

    def _preprocess_text(self, text):
        text = text.lower()
        text = re.sub(r"[^a-z0-9\s]", "", text)
        sentences = re.split(r"[.!?]", text)
        return [s.strip() for s in sentences if s.strip()]

    def _get_edit_distance(self, word1, word2):
        if word1 == word2:
            return 0
        if len(word1) < len(word2):
            return self._get_edit_distance(word2, word1)
        if len(word2) == 0:
            return len(word1)

        previous_row = range(len(word2) + 1)
        for i, c1 in enumerate(word1):
            current_row = [i + 1]
            for j, c2 in enumerate(word2):
                insertions = previous_row[j + 1] + 1
                deletions = current_row[j] + 1
                substitutions = previous_row[j] + (c1 != c2)
                current_row.append(min(insertions, deletions, substitutions))
            previous_row = current_row

        return previous_row[-1]

    def get_correction(self, word):
        if word in self.vocab:
            return word

        closest_word = min(
            self.vocab,
            key=lambda vocab_word: (
                self._get_edit_distance(word, vocab_word),
                -self.word_counts[vocab_word]
            )
        )

        if self._get_edit_distance(word, closest_word) <= 2:
            return closest_word
        return word

    def predict_next_word(self, previous_words, num_suggestions=3):
        if not previous_words:
            return []

        context = tuple(previous_words[-(self.ngram_n-1):])
        possible_words = self.ngrams.get(context, Counter())

        suggestions = [word for word, count in possible_words.most_common(num_suggestions)]

        if len(suggestions) < num_suggestions and len(context) > 0:
            shorter_context = context[1:] if len(context) > 1 else tuple()
            backoff_suggestions = self.predict_next_word(list(shorter_context),
                                                       num_suggestions - len(suggestions))
            suggestions.extend(backoff_suggestions)

        return suggestions[:num_suggestions]

    def process_input(self, text):
        words = text.lower().split()
        if not words:
            return {
                "corrected_text": "",
                "next_word_predictions": [],
                "current_word_correction": None
            }

        last_word = words[-1]
        corrected_last_word = self.get_correction(last_word)

        corrected_words = words[:-1] + [corrected_last_word]
        corrected_text = " ".join(corrected_words)

        predictions = self.predict_next_word(corrected_words)

        return {
            "corrected_text": corrected_text,
            "next_word_predictions": predictions,
            "current_word_correction": corrected_last_word if corrected_last_word != last_word else None
        }

# Example usage with interactive input
if __name__ == "__main__":
    # Training corpus
    corpus = """
    the quick brown fox jumps over the lazy dog
    the quick fox is very fast the lazy dog sleeps all day
    a quick brown dog can jump over fences the lazy fox watches
    the quick and the lazy the quick brown fox is faster than the lazy dog
    hello world this is a test of the autocorrect system
    """

    keyboard = AutocorrectKeyboard(ngram_n=3)
    keyboard.train(corpus)

    print("Autocorrect Keyboard System")
    print("Type a sentence and press Enter (or 'quit' to exit)")

    while True:
        user_input = input("\nYour input: ").strip()
        if user_input.lower() in ['quit', 'exit']:
            break

        result = keyboard.process_input(user_input)

        print("\nResults:")
        if result['current_word_correction']:
            last_word = user_input.split()[-1]
            print(f"Corrected '{last_word}' to '{result['current_word_correction']}'")

        print(f"Full corrected text: {result['corrected_text']}")
        print("Next word suggestions:", ", ".join(result['next_word_predictions']))

Autocorrect Keyboard System
Type a sentence and press Enter (or 'quit' to exit)

Your input: don

Results:
Corrected 'don' to 'dog'
Full corrected text: dog
Next word suggestions: 

Your input: sleop

Results:
Corrected 'sleop' to 'sleeps'
Full corrected text: sleeps
Next word suggestions: 


KeyboardInterrupt: Interrupted by user