In [None]:
!pip install spacy pyspellchecker
!python -m spacy download en_core_web_sm

In [None]:
from spellchecker import SpellChecker

# Create a spell checker with its default settings.
spell = SpellChecker()
# Add extra entries to the checker's word-frequency data.
# NOTE(review): these look like placeholder names — presumably to be replaced
# with real domain-specific vocabulary.
spell.word_frequency.load_words(["customWord1", "customWord2"])

In [None]:
!python -m spacy download en_core_web_md
import spacy
# Load the `en_core_web_md` spaCy pipeline fetched by the download command in this cell.
nlp = spacy.load("en_core_web_md")

In [None]:
import spacy
from spellchecker import SpellChecker
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from sklearn.metrics import precision_score, recall_score, f1_score

# Load pre-trained models
# spaCy pipeline used by named_entity_recognition_per_word().
nlp = spacy.load("en_core_web_md")
# Spell checker used by spell_check().
# NOTE(review): this binds `spell` to a fresh SpellChecker, so the custom words
# loaded into the earlier `spell` instance are not present here — confirm intended.
spell = SpellChecker()
# GPT-2 tokenizer and language model used by autocomplete().
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Function for Autocomplete
def autocomplete(text, max_length=10):
    """Predict a continuation of *text* using GPT-2.

    Args:
        text: Prompt to continue.
        max_length: Maximum number of tokens to generate after the prompt.
            The budget is applied to new tokens only, so long prompts still
            receive a continuation.

    Returns:
        The sampled continuation with the prompt removed and surrounding
        whitespace stripped. An empty string is returned for a blank prompt.
    """
    if not text.strip():
        # A blank prompt gives the model nothing to condition on.
        return ""

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which generate() expects alongside the input ids.
    encoded = tokenizer(text, return_tensors="pt")
    prompt_len = encoded["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **encoded,
            max_new_tokens=max_length,
            num_return_sequences=1,
            do_sample=True,
            # GPT-2 defines no pad token; reuse EOS so generate() has one.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens instead of slicing the decoded
    # string by len(text), which breaks if decoding does not reproduce the
    # prompt character-for-character.
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Function for Spellchecking
def spell_check(word):
    """Return the most likely correction for *word* and its candidate set.

    Args:
        word: A single token to check.

    Returns:
        A ``(corrected_word, suggestions)`` tuple. ``corrected_word`` falls
        back to *word* itself when the checker offers no correction, and
        ``suggestions`` is an empty set when there are no candidates.
    """
    corrected_word = spell.correction(word)
    if corrected_word is None:
        # Newer pyspellchecker releases return None when no correction is
        # found; keep the original word so callers always receive a string.
        corrected_word = word

    suggestions = spell.candidates(word)
    return corrected_word, suggestions or set()

# Function to evaluate prediction performance for Autocomplete and Spellchecking
def evaluate_predictions(true_words, predicted_words):
    """Calculate micro-averaged precision, recall and F1 score.

    The two sequences are compared position by position. When their lengths
    differ, both are truncated to the shorter length so the metric functions
    always receive equally sized inputs.

    Args:
        true_words: Sequence of reference words.
        predicted_words: Sequence of predicted words.

    Returns:
        A ``(precision, recall, f1)`` tuple. ``(0.0, 0.0, 0.0)`` is returned
        when there is nothing to compare.
    """
    common_len = min(len(true_words), len(predicted_words))
    if common_len == 0:
        # No aligned pairs: avoid handing empty inputs to the metric functions.
        return 0.0, 0.0, 0.0

    # Truncate both sides; truncating only one leaves a length mismatch
    # whenever the other sequence is the longer one.
    true_words = true_words[:common_len]
    predicted_words = predicted_words[:common_len]

    precision = precision_score(true_words, predicted_words, average='micro')
    recall = recall_score(true_words, predicted_words, average='micro')
    f1 = f1_score(true_words, predicted_words, average='micro')
    return precision, recall, f1

# Function to perform Named Entity Recognition (NER) per word
def named_entity_recognition_per_word(text):
    """Map each token of *text* to its spaCy entity label.

    Tokens that carry no entity label are mapped to the string ``"None"``.
    The result is keyed by token text, so a token text that occurs more than
    once keeps the label of its last occurrence.
    """
    return {tok.text: tok.ent_type_ or "None" for tok in nlp(text)}

# NLP Pipeline
def nlp_pipeline(user_input, true_words):
    """Run autocomplete, spellchecking and NER on *user_input*.

    Args:
        user_input: Raw text to process.
        true_words: Reference words the spell-corrected words are scored against.

    Returns:
        A dict with the autocomplete suggestions, per-word spellcheck tuples
        ``(word, corrected_word, suggestions)``, the per-word NER mapping and
        the precision / recall / F1 of the corrected words.
    """
    tokens = user_input.split()

    # Autocomplete: str.split() already drops empty and whitespace-only pieces.
    completion_words = autocomplete(user_input).split()

    # Spellcheck every whitespace-separated token.
    checked = []
    for token in tokens:
        fixed, candidates = spell_check(token)
        checked.append((token, fixed, list(candidates)))
    fixed_words = [fixed for _, fixed, _ in checked]

    # Per-word entity labels.
    entities = named_entity_recognition_per_word(user_input)

    # Score the corrected words against the reference words.
    p, r, f = evaluate_predictions(true_words, fixed_words)

    report = {}
    report["autocomplete_suggestions"] = completion_words
    report["spellcheck_results"] = checked
    report["ner_results"] = entities
    report["precision"] = p
    report["recall"] = r
    report["f1_score"] = f
    return report

# Main execution
if __name__ == "__main__":
    # Interactive loop: read text, run the pipeline, print a report; repeat
    # until the user types 'exit'.
    while True:
        user_input = input("Enter a word or sentence (or type 'exit' to quit): ")

        # Ignore surrounding whitespace so e.g. "exit " still quits.
        if user_input.strip().lower() == 'exit':
            print("Exiting the program.")
            break  # This will break the while loop and exit the program

        if not user_input.strip():
            # Blank input has no words to process; prompt again.
            continue

        # NOTE(review): the raw input words are used as the reference, so the
        # metrics below reflect how many words the spellchecker left
        # unchanged — confirm this is the intended ground truth.
        true_words = user_input.split()

        results = nlp_pipeline(user_input, true_words)

        print("\nResults:")
        print("1. Autocomplete Suggestions:", results['autocomplete_suggestions'])

        print("2. Spellcheck results:")
        for original_word, corrected_word, suggestions in results['spellcheck_results']:
            print(f"   - {original_word}: corrected to '{corrected_word}', suggestions: {suggestions}")

        print("3. Named Entity Recognition (NER) Results:")
        if results['ner_results']:
            for word, entity in results['ner_results'].items():
                print(f"   - {word}: {entity}")
        else:
            print("   - No named entities found.")

        print("4. Evaluation Metrics for Spellchecking:")
        print(f"   - Precision: {results['precision']:.4f}")
        print(f"   - Recall: {results['recall']:.4f}")
        print(f"   - F1 Score: {results['f1_score']:.4f}")
        print("\n")

# TODO: instead of using GPT-2 for text suggestion, generate a random dataset
# of 1000 features to drive autocomplete.