In [1]:
# Importing required libraries
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from deep_translator import GoogleTranslator


  torch.utils._pytree._register_pytree_node(


In [2]:
def load_gpt2_model():
    """
    Load the GPT-2 model and tokenizer for language generation.
    """
    print("Loading GPT-2 model (this might take a moment)...")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    model.eval()
    print("GPT-2 model loaded!\n")
    return tokenizer, model


In [3]:
def display_supported_languages():
    """
    Displays the supported languages for translation.
    """
    language_list = [
        "english", "tamil", "kannada", "hindi", "telugu", "french",
        "german", "spanish", "italian", "russian", "japanese", "korean", "chinese"
    ]
    print("🌐 Supported Languages:")
    print(", ".join(language_list))
    return language_list


In [4]:
def translate_text(source_text, source_lang, target_lang):
    """
    Translate source text to English if needed and return the translated text.
    Also translate back to target language after generating the predicted text.
    """
    if source_lang != "english":
        text_in_english = GoogleTranslator(source=source_lang, target='english').translate(source_text)
        print(f"🔁 Translated to English: {text_in_english}")
    else:
        text_in_english = source_text
    return text_in_english


In [5]:
def predict_next_words(text_in_english, tokenizer, model):
    """
    Use GPT-2 to predict the next words based on the given text.
    """
    input_ids = tokenizer.encode(text_in_english, return_tensors='pt')
    output = model.generate(input_ids, max_length=input_ids.shape[1] + 5, do_sample=True)
    predicted_text = tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"🧠 Next word prediction: {predicted_text}")
    return predicted_text


In [6]:
def translate_back_to_target_language(predicted_text, target_lang):
    """
    Translate the GPT-2 predicted text back to the target language if it's not English.
    """
    if target_lang != "english":
        translated_output = GoogleTranslator(source='english', target=target_lang).translate(predicted_text)
        print(f"🌍 Final Output in {target_lang.title()}: {translated_output}")
    else:
        print(f"🌍 Final Output in English: {predicted_text}")


In [7]:
def validate_languages(source_lang, target_lang, language_list):
    """
    Validates whether the entered source and target languages are in the supported languages list.
    """
    if source_lang not in language_list:
        print(f"❗ Invalid source language '{source_lang}', defaulting to English.")
        source_lang = "english"
    if target_lang not in language_list:
        print(f"❗ Invalid target language '{target_lang}', defaulting to English.")
        target_lang = "english"
    return source_lang, target_lang


In [8]:
def main():
    # Load the model and tokenizer once
    tokenizer, model = load_gpt2_model()

    # Display the supported languages
    language_list = display_supported_languages()

    # Get user input
    source_text = input("\n📝 Enter a sentence: ")
    source_lang = input("🔠 Enter source language: ").strip().lower()
    target_lang = input("🌍 Enter target language: ").strip().lower()

    # Validate the languages
    source_lang, target_lang = validate_languages(source_lang, target_lang, language_list)

    # Translate the source text to English if necessary
    text_in_english = translate_text(source_text, source_lang, target_lang)

    # Predict the next words using GPT-2
    predicted_text = predict_next_words(text_in_english, tokenizer, model)

    # Translate the predicted text back to the target language
    translate_back_to_target_language(predicted_text, target_lang)


In [9]:
# Run the main function to execute the entire process
if __name__ == "__main__":
    main()


Loading GPT-2 model (this might take a moment)...
GPT-2 model loaded!

🌐 Supported Languages:
english, tamil, kannada, hindi, telugu, french, german, spanish, italian, russian, japanese, korean, chinese

📝 Enter a sentence: My name is Diya
🔠 Enter source language: English
🌍 Enter target language: Kannada


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🧠 Next word prediction: My name is Diyaev, and I'm
🌍 Final Output in Kannada: ನನ್ನ ಹೆಸರು ದಿಯಾವ್, ಮತ್ತು ನಾನು
