In [3]:
pip install transformers sentencepiece requests


Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install sacremoses

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
Installing collected packages: sacremoses
Successfully installed sacremoses-0.1.1
Note: you may need to restart the kernel to use updated packages.


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [1]:
from transformers import pipeline

# Load a text generation pipeline
generator = pipeline("text-generation", model="gpt2")

# Define a prompt to guide the generation
prompt = "The importance of education in society cannot be overstated. Education"

# Generate a paragraph of approximately 100 words
output = generator(
    prompt,
    max_length=150,  # Adjust max_length to get close to 100 words
    num_return_sequences=1,
    no_repeat_ngram_size=2,
    truncation=True
)

# Extract the generated text
generated_text = output[0]['generated_text']

# Ensure the paragraph is exactly 100 words
words = generated_text.split()[:100]
paragraph = ' '.join(words)

# Print the result
print("Generated Paragraph (100 words):")
print(paragraph)
print(f"\nTotal words: {len(paragraph.split())}")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Generated Paragraph (100 words):
The importance of education in society cannot be overstated. Education is the basic part of a family, which is why I love having her. We need a teacher who is passionate in her passion for teaching and who loves all of us. She is part and parcel of every woman's education and every person's mother-in-law's child-care experience. I have always believed in the importance and the pride that women feel and thrive as mothers. It is that sense of pride and joy that makes us proud and make us think about our mothers, her talents and her aspirations. I want

Total words: 100


In [2]:
import requests
from transformers import pipeline
import difflib
# Ensure the text is exactly 100 words
sample_text=paragraph
#sample_text = ' '.join(sample_text.split()[:100])

# Save original text to a file
with open("original_text.txt", "w", encoding="utf-8") as f:
    f.write(sample_text)

# Initialize the first translation model (Helsinki-NLP/opus-mt-en-es)
translator1 = pipeline("translation_en_to_es", model="Helsinki-NLP/opus-mt-en-es")

# Initialize the second translation model (Facebook NLLB-200)
translator2 = pipeline("translation", model="facebook/nllb-200-distilled-600M", tokenizer="facebook/nllb-200-distilled-600M")

# Step 2: Translate using the first model (Helsinki-NLP/opus-mt-en-es)
translated_spanish_1 = translator1(sample_text)[0]['translation_text']

# Save first translated text to a file
with open("translated_text_1.txt", "w", encoding="utf-8") as f:
    f.write(translated_spanish_1)

# Step 3: Translate using the second model (Facebook NLLB-200)
translated_spanish_2 = translator2(sample_text, src_lang="eng_Latn", tgt_lang="spa_Latn")[0]['translation_text']

# Save second translated text to a file
with open("translated_text_2.txt", "w", encoding="utf-8") as f:
    f.write(translated_spanish_2)

# Step 4: Compute differences between the two translated texts
diff = list(difflib.unified_diff(translated_spanish_1.split(), translated_spanish_2.split(), 
                                 fromfile="Helsinki-NLP_Opus", tofile="Facebook_NLLB", lineterm=''))

# Save the difference to a file
with open("difference.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(diff))

# Print confirmation message
print("Translation and comparison completed. Check the output files:")
print("- original_text.txt")
print("- translated_text_1.txt")
print("- translated_text_2.txt")
print("- difference.txt")


Translation and comparison completed. Check the output files:
- original_text.txt
- translated_text_1.txt
- translated_text_2.txt
- difference.txt


In [3]:
diff

['--- Helsinki-NLP_Opus',
 '+++ Facebook_NLLB',
 '@@ -20,15 +20,18 @@',
 ' una',
 ' familia,',
 ' por',
 '-eso',
 '+lo',
 '+que',
 ' me',
 ' encanta',
 ' tenerla.',
 ' Necesitamos',
 ' una',
 ' maestra',
 '+que',
 '+sea',
 ' apasionada',
 '-en',
 '+por',
 ' su',
 ' pasión',
 ' por',
 '@@ -37,11 +40,10 @@',
 ' y',
 ' que',
 ' nos',
 '-ama',
 '+ame',
 ' a',
 ' todos.',
 '-Ella',
 '-es',
 '+Es',
 ' parte',
 ' integral',
 ' de',
 '@@ -86,10 +88,10 @@',
 ' orgullo',
 ' y',
 ' alegría',
 '+el',
 ' que',
 ' nos',
 ' hace',
 '-sentir',
 ' orgullosos',
 ' y',
 ' nos',
 '@@ -103,3 +105,54 @@',
 ' y',
 ' sus',
 ' aspiraciones.',
 '+Quiero',
 '+que',
 '+la',
 '+gente',
 '+se',
 '+sienta',
 '+orgullosa',
 '+de',
 '+ella',
 '+y',
 '+que',
 '+la',
 '+familia',
 '+se',
 '+sienta',
 '+orgullosa',
 '+de',
 '+ella',
 '+y',
 '+que',
 '+la',
 '+familia',
 '+se',
 '+sienta',
 '+feliz',
 '+de',
 '+ella',
 '+y',
 '+que',
 '+la',
 '+familia',
 '+se',
 '+sienta',
 '+feliz',
 '+de',
 '+ella',
 '+y',
 '+que',
 '+