## Language Translation

In [2]:
!pip install pandas
!pip install numpy
!pip install ibm_watson
!pip install PyPDF2
!pip install python-dotenv



In [3]:
import pandas as pd
import numpy as np
# coding=utf-8
import json
import os

In [4]:
from ibm_watson import LanguageTranslatorV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

In [5]:
from dotenv import load_dotenv
load_dotenv('.env')

True

### Connecting to IBM Watson API and authenticating it using key and service url provided

In [6]:
IAM_KEY = os.getenv('IAM_KEY')
SERVICE_URL = os.getenv('SERVICE_URL')

In [7]:
authenticator = IAMAuthenticator(IAM_KEY)
language_translator = LanguageTranslatorV3(
    version='2018-05-01',
    authenticator=authenticator)
language_translator.set_service_url(SERVICE_URL)



            On 10 June 2023, IBM announced the deprecation of the Natural Language Translator service.
            The service will no longer be available from 8 August 2022. As of 10 June 2024, the service will reach its End of Support
            date. As of 10 December 2024, the service will be withdrawn entirely and will no longer be
            available to any customers.
            


### Sample translation to Tamil Language

In [8]:
translation = language_translator.translate(
    text='Hello, how are you today?',
    model_id='en-ta').get_result()
print(json.dumps(translation, indent=2, ensure_ascii=False))

{
  "translations": [
    {
      "translation": "ஹலோ, நீங்கள் எப்படி இன்று?"
    }
  ],
  "word_count": 8,
  "character_count": 25
}


### Let's see what all languages does the IBM watson have provision for

In [9]:
# List identifiable languages
languages = language_translator.list_identifiable_languages().get_result()
print(json.dumps(languages, indent=2))

{
  "languages": [
    {
      "language": "af",
      "name": "Afrikaans"
    },
    {
      "language": "ar",
      "name": "Arabic"
    },
    {
      "language": "az",
      "name": "Azerbaijani"
    },
    {
      "language": "ba",
      "name": "Bashkir"
    },
    {
      "language": "be",
      "name": "Belarusian"
    },
    {
      "language": "bg",
      "name": "Bulgarian"
    },
    {
      "language": "bn",
      "name": "Bengali"
    },
    {
      "language": "ca",
      "name": "Catalan"
    },
    {
      "language": "cs",
      "name": "Czech"
    },
    {
      "language": "cv",
      "name": "Chuvash"
    },
    {
      "language": "cy",
      "name": "Welsh"
    },
    {
      "language": "da",
      "name": "Danish"
    },
    {
      "language": "de",
      "name": "German"
    },
    {
      "language": "el",
      "name": "Greek"
    },
    {
      "language": "en",
      "name": "English"
    },
    {
      "language": "eo",
      "name": "Esperanto"
    },
 

### Let's identify the language of a provided text
Here it will provide the confidence of the text belonging to a language

In [10]:
# # Identify
language = language_translator.identify("J'm appelle Indu").get_result()
print(json.dumps(language, indent=2))

{
  "languages": [
    {
      "language": "fr",
      "confidence": 0.9508838920268827
    },
    {
      "language": "it",
      "confidence": 0.02656904610038754
    },
    {
      "language": "en",
      "confidence": 0.0055847794477802745
    },
    {
      "language": "da",
      "confidence": 0.0054322701023351355
    },
    {
      "language": "tr",
      "confidence": 0.002931324186508045
    },
    {
      "language": "mt",
      "confidence": 0.0025007951265433852
    },
    {
      "language": "nl",
      "confidence": 0.0011342700788339716
    },
    {
      "language": "nb",
      "confidence": 0.0008454521579163408
    },
    {
      "language": "et",
      "confidence": 0.0006524521733303212
    },
    {
      "language": "de",
      "confidence": 0.000561793541177038
    },
    {
      "language": "af",
      "confidence": 0.0004298977668060697
    },
    {
      "language": "nn",
      "confidence": 0.0003763147802362794
    },
    {
      "language": "ro",
      "con

### List of language translation models available

In [11]:
# # List models
models = language_translator.list_models(
     source='en').get_result()
print(json.dumps(models, indent=2))

{
  "models": [
    {
      "model_id": "en-ar",
      "source": "en",
      "target": "ar",
      "base_model_id": "",
      "domain": "general",
      "customizable": true,
      "default_model": true,
      "owner": "",
      "status": "available",
      "name": "en-ar",
      "training_log": null
    },
    {
      "model_id": "en-bg",
      "source": "en",
      "target": "bg",
      "base_model_id": "",
      "domain": "general",
      "customizable": true,
      "default_model": true,
      "owner": "",
      "status": "available",
      "name": "en-bg",
      "training_log": null
    },
    {
      "model_id": "en-bn",
      "source": "en",
      "target": "bn",
      "base_model_id": "",
      "domain": "general",
      "customizable": true,
      "default_model": true,
      "owner": "",
      "status": "available",
      "name": "en-bn",
      "training_log": null
    },
    {
      "model_id": "en-bs",
      "source": "en",
      "target": "bs",
      "base_model_id": "",
 

In [17]:
# # Create model
# with open('glossary.tmx', 'rb') as glossary:
#     response = language_translator.create_model(
#         base_model_id='en-es',
#         name='custom-english-to-spanish',
#         forced_glossary=glossary).get_result()
#     print(json.dumps(response, indent=2))

# # Delete model
# response = language_translator.delete_model(model_id='<YOUR MODEL ID>').get_result()
# print(json.dumps(response, indent=2))

# # Get model details
# model = language_translator.get_model(model_id='<YOUR MODEL ID>').get_result()
# print(json.dumps(model, indent=2))

## Translation Lab for an art piece description

In [12]:
#### Document Translation ####
# List Documents that have been submitted for translation
result = language_translator.list_documents().get_result()
print(json.dumps(result, indent=2))

{
  "documents": [
    {
      "document_id": "570bcb90-6e63-4b32-b0fa-3cb114fbd652",
      "filename": "_content_Hollis_Sigler.pdf",
      "model_id": "en-te",
      "source": "en",
      "target": "te",
      "status": "available",
      "created": "2024-09-06T22:56:35Z",
      "completed": "2024-09-06T22:56:39Z",
      "word_count": 72,
      "character_count": 455
    },
    {
      "document_id": "b196d73e-22b0-47b0-ae30-70036ee9434f",
      "filename": "_content_Hollis_Sigler.pdf",
      "model_id": "en-es",
      "source": "en",
      "target": "es",
      "status": "available",
      "created": "2024-09-06T22:58:03Z",
      "completed": "2024-09-06T22:58:05Z",
      "word_count": 72,
      "character_count": 455
    },
    {
      "document_id": "f39b0389-1494-4d4b-a43f-7865a06fc483",
      "filename": "._constitution.pdf",
      "model_id": "en-ta",
      "source": "en",
      "target": "ta",
      "status": "available",
      "created": "2024-09-09T17:03:23Z",
      "complete

### Translate the document provided to FRENCH

In [19]:
# Translate Document
SOURCE_FILE = './input_docs/art1-description.pdf'
#Translating to French
with open(SOURCE_FILE, 'rb') as file:
    result = language_translator.translate_document(
        file=file,
        file_content_type='application/pdf',
        filename=SOURCE_FILE,
        model_id='en-fr').get_result()
    print(json.dumps(result, indent=2))

{
  "document_id": "f63010e0-170f-4d2a-89d1-82d913357162",
  "filename": "._input_docs_art1-description.pdf",
  "model_id": "en-fr",
  "source": "en",
  "target": "fr",
  "status": "processing",
  "created": "2024-09-09T20:20:58Z"
}


In [20]:
# Document Status Check
DOCUMENT_ID = result['document_id']
result = language_translator.get_document_status(
    document_id=DOCUMENT_ID).get_result()
print(json.dumps(result, indent=2))

{
  "document_id": "f63010e0-170f-4d2a-89d1-82d913357162",
  "filename": "._input_docs_art1-description.pdf",
  "model_id": "en-fr",
  "source": "en",
  "target": "fr",
  "status": "available",
  "created": "2024-09-09T20:20:58Z",
  "completed": "2024-09-09T20:21:00Z",
  "word_count": 172,
  "character_count": 1014
}


In [21]:
# Translated Document
with open('translated.pdf', 'wb') as f:
    result = language_translator.get_translated_document(
        document_id=DOCUMENT_ID,
        accept='application/pdf').get_result()
    f.write(result.content)

In [22]:
# Delete Document
language_translator.delete_document(document_id=DOCUMENT_ID)

<ibm_cloud_sdk_core.detailed_response.DetailedResponse at 0x113d02750>

### Translate the document provided to TAMIL

In [23]:
#Translating to Tamil
with open(SOURCE_FILE, 'rb') as file:
    result = language_translator.translate_document(
        file=file,
        file_content_type='application/pdf',
        filename=SOURCE_FILE,
        model_id='en-ta').get_result()
    print(json.dumps(result, indent=2))

{
  "document_id": "342c6a43-53d4-4a35-a92b-55b6f6569bb9",
  "filename": "._input_docs_art1-description.pdf",
  "model_id": "en-ta",
  "source": "en",
  "target": "ta",
  "status": "processing",
  "created": "2024-09-09T20:22:21Z"
}


In [25]:
# Document Status Check
DOCUMENT_ID = result['document_id']
result = language_translator.get_document_status(
    document_id=DOCUMENT_ID).get_result()
print(json.dumps(result, indent=2))

{
  "document_id": "342c6a43-53d4-4a35-a92b-55b6f6569bb9",
  "filename": "._input_docs_art1-description.pdf",
  "model_id": "en-ta",
  "source": "en",
  "target": "ta",
  "status": "available",
  "created": "2024-09-09T20:22:21Z",
  "completed": "2024-09-09T20:22:27Z",
  "word_count": 172,
  "character_count": 1014
}


In [26]:
# Translated Document
with open('translated_tamil.pdf', 'wb') as f:
    result = language_translator.get_translated_document(
        document_id=DOCUMENT_ID,
        accept='application/pdf').get_result()
    f.write(result.content)

References:
1. https://watson-developer-cloud.github.io/swift-sdk/services/LanguageTranslatorV3/Classes/LanguageTranslator.html#/s:20LanguageTranslatorV30aB0C13listDocuments7headers17completionHandlerySDyS2SGSg_y15IBMSwiftSDKCore12RestResponseVyAA12DocumentListVGSg_AI0K5ErrorOSgtctF



### The above code didn't quite translate the entire text. Let's see if this is because of the PDF format. We will send the plain text to translate.

In [32]:
# Translate text
text = '''Joan Snyder. 
Born 1940, Highland Park, New Jersey, currently resides in New York. 
Things Have Tears and We Know Suffering. 
1983-1984. Woodcut with oil paint on paper. Gift of Polly and Mark Addison to the Polly and Mark Addison. 
Collection, 91.04.75. In her woodcuts, Snyder layers complex surfaces, symbols, and text to express emotion. 
In the bottom right corner, a figure suckles an elongated breast dotted with patches of grey and purple flesh. 
Just above, Snyder chiseled a stick figure, an icon she used to symbolize the suffering of children. 
Marring the surface of the wood block, gouged lines create the sensation of the print itself shedding tears. 
Carved between the linework is a phrase from Ancient Roman poet Virgil's Aeneid that roughly translates to the title of this image: 
"things have tears and we know suffering." 
Through these layers, Snyder creates a narrative of childhood pain. She suggests the passing of wounds from parents to children, 
communicating a story of generational trauma, memory, and loss.'''

try:
    translation_result = language_translator.translate(
        text=text,
        model_id='en-ta'  # Ensure this is the correct model ID
    ).get_result()

    # Extract the translation only
    translation = translation_result['translations'][0]['translation']

    # Write the translation to a file
    with open('translation_output_tamil.txt', 'w', encoding='utf-8') as file:
        file.write(translation)
        print("Translation written to 'translation_output_tamil.txt'")

except Exception as e:
    print(f"Error during translation: {e}")


Translation written to 'translation_output_tamil.txt'
