In [34]:
from googletrans import Translator
import asyncio
import pandas as pd
import re

In [35]:
def remove_string_up_to_given_sign(text, sign):
    """
    Remove the translated context prefix from a text.

    Everything from the start of the text up to and including the first
    occurrence of ``sign`` followed by a single space is dropped. The
    separator is only searched for before the first line break; if it is
    not found there, the text is returned unchanged.

    Arguments:
        text (str): Text from which the leading context should be removed.
        sign (str): Separator between the context and the actual text
            (e.g. ':'). It is matched literally, not as a regular expression.

    Returns:
        str: The text without the leading context.
    """
    # re.escape keeps separators such as '.', '|' or '?' from being
    # interpreted as regex syntax.
    pattern = r'^.*?' + re.escape(sign) + ' '
    return re.sub(pattern, '', text)
    

In [36]:
async def translate_to_given_language(text, destination_language):
    """
    Translate the given text and strip the injected context from the result.

    Arguments:
        text (str): The text to translate. It is expected to start with a
            context prefix that ends with ':'.
        destination_language (str): The language code for the destination
            language (e.g., 'es' for Spanish, 'fr' for French).

    Returns:
        str: The translated text with everything up to the first ': ' removed.
    """
    # Use the translator as an async context manager so its HTTP client is
    # closed after the request instead of leaving one open client per text.
    async with Translator() as translator:
        translation = await translator.translate(text, dest=destination_language)
    return remove_string_up_to_given_sign(translation.text, ':')

In [37]:
async def translate_dataset_with_context(dataset, context, destination_language):
    """
    Translate every text of a dataset, prefixing each one with a context.

    Arguments:
        dataset (iterable of str): The texts to translate.
        context (str): Text placed in front of every entry (separated by a
            single space) before it is sent to the translator.
        destination_language (str): The language code for the destination
            language (e.g., 'pl' for Polish).

    Returns:
        list: The translated texts, in the same order as the input.
    """
    # Create one translation coroutine per entry, then run them concurrently.
    pending = []
    for text in dataset:
        pending.append(translate_to_given_language(f'{context} {text}', destination_language))

    return await asyncio.gather(*pending)

In [38]:
# Path of the source file that is loaded into the DataFrame with pd.read_csv.
file_name_read = 'AlarmTranslation.csv'

# Path the updated DataFrame is written to. Note the '.txt' extension: the
# content is still written with DataFrame.to_csv using ';' as separator.
file_name_save = 'AlarmTranslation_pl.txt'

In [39]:
# Load the alarm table; the file is parsed with ';' as field separator and
# decoded as UTF-8.
df = pd.read_csv(file_name_read, sep=';',  encoding='utf-8')

In [40]:
# Preview the first five rows to check that the file was parsed as expected.
df.head(5)

Unnamed: 0,AlarmCode,AlarmDescription(it),AlarmDescription(en),AlarmDescription(Pl),Instructions(it),Instructions(en),Instructions(Pl),ZoneDescription(it),ZoneDescription(en),ZoneDescription(Pl),VariabileAllarme,Gravita,CodicePosizione,CodiceCategoria
0,0000_00,Carro 1 allarme conteggio salita-discesa,Hoist 1 Up-Down counter error,,Controlla Finecorsa Carro Alto - Basso - Conte...,Check hoist up down and counter limit switch,,Carro 1,Hoist 1,,All_0000_00,3.0,,Carro
1,0000_01,Carro 1 manca finecorsa alto o basso in trasla...,Hoist 1 translation up or down limit switch mi...,,Controlla finecorsa Carro Alto - Basso,Check hoist up down limit switch,,Carro 1,Hoist 1,,All_0000_01,3.0,,Carro
2,0000_02,Carro 1 collisione avanti,Hoist 1 collision forward,,Controlla finecorsa collisione - sistema conte...,Check collision limit switch- position countin...,,Carro 1,Hoist 1,,All_0000_02,3.0,,Carro
3,0000_03,Carro 1 extracorsa indietro,Hoist 1 collision backward,,Controlla finecorsa collisione - sistema conte...,Check collision limit switch- position countin...,,Carro 1,Hoist 1,,All_0000_03,3.0,,Carro
4,0000_04,Carro 1 allarme inverter,Hoist 1 inverter error,,Controlla inverter movimentazione carro,Check hoist movimentation inverter,,Carro 1,Hoist 1,,All_0000_04,3.0,,Carro


In [41]:
# List the column labels; the names used for selection below must match exactly.
df.columns

Index(['AlarmCode', 'AlarmDescription(it)', 'AlarmDescription(en)',
       'AlarmDescription(Pl)', 'Instructions(it)', 'Instructions(en)',
       'Instructions(Pl)', 'ZoneDescription(it)', 'ZoneDescription(en)',
       'ZoneDescription(Pl)', 'VariabileAllarme', 'Gravita', 'CodicePosizione',
       'CodiceCategoria'],
      dtype='object')

In [42]:
# Select the English zone descriptions, which are the texts that get translated.
# Despite its name this is a single column (a pandas Series), not a DataFrame.
extracted_df = df['ZoneDescription(en)']
extracted_df.head(10)

0    Hoist 1
1    Hoist 1
2    Hoist 1
3    Hoist 1
4    Hoist 1
5    Hoist 1
6    Hoist 1
7    Hoist 1
8    Hoist 1
9    Hoist 1
Name: ZoneDescription(en), dtype: object

In [43]:
# Column labels again; selecting a column above did not modify df.
df.columns

Index(['AlarmCode', 'AlarmDescription(it)', 'AlarmDescription(en)',
       'AlarmDescription(Pl)', 'Instructions(it)', 'Instructions(en)',
       'Instructions(Pl)', 'ZoneDescription(it)', 'ZoneDescription(en)',
       'ZoneDescription(Pl)', 'VariabileAllarme', 'Gravita', 'CodicePosizione',
       'CodiceCategoria'],
      dtype='object')

In [44]:
# Context prepended to every text before translation. It ends with ':' because
# remove_string_up_to_given_sign strips everything up to the first ': ' from
# the translated result.
context_inject = "translate plc alarm:"

In [45]:
batch_size = 5
max_file = len(extracted_df)
max_file = 9
counter = 0
column_translated = []
for i in range(0, max_file, batch_size):
    batch_df = extracted_df[i:i + batch_size]
    batch_column_translated = await translate_dataset_with_context(batch_df,context_inject, 'pl')
    column_translated.extend(batch_column_translated)  # Use extend instead of append
    counter = counter + batch_size
    print(f"translated rows: {counter}")

#for text in column_translated:
    #print(text)

translated rows: 5
translated rows: 10


In [46]:
# Wrap the translations in a single-column DataFrame for inspection and export.
df_temp = pd.DataFrame(column_translated, columns=['ZoneDescription(Pl)'])
df_temp.head(10)

Unnamed: 0,ZoneDescription(Pl)
0,wciągnik 1
1,wciągnik 1
2,wciągnik 1
3,wciągnik 1
4,wciągnik 1
5,wciągnik 1
6,wciągnik 1
7,wciągnik 1
8,wciągnik 1
9,wciągnik 1


In [47]:
# Write the translated column to its own ';'-separated file, without the index.
df_temp.to_csv("df_temp3.csv", sep=';', index=False)

In [48]:
# Write the translations into the Polish column. A plain list is assigned
# positionally, so column_translated must contain exactly one entry per row of
# df; pandas raises ValueError when the lengths differ.
df['ZoneDescription(Pl)'] = column_translated

ValueError: Length of values (10) does not match length of index (3372)

In [None]:
# Preview the first ten rows to check the newly filled Polish column.
df.head(10)

In [None]:
# Save the full table, including the translated column, as ';'-separated
# UTF-8 text without the index.
df.to_csv(file_name_save, sep=';', index=False, encoding='utf-8')

In [None]:
# Load data to dataset
#df_pl = pd.read_csv("AlarmTranslation_pl.csv", sep=';', encoding='utf-8')
#df_pl.head(10)