In [None]:
!pip install googletrans

In [145]:
import pandas as pd
from deep_translator import GoogleTranslator

# Define a function to translate based on word type (nature)
def translate_with_nature(french_word, nature):
    if not french_word:
        return "Error: Empty input"

    # Return the word directly based on its nature
    # No additional contextual phrases are added
    text_to_translate = french_word

    try:
        # Translate the word in context
        translation = GoogleTranslator(source='fr', target='en').translate(text_to_translate)
        return translation  # Return the full translation
    except Exception as e:
        print(f"Exception: {str(e)} for word: {french_word}")
        return f"Error: {str(e)}"

# Load the Excel file
input_path = 'C:\\Users\\khomo\\data_tshikama_xls-1 (1).xlsx'
df = pd.read_excel(input_path)

# Check if 'FRANCAIS' and 'NATURE' columns exist
if 'FRANCAIS' in df.columns and 'NATURE' in df.columns:
    # Translate only the first 50 rows of the specified columns considering their nature
    df.loc[:3001, 'ENGLISH_TEMP'] = df.loc[:3001].apply(
        lambda row: translate_with_nature(row['FRANCAIS'], row['NATURE']), axis=1
    )
    
    # Insert the 'ENGLISH' column next to the 'FRANCAIS' column
    col_index = df.columns.get_loc('FRANCAIS')
    df.insert(col_index + 1, 'ENGLISH', df.pop('ENGLISH_TEMP'))
else:
    print("Error: 'FRANCAIS' or 'NATURE' column not found in DataFrame")

# Save the updated DataFrame to a new Excel file (only for the test)
output_path = 'C:\\Users\\khomo\\lexicon_translation_test.xlsx'
df.to_excel(output_path, index=False)

print(f"Translation completed and saved to {output_path}")


Exception: nan --> text must be a valid text with maximum 5000 character,otherwise it cannot be translated for word: nan
Translation completed and saved to C:\Users\khomo\lexicon_translation_test.xlsx


In [151]:
import pandas as pd
from deep_translator import GoogleTranslator

# Define a function to translate from English to other languages
def translate_english_to_other_languages(english_word, target_language):
    if not english_word:
        return "Error: Empty input"
    try:
        translation = GoogleTranslator(source='en', target=target_language).translate(english_word)
        return translation  # Return the full translation to the target language
    except Exception as e:
        print(f"Exception: {str(e)} for word: {english_word}")
        return f"Error: {str(e)}"

# Load the Excel file containing English translations
input_path = 'C:\\Users\\khomo\\lexicon_translation_test.xlsx'
df = pd.read_excel(input_path)

# Check if the 'ENGLISH' column exists
if 'ENGLISH' in df.columns:
    # Specify target languages for translation
    target_languages = {
        'Sesotho': 'st',  # Sesotho
        'Sepedi': 'nso',   # Sepedi
        'Afrikaans': 'af',  # Afrikaans
        'Xhosa': 'xh',     # Xhosa
        'Zulu': 'zu'       # Zulu
    }

    # Translate English to other languages and add as new columns
    for lang_name, lang_code in target_languages.items():
        print(f"Translating to {lang_name}...")
        df[lang_name] = df['ENGLISH'].apply(lambda x: translate_english_to_other_languages(x, lang_code))
        print(f"Translation to {lang_name} completed.")

else:
    print("Error: 'ENGLISH' column not found in DataFrame")

# Save the updated DataFrame to a new Excel file
output_path = 'C:\\Users\\khomo\\lexicon_all_languages.xlsx'
df.to_excel(output_path, index=False)

print(f"All translations completed and saved to {output_path}")


Translating to Sesotho...
Translation to Sesotho completed.
Translating to Sepedi...
Exception: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)) for word: Herbs
Exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) for word: repeated
Translation to Sepedi completed.
Translating to Afrikaans...
Translation to Afrikaans completed.
Translating to Xhosa...
Translation to Xhosa completed.
Translating to Zulu...
Exception: Request exception can happen due to an api connection error. Please check your connection and try again for word: leave
Translation to Zulu completed.
All translations completed and saved to C:\Users\khomo\lexicon_all_languages.xlsx


In [197]:
import pandas as pd
import random

# Load the previously translated DataFrame
input_path = 'C:\\Users\\khomo\\lexicon_all_languages.xlsx'
df = pd.read_excel(input_path)

# Separate words by sentiment
positive_words = df[df['SENTIMENT'] == 'Positif']['ENGLISH'].dropna().tolist()
negative_words = df[df['SENTIMENT'] == 'Negatif']['ENGLISH'].dropna().tolist()
neutral_words = df[df['SENTIMENT'] == 'Neutre']['ENGLISH'].dropna().tolist()

# Set the number of sentences to generate for each sentiment
num_sentences_per_sentiment = 10
sentences = []

# Function to generate sentences for a given sentiment
def generate_sentences(word_list, sentiment_label):
    for _ in range(num_sentences_per_sentiment):
        # Randomly select sentence length between 2 and 10
        sentence_length = random.randint(2, 10)

        # Check if there are enough words in the list to sample
        if len(word_list) >= sentence_length:
            sentence = ' '.join(random.sample(word_list, sentence_length))
        else:
            # If not enough words, use as many as possible
            sentence = ' '.join(word_list)

        # Capitalize the first word and add a period at the end
        sentences.append(f"{sentence.capitalize()}. (Sentiment: {sentiment_label})")

# Generate sentences for each sentiment type
if positive_words:
    generate_sentences(positive_words, 'Positif')
else:
    print("No positive words found.")

if negative_words:
    generate_sentences(negative_words, 'Negatif')
else:
    print("No negative words found.")

if neutral_words:
    generate_sentences(neutral_words, 'Neutre')
else:
    print("No neutral words found.")

# Save sentences to a new text file
output_path = 'C:\\Users\\khomo\\generated_sentences.txt'
with open(output_path, 'w') as f:
    for sentence in sentences:
        f.write(sentence + '\n')

print(f"{num_sentences_per_sentiment * 3} sentences generated and saved to {output_path}.")


30 sentences generated and saved to C:\Users\khomo\generated_sentences.txt.


In [199]:
import pandas as pd
import random

# Load the dataset
file_path = 'C:\\Users\\khomo\\lexicon_all_languages.xlsx'  # Update with your file path
df = pd.read_excel(file_path)

# Ensure the DataFrame contains necessary columns
expected_columns = ['CILUBA', 'ENGLISH', 'FRANCAIS', 'Sesotho', 'Sepedi', 'Afrikaans', 'Xhosa', 'Zulu', 'SCORE', 'SENTIMENT']
missing_columns = [col for col in expected_columns if col not in df.columns]

if missing_columns:
    raise ValueError(f"The dataset is missing the following columns: {', '.join(missing_columns)}")

# Create translation lexicon and scoring dictionary from the dataset
translation_lexique = dict(zip(df['ENGLISH'].str.lower(), 
                               df[['CILUBA', 'FRANCAIS', 'Sesotho', 'Sepedi', 'Afrikaans', 'Xhosa', 'Zulu']].fillna('').apply(lambda x: x.tolist(), axis=1)))
score_lexique = dict(zip(df['ENGLISH'].str.lower(), df['SCORE']))  # English to score

def translate_text_using_lexicon(text, translation_lexique):
    words = text.lower().split()  # Split input text into words
    translations = {lang: [] for lang in ['CILUBA', 'FRANCAIS', 'Sesotho', 'Sepedi', 'Afrikaans', 'Xhosa', 'Zulu']}  # Create a dict for translations
    scores = []  # To hold scores for each word
    translated_indices = set()  # Keep track of which words have been translated

    # Start checking from longest possible phrases to the individual words
    for start in range(len(words)):
        if start in translated_indices:  # Skip if the word has already been translated
            continue
        
        for end in range(start + 1, len(words) + 1):
            phrase = ' '.join(words[start:end])  # Create the phrase from words[start:end]
            if phrase in translation_lexique:
                translated_words = translation_lexique[phrase]  # Get the translations for the whole phrase
                for i, lang in enumerate(translations.keys()):
                    translations[lang].append(translated_words[i] if i < len(translated_words) else '')  # Handle missing translations
                scores.append(score_lexique.get(phrase, 0))  # Get score for the whole phrase
                translated_indices.update(range(start, end))  # Mark all involved indices as translated
                break  # Stop checking once we find a translation for this phrase
        else:
            # If no translation was found for any phrases starting from this word, check the single word
            single_word = words[start]
            if start not in translated_indices:  # Check only if not already translated
                translated_words = translation_lexique.get(single_word, [''] * len(translations))  # Get translations for the single word
                for i, lang in enumerate(translations.keys()):
                    translations[lang].append(translated_words[i] if i < len(translated_words) else '')  # Handle missing translations
                scores.append(score_lexique.get(single_word, 0))  # Get score for the word

    return translations, scores  # Return a dict of translated sentences for each language and scores

def analyse_sentiment(translated_words, scores):
    total_score = sum(scores)  # Sum of all scores

    # Determine sentiment based on total score
    if total_score > 0.5:
        sentiment = "Positive"
    elif total_score < -0.5:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"

    word_scores = {word: score for word, score in zip(translated_words, scores)}

    return total_score, sentiment, word_scores  # Return total score, sentiment, and individual word scores

# Generate 50 random sentences
input_path = 'C:\\Users\\khomo\\lexicon_translation_test.xlsx'
df_english = pd.read_excel(input_path)
words = df_english['ENGLISH'].dropna().tolist()
num_sentences = 50
sentences = [' '.join(random.sample(words, random.randint(5, 12))).capitalize() + '.' for _ in range(num_sentences)]

# Process each sentence for translation and sentiment analysis
for i, english_text in enumerate(sentences, 1):
    print(f"Processing sentence {i}: '{english_text}'")
    
    # Translate the text using the lexicon
    translations, scores = translate_text_using_lexicon(english_text, translation_lexique)

    # Create a dictionary to hold sentiment analysis results for each language
    sentiment_results = {}

    # Analyze sentiment for each translated language
    for lang, translated_words in translations.items():
        total_score, sentiment, word_scores = analyse_sentiment(translated_words, scores)
        sentiment_results[lang] = {
            "translated_sentence": ' '.join(translated_words),  # Join translated words into a single string
            "total_score": total_score,
            "sentiment": sentiment,
            "word_scores": word_scores
        }

    # Display results for each language with separation
    print(f"\nOriginal English Sentence: '{english_text}'\n")
    for lang, result in sentiment_results.items():
        print(f"Translated Sentence ({lang}):", result['translated_sentence'])  # Display the joined sentence
        print(f"Total Score ({lang}):", result['total_score'])  # Display the total score
        print(f"Sentiment ({lang}):", result['sentiment'])  # Display the overall sentiment
        print(f"Word Scores ({lang}):", result['word_scores'])  # Display individual word scores
        print("\n" + "-" * 50 + "\n")  # Separator between languages


Processing sentence 1: 'Fog fifty-four space sixty two ask 59shirt plate listen eyelash heine sin transport.'

Original English Sentence: 'Fog fifty-four space sixty two ask 59shirt plate listen eyelash heine sin transport.'

Translated Sentence (CILUBA): ditshima makumiatanu-inayi ditekemena makumiasambombo ibidi kulomba  Dilongo Kunvua Lukofi lukinu Mibi 
Total Score (CILUBA): 16
Sentiment (CILUBA): Positive
Word Scores (CILUBA): {'ditshima': 3, 'makumiatanu-inayi': 0, 'ditekemena': 1, 'makumiasambombo': 0, 'ibidi': 0, 'kulomba': 4, '': 0, 'Dilongo': 1, 'Kunvua': 1, 'Lukofi': 3, 'lukinu': 1, 'Mibi': 2}

--------------------------------------------------

Translated Sentence (FRANCAIS): Brouillard cinquante-quatre esparance soixante deux Demander  Assiette ecouter Cil heine Pécher 
Total Score (FRANCAIS): 16
Sentiment (FRANCAIS): Positive
Word Scores (FRANCAIS): {'Brouillard': 3, 'cinquante-quatre': 0, 'esparance': 1, 'soixante': 0, 'deux': 0, 'Demander': 4, '': 0, 'Assiette': 1, 'eco