In [1]:
# Step 1: Install necessary libraries
!pip install googletrans==4.0.0-rc1 langdetect textblob sumy transformers spacy

# Step 2: Download NLTK resources
import nltk
nltk.download('punkt')
nltk.download('stopwords')

# Step 3: Load SpaCy model (you may need to run this on your local machine)
!python -m spacy download en_core_web_sm

# Importing necessary libraries
from googletrans import Translator, LANGUAGES
from langdetect import detect
from textblob import TextBlob
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
import spacy

# Initialize the translator
translator = Translator()

# Detect the language of the text
def detect_language(text):
    try:
        return detect(text)
    except Exception as e:
        print(f"Error detecting language: {e}")
        return None

# Perform sentiment analysis using TextBlob
def analyze_sentiment(text):
    analysis = TextBlob(text)
    polarity = analysis.sentiment.polarity
    subjectivity = analysis.sentiment.subjectivity
    return polarity, subjectivity

# Summarize text using Sumy
def summarize_text(text):
    try:
        if len(text.split()) > 50:  # Sumy works better with longer texts
            parser = PlaintextParser.from_string(text, Tokenizer("english"))
            summarizer = LsaSummarizer()
            summary = summarizer(parser.document, 2)  # Summarize to 2 sentences
            return ' '.join(str(sentence) for sentence in summary)
        else:
            print("Text is too short for summarization.")
            return text
    except Exception as e:
        print(f"Error summarizing text: {e}")
        return text

# Translate text into the target language
def translate_text(text, dest_language):
    try:
        translated = translator.translate(text, dest=dest_language)
        return translated.text
    except Exception as e:
        print(f"Error occurred during translation: {e}")
        return None

# Display available languages
def display_languages():
    print("Available languages:")
    for lang_code, lang_name in LANGUAGES.items():
        print(f"{lang_code}: {lang_name}")

# Get user input for text and destination language
def get_user_input(supported_languages):
    text = input("\nEnter the text you want to analyze and translate: ").strip()

    while True:
        dest_lang = input(f"Enter the language code to translate to ({', '.join(supported_languages)}): ").strip()
        if dest_lang in supported_languages:
            return text, dest_lang
        else:
            print("Invalid language code. Please try again.")

# Analyze and translate the text
def process_text(text, dest_lang):
    print("\n--- Analysis Results ---")

    # Language detection
    detected_lang = detect_language(text)
    if detected_lang:
        print(f"Detected Language: {LANGUAGES.get(detected_lang, 'Unknown')} ({detected_lang})")
    else:
        print("Unable to detect language.")

    # Sentiment analysis
    polarity, subjectivity = analyze_sentiment(text)
    sentiment = "Positive" if polarity > 0 else "Negative" if polarity < 0 else "Neutral"
    print(f"Sentiment Analysis: {sentiment} (Polarity: {polarity}, Subjectivity: {subjectivity})")

    # Text summarization
    summarized_text = summarize_text(text)
    print(f"\nSummarized Text:\n{summarized_text}")

    # Translate the text
    translated_text = translate_text(summarized_text, dest_lang)
    if translated_text:
        print(f"\nTranslated text ({LANGUAGES[dest_lang]}): {translated_text}")
    else:
        print("Translation failed.")

# Main function to run the application
def main():
    print("Welcome to the Advanced NLP Translator App")

    # Show available languages and supported ones
    display_languages()
    supported_languages = ['en', 'es', 'fr', 'de', 'it', 'pt', 'zh-cn', 'ja', 'ru']
    print(f"\nSupported languages: {', '.join(supported_languages)}")

    # Get user input for text and language
    text, dest_lang = get_user_input(supported_languages)

    # Process the text: detect language, analyze sentiment, summarize, and translate
    process_text(text, dest_lang)

# Running the application
if __name__ == "__main__":
    main()


Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sumy
  Downloading sumy-0.11.0-py2.py3-none-any.whl.metadata (7.5 kB)
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2024.10.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting r

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Welcome to the Advanced NLP Translator App
Available languages:
af: afrikaans
sq: albanian
am: amharic
ar: arabic
hy: armenian
az: azerbaijani
eu: basque
be: belarusian
bn: bengali
bs: bosnian
bg: bulgarian
ca: catalan
ceb: cebuano
ny: chichewa
zh-cn: chinese (simplified)
zh-tw: chinese (traditional)
co: corsican
hr: croatian
cs: czech
da:

In [3]:
from langdetect import detect

# Example test cases
texts = ['Hello', 'Hola', 'Bonjour', 'こんにちは']
true_languages = ['en', 'es', 'fr', 'ja']

correct = 0
for text, true_lang in zip(texts, true_languages):
    detected_lang = detect(text)
    if detected_lang == true_lang:
        correct += 1

accuracy = correct / len(texts)
print(f"Language detection accuracy: {accuracy * 100:.2f}%")


Language detection accuracy: 50.00%
