[Reference](https://allwin-raju.medium.com/language-detection-in-python-using-lingua-d62e0f941646)

In [1]:
pip install lingua-language-detector

Collecting lingua-language-detector
  Downloading lingua_language_detector-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (349 kB)
Downloading lingua_language_detector-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (74.7 MB)
Installing collected packages: lingua-language-detector
Successfully installed lingua-language-detector-2.0.2


In [2]:
from lingua import Language, LanguageDetectorBuilder

# Restrict the detector to a fixed set of candidate languages.
candidate_languages = (
    Language.ENGLISH,
    Language.FRENCH,
    Language.GERMAN,
    Language.SPANISH,
)
detector = LanguageDetectorBuilder.from_languages(*candidate_languages).build()

# Run detection on a single sentence and print whatever the detector returns.
text = "This is a sample text in English"
detected_language = detector.detect_language_of(text)
print(f"Detected language: {detected_language}")

Detected language: Language.ENGLISH


# 1. Confidence Scores

In [3]:
from lingua import Language, LanguageDetectorBuilder

# Build a detector over every language returned by Language.all().
builder = LanguageDetectorBuilder.from_languages(*Language.all())
detector = builder.build()

text = "This is a sample text in English"

# Compute the confidence values for the text and keep only the first five entries.
confidence_values = detector.compute_language_confidence_values(text)
results = confidence_values[:5]

# Print one "language: value" line per entry, with the value rounded to two decimals.
for result in results:
    print(f"{result.language}: {result.value:.2f}")

Language.ENGLISH: 0.40
Language.TAGALOG: 0.07
Language.LATIN: 0.06
Language.SWAHILI: 0.06
Language.ESPERANTO: 0.04


# 2. Working with Short Texts

In [4]:
from lingua import Language, LanguageDetectorBuilder

# Build a detector over every language returned by Language.all(), configured
# with a minimum relative distance of 0.25 before the detector is built.
detector = (
    LanguageDetectorBuilder.from_languages(*Language.all())
    .with_minimum_relative_distance(0.25)
    .build()
)

short_text = "Hi"  # Very short text
result = detector.detect_language_of(short_text)

# A None result is reported as "too short or unknown"; anything else is printed as-is.
if result is not None:
    print(f"Detected language: {result}")
else:
    print("Text too short or language unknown")

Text too short or language unknown


# 3. Detecting Multiple Languages in Mixed-Language Texts

In [5]:
from lingua import Language, LanguageDetectorBuilder

languages = [Language.ENGLISH, Language.FRENCH, Language.GERMAN]
detector = LanguageDetectorBuilder.from_languages(*languages).build()

# A single string made of three sentences; adjacent literals are concatenated.
sentence = (
    "Parlez-vous français? "
    "Ich spreche Französisch nur ein bisschen. "
    "A little bit is better than nothing."
)

# Each result exposes a language plus start/end indices, used here to slice
# the matching section back out of `sentence`.
for result in detector.detect_multiple_languages_of(sentence):
    section = sentence[result.start_index:result.end_index]
    print(f"{result.language.name}: '{section}'")

FRENCH: 'Parlez-vous français? '
GERMAN: 'Ich spreche Französisch nur ein bisschen. '
ENGLISH: 'A little bit is better than nothing.'
