## Partie 0 - Installation des packages python

In [None]:
%pip install -r requirements.txt

## Partie I - Détection de la langue

Importation des libraries

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

Lecture du fichier csv d'entraînement

In [2]:
df = pd.read_csv('Language Detection.csv')
df

Unnamed: 0,Text,Language
0,"Nature, in the broadest sense, is the natural...",English
1,"""Nature"" can refer to the phenomena of the phy...",English
2,"The study of nature is a large, if not the onl...",English
3,"Although humans are part of nature, human acti...",English
4,[1] The word nature is borrowed from the Old F...,English
...,...,...
10332,ನಿಮ್ಮ ತಪ್ಪು ಏನು ಬಂದಿದೆಯೆಂದರೆ ಆ ದಿನದಿಂದ ನಿಮಗೆ ಒ...,Kannada
10333,ನಾರ್ಸಿಸಾ ತಾನು ಮೊದಲಿಗೆ ಹೆಣಗಾಡುತ್ತಿದ್ದ ಮಾರ್ಗಗಳನ್...,Kannada
10334,ಹೇಗೆ ' ನಾರ್ಸಿಸಿಸಮ್ ಈಗ ಮರಿಯನ್ ಅವರಿಗೆ ಸಂಭವಿಸಿದ ಎ...,Kannada
10335,ಅವಳು ಈಗ ಹೆಚ್ಚು ಚಿನ್ನದ ಬ್ರೆಡ್ ಬಯಸುವುದಿಲ್ಲ ಎಂದು ...,Kannada


Vectorisation du dataframe et entraînement

In [3]:
vertorizer = TfidfVectorizer()
X = vertorizer.fit_transform(df['Text'])
Y = df['Language']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

Création du model

In [4]:
model = LogisticRegression()
model.fit(X_train, Y_train)

Récupération du score du model

In [5]:
model.score(X_train, Y_train)

0.9945820433436533

Récupération du score de la précision du model

In [6]:
y_pred = model.predict(X_test)
accuracy_score(Y_test, y_pred)

0.9551257253384913

Test du model

In [7]:
a = ["Привет, как дела"]
X = vertorizer.transform(a)
prediction = model.predict(X)
print(prediction[0])

Russian


## Partie II - Traduction du texte

Importation de la librairie de traduction

In [8]:
from googletrans import Translator, LANGUAGES
import json

In [9]:
translator = Translator()

In [10]:
def translate_text(text, dest_language):
    translation = translator.translate(text, dest=dest_language)
    return translation.text

In [11]:
translated_text = translate_text("Привет, как дела", "fr")
print(translated_text)

Salut comment vas-tu


Récupération de la liste des langues

In [12]:
lang_code = {}
for lang in LANGUAGES:
    lang_code[LANGUAGES[lang]] = lang
lang_code    

{'afrikaans': 'af',
 'albanian': 'sq',
 'amharic': 'am',
 'arabic': 'ar',
 'armenian': 'hy',
 'azerbaijani': 'az',
 'basque': 'eu',
 'belarusian': 'be',
 'bengali': 'bn',
 'bosnian': 'bs',
 'bulgarian': 'bg',
 'catalan': 'ca',
 'cebuano': 'ceb',
 'chichewa': 'ny',
 'chinese (simplified)': 'zh-cn',
 'chinese (traditional)': 'zh-tw',
 'corsican': 'co',
 'croatian': 'hr',
 'czech': 'cs',
 'danish': 'da',
 'dutch': 'nl',
 'english': 'en',
 'esperanto': 'eo',
 'estonian': 'et',
 'filipino': 'tl',
 'finnish': 'fi',
 'french': 'fr',
 'frisian': 'fy',
 'galician': 'gl',
 'georgian': 'ka',
 'german': 'de',
 'greek': 'el',
 'gujarati': 'gu',
 'haitian creole': 'ht',
 'hausa': 'ha',
 'hawaiian': 'haw',
 'hebrew': 'he',
 'hindi': 'hi',
 'hmong': 'hmn',
 'hungarian': 'hu',
 'icelandic': 'is',
 'igbo': 'ig',
 'indonesian': 'id',
 'irish': 'ga',
 'italian': 'it',
 'japanese': 'ja',
 'javanese': 'jw',
 'kannada': 'kn',
 'kazakh': 'kk',
 'khmer': 'km',
 'korean': 'ko',
 'kurdish (kurmanji)': 'ku',
 'ky

Création d'un JSON depuis la liste des langues pour l'app streamlit

In [13]:
with open('lang_code.json', 'w') as f:
    json.dump(lang_code, f)