# Language Detector

In [159]:
import os
import subprocess
import sys
import time
import tkinter as tk
from tkinter import messagebox

import pandas as pd
import speech_recognition as sr
from gtts import gTTS
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Load the CSV file

In [104]:
# Read the labelled corpus; the relative path is resolved against the
# kernel's working directory.
df = pd.read_csv(filepath_or_buffer="language detection.csv")


# Check the first few rows

In [107]:
# Leave the frame as the cell's last expression so the notebook renders it
# as a rich table instead of the plain-text repr that print() produces.
df.head()

                                                Text Language
0   Nature, in the broadest sense, is the natural...  English
1  "Nature" can refer to the phenomena of the phy...  English
2  The study of nature is a large, if not the onl...  English
3  Although humans are part of nature, human acti...  English
4  [1] The word nature is borrowed from the Old F...  English



# Features and Labels

In [110]:
# Input documents (X) and their language labels (y), taken column-wise.
X, y = df["Text"], df["Language"]


# Split into training and testing data


In [113]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Model (TF-IDF for vectorization + Naive Bayes classifier)

In [116]:
# Two-stage pipeline: TF-IDF features feed a multinomial Naive Bayes
# classifier. Step names are kept so the stages stay addressable.
model = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("clf", MultinomialNB()),
    ]
)

# Fit on the training split; as the last expression, the fitted pipeline is
# also displayed by the notebook.
model.fit(X_train, y_train)


# Test accuracy

In [162]:
# Score the fitted pipeline on the held-out split and report it as a
# percentage with two decimals.
accuracy = model.score(X=X_test, y=y_test)
print(f"Model Accuracy: {accuracy*100:.2f}%")

Model Accuracy: 95.79%


# Speech-to-Text Function

In [164]:
def speech_to_text(timeout=5, phrase_time_limit=20):
    """Record one utterance from the default microphone and transcribe it.

    The ambient-noise calibration runs *before* the "Please speak now"
    prompt so the user's first words are not consumed as background noise.

    Args:
        timeout: Seconds to wait for speech to start before giving up.
            ``None`` waits indefinitely (which blocks the Tk event loop).
        phrase_time_limit: Maximum length, in seconds, of the recorded
            phrase. ``None`` records until the speaker pauses.

    Returns:
        The transcript returned by ``recognize_google``, or ``None`` when
        recording or recognition failed. In every ``None`` case an error
        dialog has already been shown to the user.
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            # Calibrate while the user is still silent, then prompt.
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            messagebox.showinfo("Speak", "Please speak now")
            audio = recognizer.listen(
                source,
                timeout=timeout,
                phrase_time_limit=phrase_time_limit,
            )
    except sr.WaitTimeoutError:
        # Nobody spoke within `timeout` seconds; return instead of hanging.
        messagebox.showerror("Error", "No speech detected")
        return None
    except (OSError, AttributeError):
        # Presumably OSError means no usable input device and AttributeError
        # means PyAudio is not installed -- confirm against the installed
        # SpeechRecognition version.
        messagebox.showerror("Error", "Microphone unavailable")
        return None

    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        messagebox.showerror("Error", "Could not understand audio")
        return None
    except sr.RequestError:
        messagebox.showerror("Error", "Speech service unavailable")
        return None

# Text input function

In [166]:
def detect_and_speak():
    """Detect the language of the typed text, display it, and read it aloud.

    Reads the text from ``text_entry``, classifies it with ``model``, writes
    the predicted label to ``result_label``, synthesises the text to
    ``output.mp3`` with gTTS using a voice matching the predicted language,
    and opens the file with the operating system's default player.

    Returns:
        None. Problems are reported to the user through message boxes.
    """
    text = text_entry.get().strip()

    if not text:
        messagebox.showwarning("Warning", "Please enter some text!")
        return

    language = model.predict([text])[0]
    result_label.config(text=f"Detected Language: {language}")

    # Map the classifier's label to a gTTS language code so the text is read
    # with a matching voice rather than always in English.
    # NOTE(review): labels are assumed to be English language names as in the
    # "Language" column (including the "Portugeese"/"Sweedish" spellings that
    # some copies of this dataset use) -- confirm against
    # df["Language"].unique(). Unknown labels fall back to "en", which was
    # the previous behaviour.
    tts_codes = {
        "arabic": "ar",
        "danish": "da",
        "dutch": "nl",
        "english": "en",
        "french": "fr",
        "german": "de",
        "greek": "el",
        "hindi": "hi",
        "italian": "it",
        "kannada": "kn",
        "malayalam": "ml",
        "portugeese": "pt",
        "portuguese": "pt",
        "russian": "ru",
        "spanish": "es",
        "sweedish": "sv",
        "swedish": "sv",
        "tamil": "ta",
        "turkish": "tr",
    }
    tts_lang = tts_codes.get(str(language).strip().lower(), "en")

    try:
        # save() writes the file synchronously, so no sleep is needed before
        # handing it to the player.
        gTTS(text=text, lang=tts_lang).save("output.mp3")
    except Exception as exc:
        # gTTS needs network access and the target file may be locked by a
        # player; show the failure instead of letting Tk swallow it.
        messagebox.showerror("Error", f"Could not generate speech: {exc}")
        return

    try:
        if sys.platform.startswith("win"):
            os.startfile("output.mp3")
        elif sys.platform == "darwin":
            subprocess.Popen(["open", "output.mp3"])
        else:
            subprocess.Popen(["xdg-open", "output.mp3"])
    except OSError as exc:
        messagebox.showerror("Error", f"Could not play audio: {exc}")

# Speech input function

In [168]:
def speak_detect_and_speak():
    """Capture speech, put the transcript in the entry, then detect and speak.

    The transcript from ``speech_to_text`` replaces the contents of
    ``text_entry``; the work is then handed to ``detect_and_speak``, which
    reads the entry, so detection and playback logic live in one place.

    Returns:
        None. Nothing happens beyond an error dialog when no usable
        transcript was obtained.
    """
    text = speech_to_text()

    if text is None:
        # speech_to_text has already shown an error dialog.
        return

    text = text.strip()
    if not text:
        # An empty transcript would otherwise reach the synthesiser with
        # nothing to say.
        messagebox.showerror("Error", "Could not understand audio")
        return

    text_entry.delete(0, tk.END)
    text_entry.insert(0, text)

    detect_and_speak()

# Simple GUI with Tkinter

In [None]:
# Hint shown in the entry until the user clicks into it.
PLACEHOLDER_TEXT = "Enter your text here..."


def _clear_placeholder(event):
    """Empty the entry on focus if it still holds only the placeholder hint."""
    if text_entry.get() == PLACEHOLDER_TEXT:
        text_entry.delete(0, tk.END)


root = tk.Tk()
root.title("Language Detector with Speech Support")
root.geometry("550x350")

text_entry = tk.Entry(root, width=60)
text_entry.pack(pady=20)
text_entry.insert(0, PLACEHOLDER_TEXT)
# Without this the hint stays in the box and gets mixed into typed text.
text_entry.bind("<FocusIn>", _clear_placeholder)

detect_button = tk.Button(
    root,
    text="Detect Language & Speak (Text)",
    command=detect_and_speak
)
detect_button.pack(pady=5)

speech_button = tk.Button(
    root,
    text="Speak & Detect Language",
    command=speak_detect_and_speak
)
speech_button.pack(pady=5)

# Both button callbacks write their result into this label.
result_label = tk.Label(root, text="", font=("Helvetica", 14))
result_label.pack(pady=10)

# Blocks this cell (and the kernel) until the window is closed.
root.mainloop()