<a href="https://colab.research.google.com/github/Akhilesh-00/Tami_AI_Assistant/blob/main/Tamil_AI_Assistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install necessary libraries and FFmpeg
print("Installing necessary libraries...")
!pip install -q gTTS SpeechRecognition google-generativeai
!apt-get install -y -qq ffmpeg
print("Installation complete.")

Installing necessary libraries...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m58.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstallation complete.


In [None]:
# Step 2: Import modules and configure the AI model
import speech_recognition as sr
from gtts import gTTS
from IPython.display import Audio, display, Javascript
from google.colab import output
import datetime
import time
from base64 import b64decode
import ipywidgets as widgets
import os
import subprocess
import google.generativeai as genai
import json

# --- API key: enter it in the form field below, or set the GEMINI_API_KEY environment variable ---
# SECURITY: never commit a real key in the notebook source. The key that was
# previously hardcoded on this line should be treated as leaked and revoked.
API_KEY = ''  #@param {type:"string"}
# Fall back to the environment when the form field is left empty.
API_KEY = API_KEY.strip() or os.environ.get("GEMINI_API_KEY", "").strip()

# Configure the Gemini AI model with your API key.
# `model` starts as None so the name always exists, even when configuration is skipped or fails.
model = None
if not API_KEY:
    print("🚨 ERROR: No API key provided. Enter it in the form above or set GEMINI_API_KEY, then run this cell again.")
else:
    try:
        genai.configure(api_key=API_KEY)
        model = genai.GenerativeModel('gemini-1.5-flash')
        print("✅ Gemini AI model configured successfully.")
    except Exception as e:
        print(f"❌ ERROR: Could not configure AI. Is your API Key correct? Details: {e}")

✅ Gemini AI model configured successfully.


In [None]:
# Step 3: Define all helper functions

# 3a. Audio Recording
# JavaScript injected into the cell output. It defines a global `record(ms)`
# function that captures microphone audio for `ms` milliseconds and resolves
# with the recording encoded as a base64 data URL.
RECORD_JS = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
  const reader = new FileReader()
  reader.onloadend = () => resolve(reader.result)
  reader.readAsDataURL(blob)
})
// `var` (not const/let) so that `record` stays reachable after this script has run.
var record = time => new Promise(async (resolve, reject) => {
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
    const recorder = new MediaRecorder(stream)
    const chunks = []
    recorder.ondataavailable = e => chunks.push(e.data)
    recorder.onstop = async () => {
      // Release the microphone so the browser stops capturing once we are done.
      stream.getTracks().forEach(track => track.stop())
      resolve(await b2text(new Blob(chunks)))
    }
    recorder.start()
    await sleep(time)
    recorder.stop()
  } catch (err) {
    // e.g. microphone permission denied: settle the promise instead of hanging forever.
    reject(err)
  }
})
"""
def record_audio(filename="audio.wav", duration=7):
    """Record audio through the notebook front end and save it to `filename`.

    Injects RECORD_JS into the cell output, evaluates `record(<milliseconds>)`
    through `output.eval_js`, base64-decodes the part of the returned string
    after the first comma and writes those bytes to disk.

    Args:
        filename: Path the decoded bytes are written to.
        duration: Recording length in seconds (multiplied by 1000 for the JS call).

    Returns:
        `filename` on success, or None if any step raised (the error is printed).
    """
    try:
        display(Javascript(RECORD_JS))
        data_url = output.eval_js(f'record({duration*1000})')
        # The JS side returns a data URL: "<header>,<base64 payload>".
        encoded_payload = data_url.split(',')[1]
        audio_bytes = b64decode(encoded_payload)
        with open(filename, 'wb') as audio_file:
            audio_file.write(audio_bytes)
    except Exception as e:
        print(f"An error occurred during recording: {e}")
        return None
    return filename

# 3b. Audio Conversion
def convert_audio_to_wav(input_file, output_file="converted_audio.wav"):
    """Convert `input_file` to a 16 kHz, mono, 16-bit PCM WAV file using FFmpeg.

    Args:
        input_file: Path of the audio file to convert.
        output_file: Path of the WAV file to write (overwritten if it exists).

    Returns:
        `output_file` when FFmpeg exits with status 0 and the file exists.
        None when the input file is missing, FFmpeg cannot be launched, or the
        conversion fails; in the latter two cases the error details are printed.
    """
    if not os.path.exists(input_file):
        return None

    command = [
        "ffmpeg",
        "-y",                    # overwrite the output file without prompting
        "-i", input_file,
        "-acodec", "pcm_s16le",  # 16-bit little-endian PCM
        "-ar", "16000",          # 16 kHz sample rate
        "-ac", "1",              # mono
        output_file,
    ]
    try:
        # errors="replace" keeps undecodable bytes in FFmpeg's log from raising.
        result = subprocess.run(command, capture_output=True, text=True, errors="replace")
    except OSError as e:
        # Raised when the ffmpeg executable is missing or cannot be executed.
        print(f"--- FFmpeg Conversion Error ---\n{e}\n-----------------------------")
        return None

    if result.returncode == 0 and os.path.exists(output_file):
        return output_file

    print(f"--- FFmpeg Conversion Error ---\n{result.stderr}\n-----------------------------")
    return None

# 3c. Speech-to-Text
def recognize_speech_from_mic(recognizer, audio_data):
    """Transcribe `audio_data` as Tamil (ta-IN) via `recognizer.recognize_google`.

    Args:
        recognizer: Object exposing `recognize_google(audio_data, language=...)`.
        audio_data: Audio passed straight through to `recognize_google`.

    Returns:
        A dict with three keys:
            "success": True when a transcription was produced, otherwise False.
            "error": None on success, otherwise a {"ta": ..., "en": ...} message pair.
            "transcription": The recognised text on success, otherwise None.
    """
    def _failure(tamil_message, english_message):
        # Uniform shape for both failure modes.
        return {
            "success": False,
            "error": {"ta": tamil_message, "en": english_message},
            "transcription": None,
        }

    try:
        transcript = recognizer.recognize_google(audio_data, language='ta-IN')
    except sr.RequestError:
        return _failure("API கிடைக்கவில்லை.", "API unavailable.")
    except sr.UnknownValueError:
        return _failure("பேச்சைப் புரிந்து கொள்ள முடியவில்லை.", "Unable to recognize speech.")

    return {"success": True, "error": None, "transcription": transcript}

# 3d. Text-to-Speech
def speak_bilingual_response(response_dict):
    """Print and play a response, first in Tamil and then in English.

    For each language the text is read from `response_dict` (with a fallback
    message when the key is absent), printed, synthesised with gTTS, saved as
    an MP3 file and displayed as an autoplaying audio widget.

    Args:
        response_dict: Mapping that may contain the keys 'ta' and 'en'.

    Returns:
        None. Any exception raised along the way is caught and printed.
    """
    # (dict key, fallback text, printed prefix, gTTS language, output file)
    utterances = (
        ('ta', "Tamil response not found.", "Assistant says (Tamil): ", 'ta', "response_ta.mp3"),
        ('en', "English response not found.", "Assistant says (English): ", 'en', "response_en.mp3"),
    )
    try:
        for position, (key, fallback, prefix, lang_code, mp3_path) in enumerate(utterances):
            if position > 0:
                # Fixed pause before the next clip; presumably so the previous
                # clip can finish playing before the next one autoplays.
                time.sleep(4)
            spoken_text = response_dict.get(key, fallback)
            print(f"{prefix}{spoken_text}")
            clip = gTTS(text=spoken_text, lang=lang_code, slow=False)
            clip.save(mp3_path)
            display(Audio(mp3_path, autoplay=True))
    except Exception as e:
        print(f"An error occurred during text-to-speech: {e}")

print("✅ All helper functions are defined.")

✅ All helper functions are defined.


In [None]:
# Step 4: Define the new AI-powered brain
def _parse_bilingual_json(raw_text):
    """Return the first JSON object embedded in `raw_text` as a dict.

    Text before the first "{" and after the end of the object (Markdown code
    fences, explanatory prose) is ignored.

    Raises:
        ValueError: If there is no "{" in `raw_text` or the object cannot be
            decoded (json.JSONDecodeError is a ValueError subclass).
    """
    start = raw_text.find("{")
    if start == -1:
        raise ValueError("no JSON object found in the AI response")
    parsed, _ = json.JSONDecoder().raw_decode(raw_text[start:])
    return parsed


def process_command_with_ai(text):
    """Answer a transcribed Tamil command as a {"ta": ..., "en": ...} dict.

    Simple local commands (current time, current date) are answered directly;
    anything else is sent to the module-level Gemini `model`.

    Args:
        text: The transcribed user utterance.

    Returns:
        A dict with a Tamil answer under "ta" and an English answer under "en".
        If the AI call or the parsing of its reply fails, the error is printed
        and a fixed apology dict is returned instead.

    NOTE(review): datetime.now() is used without an explicit time zone, so the
    spoken time/date follows the kernel's clock - confirm that this matches
    the user's locale.
    """
    print(f"You said (in Tamil): {text}")

    # 1. Check for simple, local commands first
    if "நேரம் என்ன" in text:
        current_time = datetime.datetime.now().strftime("%I:%M %p")
        return {"ta": f"மணி இப்போது {current_time}.", "en": f"The current time is {current_time}."}
    if "தேதி என்ன" in text:
        current_date = datetime.datetime.now().strftime("%d %B %Y")
        return {"ta": f"இன்றைய தேதி {current_date}.", "en": f"Today's date is {current_date}."}

    # 2. If it's not a simple command, ask the AI
    print("Asking the Gemini AI model...")
    try:
        # json.dumps quotes and escapes the user text so embedded quotes or
        # newlines cannot break out of the quoted question in the prompt.
        quoted_question = json.dumps(text, ensure_ascii=False)
        prompt = (
            "You are a helpful bilingual assistant. Your user asked a question in Tamil.\n"
            "Provide a concise, helpful answer.\n"
            'Format your response as a single JSON object with two keys: '
            '"ta" for the Tamil answer and "en" for the English answer.\n'
            "\n"
            f"User's question: {quoted_question}\n"
        )
        ai_response = model.generate_content(prompt)
        return _parse_bilingual_json(ai_response.text)
    except Exception as e:
        print(f"❌ An error occurred while contacting the AI: {e}")
        return {"ta": "மன்னிக்கவும், AI பதிலளிக்கவில்லை.", "en": "Sorry, the AI model did not respond."}

print("✅ AI brain function is defined.")

✅ AI brain function is defined.


In [None]:
# Step 5: Define the main assistant function
def tamil_assistant(listen_seconds=7):
    """Greet the user and display a button that runs one listen/answer cycle per click.

    Each click records audio, converts it to WAV, transcribes it, builds a
    response with process_command_with_ai and speaks it. When transcription
    fails, the bilingual error message is spoken instead.

    Args:
        listen_seconds: How long each recording lasts, in seconds.

    Returns:
        None. Returns early (after printing an error) when API_KEY is empty.
    """
    if not API_KEY:
        print("🚨 ERROR: Please enter your API_KEY in the form in Cell 2 and run it again.")
        return

    r = sr.Recognizer()
    initial_greeting = {"ta": "வணக்கம்! நான் ஜெமினி மூலம் இயக்கப்படுகிறேன். உங்கள் கேள்விக்காக காத்திருக்கிறேன்.", "en": "Hello! I am now powered by Gemini. I am waiting for your question."}
    speak_bilingual_response(initial_greeting)
    time.sleep(5)
    button = widgets.Button(description="🎤 Start Listening")
    output_area = widgets.Output()

    def on_button_click(b):
        """Run one record -> convert -> transcribe -> respond cycle."""
        with output_area:
            output_area.clear_output()
            print(f"Listening for {listen_seconds} seconds...")
            recorded_file = record_audio(duration=listen_seconds)
            if not recorded_file:
                return

            print("Converting audio format...")
            converted_file = convert_audio_to_wav(recorded_file)
            if not converted_file:
                return

            print("Processing speech...")
            with sr.AudioFile(converted_file) as source:
                # No adjust_for_ambient_noise() here: on an AudioFile it reads
                # (and thereby discards) the start of the recording, so the
                # first second of the user's speech would never reach record().
                audio_data = r.record(source)

            stt_result = recognize_speech_from_mic(r, audio_data)
            if stt_result["success"]:
                response = process_command_with_ai(stt_result["transcription"])
                speak_bilingual_response(response)
            else:
                speak_bilingual_response(stt_result["error"])

    button.on_click(on_button_click)
    display(button, output_area)

print("✅ Main assistant function is defined.")

✅ Main assistant function is defined.


In [None]:
# Step 6: Start the AI assistant
# tamil_assistant() speaks the greeting and then displays the "Start Listening"
# button with its output area; recording only starts when the button is clicked.
# Note: the final message below is printed unconditionally, even when
# tamil_assistant() returned early because API_KEY was empty.
print("Starting the AI assistant...")
tamil_assistant()
print("✅ Assistant is ready. Click the button below.")

Starting the AI assistant...
Assistant says (Tamil): வணக்கம்! நான் ஜெமினி மூலம் இயக்கப்படுகிறேன். உங்கள் கேள்விக்காக காத்திருக்கிறேன்.


Assistant says (English): Hello! I am now powered by Gemini. I am waiting for your question.


Button(description='🎤 Start Listening', style=ButtonStyle())

Output()

✅ Assistant is ready. Click the button below.
