
# 🤖 JARVIS: Free Modular Voice Assistant (v1)
This notebook is your **starting point for building a personal AI assistant (JARVIS)** with:
- 🧠 **Free HuggingFace LLMs** (e.g. FLAN-T5)
- 🎙 **Speech recognition**
- 🔊 **Text-to-speech**
- 💸 **GPT-4 cost estimator** (for future planning)
- 🔌 **Pluggable LLM interface** for easy upgrades

**Say "exit", "quit", or "stop" to end the loop.**


In [None]:

# ✅ Install dependencies
!pip install -q transformers gtts speechrecognition pydub accelerate
!apt-get install -y ffmpeg


In [None]:

import os
import torch
import time
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
from transformers import pipeline

# ✅ Modular LanguageModel (can be swapped later)
class LanguageModel:
    """Thin wrapper around a HuggingFace ``text2text-generation`` pipeline.

    Callers only use :meth:`generate`, so a different backend can be plugged
    in by supplying an object with the same method.
    """

    def __init__(self, model_name="google/flan-t5-base"):
        """Build the generation pipeline.

        Args:
            model_name: Model identifier handed to ``transformers.pipeline``.
        """
        # Pipeline device convention: 0 is the first CUDA GPU, -1 is the CPU.
        device = 0 if torch.cuda.is_available() else -1
        self.generator = pipeline("text2text-generation", model=model_name, device=device)

    def generate(self, prompt, max_new_tokens=100):
        """Generate a reply for *prompt*.

        Args:
            prompt: Input text passed to the pipeline.
            max_new_tokens: Cap on the number of generated tokens. Defaults
                to 100, the value that used to be hard-coded.

        Returns:
            The ``generated_text`` field of the first pipeline result.
        """
        outputs = self.generator(prompt, max_new_tokens=max_new_tokens)
        return outputs[0]['generated_text']

# ✅ Cost Estimator for GPT-4 (approx, in INR)
def estimate_cost(prompt, response, model_name="gpt-4", usd_to_inr=85):
    """Roughly estimate what one exchange would cost, in Indian rupees.

    Token counts are approximated as 1.3 tokens per whitespace-separated
    word. Only ``"gpt-4"`` has rates configured here (0.01 USD per 1000
    input tokens, 0.03 USD per 1000 output tokens).

    Args:
        prompt: Text sent to the model.
        response: Text returned by the model.
        model_name: Model to price; anything other than ``"gpt-4"`` yields 0.0.
        usd_to_inr: Exchange rate applied to the USD cost. Defaults to 85,
            the value that used to be hard-coded.

    Returns:
        The estimated cost in INR, rounded to two decimal places.
    """
    if model_name != "gpt-4":
        return 0.0

    input_tokens = len(prompt.split()) * 1.3
    output_tokens = len(response.split()) * 1.3
    cost_usd = (input_tokens / 1000 * 0.01) + (output_tokens / 1000 * 0.03)
    return round(cost_usd * usd_to_inr, 2)

# ✅ Text-to-Speech
def speak(text):
    """Speak *text* aloud using Google TTS and ``ffplay``.

    The speech is saved to ``response.mp3``, converted to ``response.wav``
    with pydub, and played through the ``ffplay`` command. Both files are
    written to the current working directory and overwritten on each call.

    Args:
        text: The sentence to speak. Empty, whitespace-only or ``None``
            input is ignored.

    Returns:
        None.
    """
    # gTTS rejects empty input, so there is nothing to synthesize or play.
    if not text or not text.strip():
        return

    tts = gTTS(text=text, lang='en')
    tts.save("response.mp3")
    sound = AudioSegment.from_file("response.mp3", format="mp3")
    sound.export("response.wav", format="wav")
    # -nodisp: no video window; -autoexit: return once playback finishes.
    os.system("ffplay -nodisp -autoexit response.wav")

# ✅ Speech-to-Text from Microphone
def listen():
    """Record one utterance from ``sr.Microphone()`` and transcribe it.

    Returns:
        The recognized text on success. On failure one of two fixed
        fallback messages is returned instead:
        ``"Sorry, I couldn't understand that."`` when the audio could not be
        transcribed, or ``"Speech recognition failed."`` when the
        recognition request itself failed.
    """
    recognizer = sr.Recognizer()
    # Keep the microphone open only while capturing; the recognition call
    # below works on the recorded audio and does not need the device.
    with sr.Microphone() as source:
        print("🎤 Speak now...")
        audio = recognizer.listen(source)

    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand that."
    except sr.RequestError as err:
        # Show the cause instead of discarding it silently.
        print(f"⚠️ Speech recognition request failed: {err}")
        return "Speech recognition failed."

    print(f"🗣️ You said: {text}")
    return text

# ✅ Run Assistant Loop
model = LanguageModel()  # Plug-and-play here

# Words that end the session when spoken on their own.
EXIT_COMMANDS = {"exit", "quit", "stop"}
# Fallback strings listen() returns when recognition fails. They are status
# messages, not user speech, so they must not be forwarded to the model.
LISTEN_FAILURES = {
    "Sorry, I couldn't understand that.",
    "Speech recognition failed.",
}

while True:
    prompt = listen()
    if prompt in LISTEN_FAILURES:
        print(f"⚠️ {prompt}")
        continue
    if prompt.strip().lower() in EXIT_COMMANDS:
        print("👋 Exiting JARVIS.")
        break

    response = model.generate(prompt)
    print(f"🤖 JARVIS: {response}")
    # gTTS cannot synthesize empty text, so skip playback for blank replies.
    if response.strip():
        speak(response)
    print(f"💸 Estimated GPT-4 cost (INR): ₹{estimate_cost(prompt, response)}")
