In [1]:
# ==============================================================================
# 1. SETUP AND INSTALLATIONS
# ==============================================================================
# This block installs all necessary libraries for the bot to function.

!pip install -q -U litellm google-generativeai sentence-transformers faiss-cpu

import os
import sqlite3
import numpy as np
from datetime import datetime
# Colab-only module: code in later cells calls userdata.get('GOOGLE_API_KEY')
# to read the API key from Colab Secrets, so this notebook will not import
# outside a Colab runtime.
from google.colab import userdata
import litellm

# Suppress noisy warnings from the sentence_transformers library.
# Only FutureWarning-category warnings attributed to that module are ignored;
# every other warning is still shown.
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="sentence_transformers")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# ==============================================================================
# 2. SERVICE CLASSES (MODULAR ARCHITECTURE)
# ==============================================================================
# These classes handle specific functionalities like LLM calls, DB management,
# and vector operations, making the main bot code cleaner.

class LLMService:
    """Handles all interactions with the Google Gemini API via LiteLLM."""

    # Text returned by `invoke` whenever no usable reply could be obtained.
    FALLBACK_REPLY = "Sorry, I encountered an error and can't respond right now."

    def __init__(self, api_key=None):
        """Stores the API key in the environment and selects the model.

        Args:
            api_key: Gemini API key. When omitted or empty, the key is read
                from the Colab secret 'GOOGLE_API_KEY' instead.

        Raises:
            ValueError: If no key was passed and the Colab secret lookup
                returned an empty value.
        """
        # Honour the key supplied by the caller; the Colab secret is only a
        # fallback (previously the argument was silently overwritten).
        if not api_key:
            api_key = userdata.get('GOOGLE_API_KEY')
        if not api_key:
            raise ValueError(
                "No API key provided and the 'GOOGLE_API_KEY' Colab secret is empty."
            )
        # The key is handed over through the environment rather than per call.
        os.environ["GEMINI_API_KEY"] = api_key
        self.model = "gemini/gemini-2.5-flash-lite"

    def invoke(self, prompt: str, temperature: float = 0.8) -> str:
        """Sends a prompt to the LLM and returns the text response.

        Args:
            prompt: Text sent as a single 'user' message.
            temperature: Sampling temperature forwarded to `litellm.completion`.

        Returns:
            The text of the first choice. If the call raises, or the reply
            carries no text content, `FALLBACK_REPLY` is returned instead so
            callers always receive a string.
        """
        try:
            response = litellm.completion(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Best-effort by design: report the problem and keep the chat alive.
            print(f"An error occurred with the LLM API call: {e}")
            return self.FALLBACK_REPLY

        if content is None:
            # A reply without text would otherwise leak `None` to callers that
            # immediately call string methods on the result.
            print("An error occurred with the LLM API call: the model returned no text content.")
            return self.FALLBACK_REPLY
        return content

class DatabaseManager:
    """Manages the SQLite database for storing conversations and mistakes."""

    # Format used for every stored timestamp (local time, one-second resolution).
    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, db_name="language_learning.db"):
        """Opens the database and makes sure both tables exist.

        Args:
            db_name: Database path handed to `sqlite3.connect`.
        """
        self.conn = sqlite3.connect(db_name)
        self.setup_tables()

    @classmethod
    def _now(cls) -> str:
        """Returns the current local time formatted for storage."""
        return datetime.now().strftime(cls.TIMESTAMP_FORMAT)

    def setup_tables(self):
        """Creates the necessary tables if they don't already exist."""
        with self.conn:
            self.conn.execute('''
                CREATE TABLE IF NOT EXISTS conversations (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT NOT NULL,
                    session_id TEXT NOT NULL,
                    role TEXT NOT NULL, -- 'user', 'bot', or 'scene'
                    content TEXT NOT NULL,
                    timestamp TEXT NOT NULL
                )
            ''')
            self.conn.execute('''
                CREATE TABLE IF NOT EXISTS mistakes (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT NOT NULL,
                    session_id TEXT NOT NULL,
                    mistake TEXT NOT NULL,
                    correction TEXT NOT NULL,
                    timestamp TEXT NOT NULL
                )
            ''')

    def add_conversation_turn(self, user_id, session_id, role, content) -> int:
        """Adds a conversation turn to the database and returns its ID.

        Args:
            user_id: Identifier of the user the turn belongs to.
            session_id: Identifier of the session the turn belongs to.
            role: Who produced the turn ('user', 'bot', or 'scene').
            content: Text of the turn.

        Returns:
            The row id of the inserted conversation row.
        """
        # `with self.conn` commits on success and rolls back on error.
        with self.conn:
            cursor = self.conn.execute(
                "INSERT INTO conversations (user_id, session_id, role, content, timestamp) VALUES (?, ?, ?, ?, ?)",
                (user_id, session_id, role, content, self._now())
            )
            return cursor.lastrowid

    def add_mistake(self, user_id, session_id, mistake, correction):
        """Adds a recorded mistake to the database.

        Args:
            user_id: Identifier of the user who made the mistake.
            session_id: Identifier of the session it occurred in.
            mistake: The text the user wrote.
            correction: The feedback/correction text to store alongside it.
        """
        with self.conn:
            self.conn.execute(
                "INSERT INTO mistakes (user_id, session_id, mistake, correction, timestamp) VALUES (?, ?, ?, ?, ?)",
                (user_id, session_id, mistake, correction, self._now())
            )

    def get_conversations_by_ids(self, ids: list[int]) -> list[dict]:
        """Retrieves conversation turns from the database based on a list of IDs.

        Args:
            ids: Row ids of the conversation turns to fetch.

        Returns:
            A list of {"role", "content"} dicts in chronological order; an
            empty list when `ids` is empty.
        """
        if not ids:
            return []
        # Only '?' markers are interpolated into the SQL; values stay bound.
        placeholders = ','.join('?' for _ in ids)
        # Timestamps only have one-second resolution, so `id` breaks ties and
        # keeps turns written within the same second in insertion order.
        query = (
            "SELECT role, content FROM conversations "
            f"WHERE id IN ({placeholders}) ORDER BY timestamp, id"
        )
        rows = self.conn.execute(query, tuple(ids)).fetchall()
        return [{"role": role, "content": content} for role, content in rows]

    def get_session_mistakes(self, session_id: str) -> list[dict]:
        """Retrieves all mistakes made during a specific session.

        Args:
            session_id: Identifier of the session to look up.

        Returns:
            A list of {"mistake", "correction"} dicts in insertion order.
        """
        # Explicit ordering: without it SQLite does not promise any row order.
        query = "SELECT mistake, correction FROM mistakes WHERE session_id = ? ORDER BY id"
        rows = self.conn.execute(query, (session_id,)).fetchall()
        return [{"mistake": mistake, "correction": correction} for mistake, correction in rows]


class VectorStoreManager:
    """Manages the FAISS vector store for semantic search of conversations."""

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Loads the embedding model and creates an empty L2 index.

        Args:
            model_name: Name passed to `SentenceTransformer` to load the
                embedding model.
        """
        # Lazy load sentence_transformers and faiss to avoid import errors on first run
        from sentence_transformers import SentenceTransformer
        import faiss

        self.model = SentenceTransformer(model_name)
        self.dimension = self.model.get_sentence_embedding_dimension()
        self.index = faiss.IndexFlatL2(self.dimension)
        # Position i in this list holds the SQLite conversation id of the i-th
        # vector added to the index.
        self.id_map = []

    def add_embedding(self, text: str, db_id: int):
        """Creates an embedding for text and adds it to the FAISS index.

        Args:
            text: Text to embed.
            db_id: SQLite conversation id to associate with the new vector.
        """
        embedding = self.model.encode([text])
        self.index.add(np.array(embedding, dtype=np.float32))
        # Appended right after the vector so positions stay aligned.
        self.id_map.append(db_id)

    def search(self, query_text: str, k: int = 3) -> list[int]:
        """Searches the index for the most relevant conversation IDs.

        Args:
            query_text: Text to embed and search for.
            k: Maximum number of neighbours to request.

        Returns:
            Unique database ids in the order the index returned them, at most
            `k` entries. Empty when the index is empty or `k` is not positive.
        """
        total = self.index.ntotal
        if total == 0 or k <= 0:
            return []

        query_embedding = self.model.encode([query_text])
        # Never ask for more neighbours than there are stored vectors.
        _, indices = self.index.search(
            np.array(query_embedding, dtype=np.float32), min(k, total)
        )

        # A dict keeps first-seen order, so ranking survives de-duplication.
        ranked_ids = {}
        for position in indices[0]:
            position = int(position)
            # Reject the -1 "no result" marker explicitly: a negative value
            # would otherwise index `id_map` from the end.
            if 0 <= position < len(self.id_map):
                ranked_ids.setdefault(self.id_map[position], None)
        return list(ranked_ids)


In [3]:
# ==============================================================================
# 3. MAIN BOT CLASS
# ==============================================================================
# This class orchestrates the services to create the language learning experience.

class LanguageLearningBot:
    """The main application class for the language learning bot.

    Wires together the LLM service, the SQLite database and the vector store,
    and drives the interactive practice session through `run()`.
    """

    # Inputs (compared case-insensitively, after trimming) that end the session.
    EXIT_COMMANDS = ('quit', 'exit')

    def __init__(self):
        """Creates fresh user/session ids and initializes all services.

        Raises:
            Exception: Re-raises whatever a service constructor raised, after
                printing a hint about the GOOGLE_API_KEY Colab secret.
        """
        # User and Session Info
        # Both ids are derived from the current Unix time in whole seconds.
        stamp = str(int(datetime.now().timestamp()))
        self.user_id = "user_" + stamp
        self.session_id = "session_" + stamp
        self.user_name = ""
        self.known_language = ""
        self.learning_language = ""
        self.level = ""
        # In-memory transcript of this session: list of {"role", "content"}.
        self.conversation_history = []

        # Initialize Services
        try:
            api_key = userdata.get('GOOGLE_API_KEY')
            self.llm = LLMService(api_key=api_key)
            self.db = DatabaseManager()
            self.vector_store = VectorStoreManager()
            print("✅ Services initialized successfully.")
        except Exception as e:
            print(f"🔥 Failed to initialize services. Ensure your GOOGLE_API_KEY is set in Colab Secrets. Error: {e}")
            raise

    def get_user_preferences(self):
        """Asks for name, target language, native language and level.

        Answers are stored with surrounding whitespace removed so that later
        comparisons (e.g. the 'beginner' check) and prompts are not thrown off
        by stray spaces.
        """
        print("# Language Learning Chatbot\n")
        self.user_name = input("Hello! What is your name?\n**User:** ").strip()
        self.learning_language = input(f"\nNice to meet you, {self.user_name}! What language do you want to learn?\n**User:** ").strip()
        self.known_language = input("\nWhat is your native language?\n**User:** ").strip()
        self.level = input(f"\nWhat is your current level in {self.learning_language}? (Beginner, Intermediate, Advanced)\n**User:** ").strip()

    def _is_beginner(self) -> bool:
        """Returns True when the stored level is 'beginner' (case/space-insensitive)."""
        return self.level.strip().lower() == 'beginner'

    def _format_history(self, last_n=None) -> str:
        """Renders conversation turns as 'Role: content' lines.

        Args:
            last_n: Number of most recent turns to include; None means all.
        """
        turns = self.conversation_history if last_n is None else self.conversation_history[-last_n:]
        return "\n".join(f"{turn['role'].capitalize()}: {turn['content']}" for turn in turns)

    def _log_turn(self, role: str, content: str):
        """Helper to log conversation turns to both history, DB, and vector store."""
        self.conversation_history.append({"role": role, "content": content})
        db_id = self.db.add_conversation_turn(self.user_id, self.session_id, role, content)
        # We only create embeddings for user/bot text, not system messages like scenes
        if role in ['user', 'bot']:
            self.vector_store.add_embedding(content, db_id)

    def _get_rag_context(self, topic: str, k: int = 3) -> str:
        """Retrieves relevant past conversations to be used as context (RAG).

        Args:
            topic: Text used as the semantic search query.
            k: Maximum number of snippets to retrieve.

        Returns:
            One '- Role: content' line per retrieved turn, or a fixed
            "nothing found" sentence when the search yields no ids.
        """
        # NOTE(review): the vector store is created empty in __init__, so only
        # turns logged during this session can ever be retrieved here.
        print("\n🧠 Retrieving relevant memories...")
        retrieved_ids = self.vector_store.search(topic, k)
        if not retrieved_ids:
            print("   No relevant memories found.")
            return "No relevant past conversations found."

        past_conversations = self.db.get_conversations_by_ids(retrieved_ids)
        context_str = "\n".join([f"- {conv['role'].capitalize()}: {conv['content']}" for conv in past_conversations])
        print(f"   Found {len(past_conversations)} relevant snippets.")
        return context_str

    def generate_scene(self):
        """Generates a new scene, augmented with RAG context.

        Returns:
            The scene text produced by the LLM; it is also logged with role
            'scene'.
        """
        topic = f"A {self.level} conversation in {self.learning_language} for a {self.known_language} speaker."
        rag_context = self._get_rag_context(topic)

        prompt = f"""
You are a creative language teacher.
The user is {self.user_name}, a {self.level} learner of {self.learning_language}, whose native language is {self.known_language}.

**Review of Past Relevant Conversations (for your context only):**
{rag_context}

**Your Task:**
Based on the user's level and their past conversations, generate a simple, engaging conversational scene.
- The scene must be written entirely in {self.learning_language}.
- It should be a short dialogue (1-2 lines).
- For 'Beginner' level, provide a {self.known_language} translation in parentheses after each line.
- End with a question in {self.known_language} asking the user how they would respond.
- Format: "Setting: [setting]\n[Person 1]: [dialogue]\n[Person 2]: [dialogue]\nQuestion: [question in known_language]"
"""
        scene = self.llm.invoke(prompt)
        self._log_turn("scene", scene)
        return scene

    def continue_scene(self, user_response):
        """Asks the LLM to continue the role-play after the user's reply.

        Args:
            user_response: The user's latest message.

        Returns:
            The bot's reply text; it is also logged with role 'bot'.
        """
        history_str = self._format_history(5)
        prompt = f"""
You are Person 2 in a language practice role-play.
The user, {self.user_name}, is a {self.level} learner of {self.learning_language}.
Their native language is {self.known_language}.

**Current Conversation History:**
{history_str}

**User's latest response:** "{user_response}"

**Your Task:**
Respond naturally to the user's message as Person 2.
- Your response should be 1-2 sentences in {self.learning_language}.
- For 'Beginner' level, provide a {self.known_language} translation in parentheses.
- End with a follow-up question in {self.known_language} to keep the conversation going.
- Format: "[Your response in {self.learning_language}]\nQuestion: [Your question in {self.known_language}]"
"""
        response = self.llm.invoke(prompt)
        self._log_turn("bot", response)
        return response

    def check_response(self, user_input):
        """Asks the LLM to grade the user's reply and records corrections.

        Args:
            user_input: The user's message to check.

        Returns:
            The LLM's feedback, unchanged. When it starts with "CORRECTION:"
            (ignoring case and leading whitespace) the input and the full
            feedback are stored as a mistake for this session.
        """
        history_str = self._format_history(5)

        # The prompt makes the LLM start its answer with a fixed keyword so the
        # verdict can be detected without parsing free text.
        prompt = f"""
You are a strict but encouraging language teacher.
The user, {self.user_name}, is a {self.level} {self.learning_language} learner. Native language: {self.known_language}.

**Conversation Context:**
{history_str}

**User's response to check:** "{user_input}"

**Your Task:**
Analyze the user's response. Your entire feedback MUST start with one of two keywords: "CORRECTION:" or "PERFECT:".
1.  If the response has ANY mistakes (grammar, spelling, politeness, is vulgar, or is off-topic), start your feedback with "CORRECTION:". Then, explain the mistake in {self.known_language} and provide the correct version in {self.learning_language}.
2.  If the response is grammatically perfect and appropriate for the context, start your feedback with "PERFECT:". Then, you can optionally suggest a more natural way to phrase it.

Give your feedback in {self.known_language}. The corrected/suggested text must be in {self.learning_language}.
"""
        analysis = self.llm.invoke(prompt, temperature=0.2)

        # Guard against a missing reply so a failed LLM call cannot crash the
        # session on `.strip()`.
        verdict = (analysis or "").strip().upper()
        if verdict.startswith("CORRECTION:"):
            # We log the entire analysis as the "correction" because it contains the full context of the error.
            self.db.add_mistake(self.user_id, self.session_id, user_input, analysis)
            print("\nI'll note that down for your review.")

        return analysis

    def provide_hints(self):
        """Generates contextual hints for the user.

        Returns:
            The LLM's hint list, built from the last three logged turns.
        """
        history_str = self._format_history(3)
        prompt = f"""
You are a helpful language assistant.
The user, {self.user_name}, is a {self.level} learner of {self.learning_language}, and their native language is {self.known_language}.

**Current Conversation Context:**
{history_str}

**Your Task:**
Provide a list of 3-4 relevant {self.learning_language} words or short phrases that the user could use in their next response. Include their meaning in {self.known_language}.
Do not give a full sentence. Focus on helpful vocabulary.

Format the response exactly like this:
- [word/phrase in {self.learning_language}]: [meaning in {self.known_language}]
"""
        hints = self.llm.invoke(prompt, temperature=0.5)
        return hints

    def _show_hints_if_beginner(self):
        """Prints a hints section, but only for beginner-level users."""
        if self._is_beginner():
            print("### Hints\n")
            hints = self.provide_hints()
            print(f"{hints}\n")

    def get_review(self):
        """Asks the LLM for an end-of-session review.

        Returns:
            The review text, based on the full in-memory transcript and the
            mistakes stored for this session.
        """
        mistakes = self.db.get_session_mistakes(self.session_id)
        if not mistakes:
            mistakes_str = "No specific mistakes were automatically recorded in this session."
        else:
            # The 'correction' now contains the full feedback text
            mistakes_str = "\n".join([f"- User said: '{m['mistake']}' -> Feedback received: '{m['correction']}'" for m in mistakes])

        conversation_str = self._format_history()

        # The prompt tells the LLM to prioritize the full transcript for behavioral analysis.
        prompt = f"""
You are a language teacher providing end-of-session feedback in {self.known_language}.
The user is {self.user_name}, learning {self.learning_language}.

**Full Conversation Transcript:**
{conversation_str}

**Summary of Automatically Recorded Mistakes:**
{mistakes_str}

**Your Task:**
Write a short, encouraging, and honest review (2-3 sentences).
Your primary source of truth is the **Full Conversation Transcript**.
1.  First, analyze the transcript for overall performance, including politeness, tone, and staying on topic. If the user was rude or used inappropriate language, this MUST be the main point of your feedback.
2.  Then, use the "Recorded Mistakes" list to identify specific patterns in their learning.
3.  Conclude with a suggestion for what to focus on next.
"""
        return self.llm.invoke(prompt)

    def run(self):
        """Main loop to run the chatbot application."""
        self.get_user_preferences()
        print("\n## Practice Conversation\n")
        print(f"Great, {self.user_name}! Let’s practice a conversation.")
        scene = self.generate_scene()
        print(f"\n**Scene:** {scene}\n")

        # Show hints for beginners after the initial scene
        self._show_hints_if_beginner()

        while True:
            # Trim first so "quit " or " EXIT" still end the session.
            user_input = input("**User:** ").strip()
            if user_input.lower() in self.EXIT_COMMANDS:
                break
            if not user_input:
                # Nothing to grade or log; just ask again.
                continue

            self._log_turn("user", user_input)

            print("\n### Feedback\n")
            analysis = self.check_response(user_input)
            print(f"{analysis}\n")

            continued_scene = self.continue_scene(user_input)
            print(f"**Scene (continued):** {continued_scene}\n")

            # Show hints for the next turn
            self._show_hints_if_beginner()

        print("\n## Session Review\n")
        review = self.get_review()
        print(f"{review}\n")

        mistakes = self.db.get_session_mistakes(self.session_id)
        print("### Mistakes Made This Session\n")
        if not mistakes:
            print("No mistakes were recorded in this session.")
        else:
            for m in mistakes:
                print(f"- **Mistake:** {m['mistake']}\n  **Correction:** {m['correction']}\n")



In [4]:
# ==============================================================================
# 4. EXECUTION BLOCK
# ==============================================================================
if __name__ == "__main__":
    # A module-level `bot` means a session already ran in this kernel; the
    # guard keeps an accidental re-execution of the cell from starting another.
    if 'bot' in globals():
        print("\nBot has already been run. To start a new session, restart the kernel and run the cell again.")
    else:
        try:
            bot = LanguageLearningBot()
            bot.run()
        except Exception as e:
            # Drop the failed instance so re-running this cell retries instead
            # of reporting "already run" and forcing a kernel restart.
            globals().pop('bot', None)
            print(f"An unexpected error occurred during execution: {e}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Services initialized successfully.
# Language Learning Chatbot

Hello! What is your name?
**User:** kunal

Nice to meet you, kunal! What language do you want to learn?
**User:** marathi

What is your native language?
**User:** english

What is your current level in marathi? (Beginner, Intermediate, Advanced)
**User:** beginner

## Practice Conversation

Great, kunal! Let’s practice a conversation.

🧠 Retrieving relevant memories...
   No relevant memories found.

**Scene:** Setting: A park
Aarti: नमस्कार, तुम्ही कसे आहात? (Hello, how are you?)
Kunal: मी ठीक आहे, धन्यवाद. तुम्ही? (I am fine, thank you. And you?)

Question: How would you respond to Aarti's question, "तुम्ही कसे आहात?" (How are you?)

### Hints

- छान: Good/Nice
- मजेत: Happily/In good spirits
- ठीकठाक: So-so/Alright
- बरं वाटतंय: Feeling well

**User:** मी ठीक आहे, धन्यवाद. तुम्ही?

### Feedback

PERFECT: That's exactly right, Kunal! You've used the correct phrase to respond and politely ask Aarti how she is. Keep up t