<a href="https://colab.research.google.com/github/DeepakKumar2005fg/AIML-/blob/main/Untitled43.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import re

class CrossLingualIRSystem:
    """Toy cross-lingual information-retrieval system.

    A small multilingual corpus is indexed into a single inverted index.
    A query is translated word by word into each target language with
    hand-written dictionaries, and the documents of that language which
    contain *all* translated words (boolean AND) are returned.
    """

    # One tokenizer shared by indexing and querying, so both sides always
    # agree on what a "word" is. Hyphenated compounds such as "par-dessus"
    # are kept as a single token; otherwise the dictionary entries that
    # contain a hyphen could never match anything.
    _TOKEN_PATTERN = re.compile(r"\w+(?:-\w+)*")

    # Language used as a bridge when no direct dictionary exists
    # (e.g. French -> Spanish goes French -> English -> Spanish).
    _PIVOT_LANGUAGE = "en"

    def __init__(self):
        # Sample documents in different languages. The document id prefix
        # ("en_", "fr_", "es_") encodes the document language and is what
        # search() uses to filter results per target language.
        self.documents = {
            "en_doc1": "The quick brown fox jumps over the lazy dog.",
            "en_doc2": "A cat sleeps on the mat.",
            "fr_doc1": "Le rapide renard brun saute par-dessus le chien paresseux.",
            "fr_doc2": "Un chat dort sur le tapis.",
            "es_doc1": "El rápido zorro marrón salta sobre el perro perezoso.",
            "es_doc2": "Un gato duerme en la alfombra."
        }
        # Build a single inverted index for all documents regardless of language
        self.inverted_index = self._build_global_inverted_index()

        # --- Simplified Translation Dictionaries (for demonstration only) ---
        # In a real system, this would use powerful Machine Translation APIs (e.g., Google Translate, DeepL).
        # Keys are "<source>_to_<target>"; each map goes source word -> target word.
        self.translation_dict = {
            "en_to_fr": {
                "fox": "renard", "dog": "chien", "cat": "chat", "sleeps": "dort",
                "quick": "rapide", "brown": "brun", "jumps": "saute", "lazy": "paresseux",
                "mat": "tapis", "the": "le", "a": "un", "on": "sur", "over": "par-dessus"
            },
            "en_to_es": {
                "fox": "zorro", "dog": "perro", "cat": "gato", "sleeps": "duerme",
                "quick": "rápido", "brown": "marrón", "jumps": "salta", "lazy": "perezoso",
                "mat": "alfombra", "the": "el", "a": "un", "on": "en", "over": "sobre"
            },
            # Reverse translations for queries written in other source languages
            "fr_to_en": {
                "renard": "fox", "chien": "dog", "chat": "cat", "dort": "sleeps",
                "rapide": "quick", "brun": "brown", "saute": "jumps", "paresseux": "lazy",
                "tapis": "mat", "le": "the", "un": "a", "sur": "on", "par-dessus": "over"
            },
            "es_to_en": {
                "zorro": "fox", "perro": "dog", "gato": "cat", "duerme": "sleeps",
                "rápido": "quick", "marrón": "brown", "salta": "jumps", "perezoso": "lazy",
                "alfombra": "mat", "el": "the", "un": "a", "en": "on", "sobre": "over"
            }
        }

    def _tokenize(self, text):
        """Return the lowercased word tokens of ``text`` in order of appearance."""
        return self._TOKEN_PATTERN.findall(text.lower())

    def _build_global_inverted_index(self):
        """Builds a single inverted index for all documents, regardless of language.

        Returns:
            dict mapping each token to the list of ids of the documents that
            contain it (each document listed at most once per token).
        """
        inverted_index = {}
        for doc_id, content in self.documents.items():
            # A set, so that a word repeated inside one document is listed once.
            for word in set(self._tokenize(content)):
                inverted_index.setdefault(word, []).append(doc_id)
        return inverted_index

    def _translate_query(self, query_text, source_lang, target_lang):
        """
        Simulates query translation using a simple dictionary lookup.
        In a real system, this would use a robust MT API.

        Args:
            query_text: the raw query string.
            source_lang: language code the query is written in.
            target_lang: language code to translate into.

        Returns:
            A list of translated query words. A word with no known translation
            is kept as-is. When no direct ``source -> target`` dictionary
            exists, the translation is routed through the pivot language if
            both hops are available; otherwise the words are returned
            untranslated.
        """
        query_words = self._tokenize(query_text)

        direct_map = self.translation_dict.get(f"{source_lang}_to_{target_lang}")
        if direct_map is not None:
            return [direct_map.get(word, word) for word in query_words]

        if source_lang == target_lang:
            # Nothing to translate; avoid a lossy round trip through the pivot.
            return query_words

        pivot = self._PIVOT_LANGUAGE
        to_pivot = self.translation_dict.get(f"{source_lang}_to_{pivot}")
        from_pivot = self.translation_dict.get(f"{pivot}_to_{target_lang}")
        if to_pivot is None or from_pivot is None:
            return query_words

        translated_words = []
        for word in query_words:
            pivot_word = to_pivot.get(word)
            if pivot_word is None:
                # Only take the second hop for words that really were
                # translated into the pivot language; an untranslated source
                # word must not be mistaken for a pivot-language word.
                translated_words.append(word)
            else:
                translated_words.append(from_pivot.get(pivot_word, word))
        return translated_words

    def search(self, user_query, source_language="en", target_languages=("en", "fr", "es")):
        """
        Performs a cross-lingual search.
        Translates the user query to target languages and searches for relevant documents.

        Args:
            user_query: the raw query string.
            source_language: language code the query is written in.
            target_languages: iterable of language codes to search in.

        Returns:
            Sorted list of unique ids of the documents that contain every
            (translated) query word, restricted per target language to the
            documents whose id carries that language prefix.
        """
        all_matching_docs = set()

        print(f"\n--- Question 10: Searching for '{user_query}' (Source: {source_language}) ---")

        for target_lang in target_languages:
            if target_lang == source_language:
                # If target language is the same as source, no translation needed
                translated_query_words = self._tokenize(user_query)
            else:
                # Translate query to the target language
                translated_query_words = self._translate_query(user_query, source_language, target_lang)

            print(f"  Query translated to {target_lang}: {' '.join(translated_query_words)}")

            if not translated_query_words:
                continue # Skip if query is empty after translation

            # AND logic: a document must appear in the posting list of every word.
            postings = [set(self.inverted_index.get(word, ())) for word in translated_query_words]
            candidates = set.intersection(*postings)

            # Keep only documents written in the current target language
            # (doc ids are prefixed with language codes like "en_", "fr_", etc.).
            # This also stops words shared between languages (e.g. "un")
            # from leaking documents of another language into the results.
            lang_prefix = f"{target_lang}_"
            all_matching_docs.update(doc_id for doc_id in candidates if doc_id.startswith(lang_prefix))

        return sorted(all_matching_docs) # Return unique and sorted document IDs

    def get_document_content(self, doc_id):
        """Returns the content of a document given its ID, or "Document not found."."""
        return self.documents.get(doc_id, "Document not found.")

# --- Demonstration ---
if __name__ == "__main__":
    cl_ir_system = CrossLingualIRSystem()
    every_language = ["en", "fr", "es"]

    # Each demo case: (query text, language the query is written in,
    # extra note appended to the summary line).
    demo_cases = [
        ("brown fox", "en", ""),
        ("cat sleeps", "en", ""),
        ("lazy dog", "en", ""),
        ("renard rapide", "fr", " (from French query)"),  # "quick fox" in French
    ]

    for query_text, query_language, note in demo_cases:
        hits = cl_ir_system.search(
            query_text,
            source_language=query_language,
            target_languages=every_language,
        )
        print(f"\nDocuments matching '{query_text}'{note}: {hits}")
        # Show the full text of every hit so the match can be checked by eye.
        for hit_id in hits:
            print(f"  Content of {hit_id}: {cl_ir_system.get_document_content(hit_id)}")


--- Question 10: Searching for 'brown fox' (Source: en) ---
  Query translated to en: brown fox
  Query translated to fr: brun renard
  Query translated to es: marrón zorro

Documents matching 'brown fox': ['en_doc1', 'es_doc1', 'fr_doc1']
  Content of en_doc1: The quick brown fox jumps over the lazy dog.
  Content of es_doc1: El rápido zorro marrón salta sobre el perro perezoso.
  Content of fr_doc1: Le rapide renard brun saute par-dessus le chien paresseux.

--- Question 10: Searching for 'cat sleeps' (Source: en) ---
  Query translated to en: cat sleeps
  Query translated to fr: chat dort
  Query translated to es: gato duerme

Documents matching 'cat sleeps': ['en_doc2', 'es_doc2', 'fr_doc2']
  Content of en_doc2: A cat sleeps on the mat.
  Content of es_doc2: Un gato duerme en la alfombra.
  Content of fr_doc2: Un chat dort sur le tapis.

--- Question 10: Searching for 'lazy dog' (Source: en) ---
  Query translated to en: lazy dog
  Query translated to fr: paresseux chien
  Query 