In [1]:
!pip install indic-transliteration

Collecting indic-transliteration
  Downloading indic_transliteration-2.3.68-py3-none-any.whl.metadata (1.4 kB)
Collecting backports.functools-lru-cache (from indic-transliteration)
  Downloading backports.functools_lru_cache-2.0.0-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting roman (from indic-transliteration)
  Downloading roman-4.2-py3-none-any.whl.metadata (3.6 kB)
Downloading indic_transliteration-2.3.68-py3-none-any.whl (155 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.3/155.3 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading backports.functools_lru_cache-2.0.0-py2.py3-none-any.whl (6.7 kB)
Downloading roman-4.2-py3-none-any.whl (5.5 kB)
Installing collected packages: roman, backports.functools-lru-cache, indic-transliteration
Successfully installed backports.functools-lru-cache-2.0.0 indic-transliteration-2.3.68 roman-4.2


In [2]:
!pip install --upgrade googletrans==4.0.0-rc1

Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l- \ | done
[?25hCollecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2024.11.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading httpcore-0.9.1-py3-none-any.whl.metadata (4.6 kB)
Collecting h11<0.10,>=0.8 (from httpcore=

In [3]:
from googletrans import Translator
import random
import spacy
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# Shared googletrans client; translate_to_hindi() below reads this module-level name
translator = Translator()

# Small English spaCy pipeline; pos_tagging() below reads POS tags from it
nlp = spacy.load("en_core_web_sm")

def translate_to_hindi(input_sentence):
    """Return the Hindi translation of an English sentence.

    The request goes through the module-level ``translator`` with
    ``src='en'`` and ``dest='hi'``.

    Args:
        input_sentence: English text to translate.

    Returns:
        The ``text`` attribute of the translation result, or ``None`` when
        any exception is raised (the exception is printed, not re-raised).
    """
    try:
        return translator.translate(input_sentence, src='en', dest='hi').text
    except Exception as err:
        print("Error in translation:", err)
    return None

def transliterate_to_english(hindi_sentence):
    """Romanize a Devanagari sentence with indic-transliteration.

    Calls ``transliterate`` with ``sanscript.DEVANAGARI`` as the source scheme
    and ``sanscript.ITRANS`` as the target scheme.

    Args:
        hindi_sentence: Text in Devanagari script.

    Returns:
        The transliterated string, or ``None`` when any exception is raised
        (the exception is printed, not re-raised).
    """
    try:
        return transliterate(hindi_sentence, sanscript.DEVANAGARI, sanscript.ITRANS)
    except Exception as err:
        print("Error in transliteration:", err)
    return None

def pos_tagging(input_sentence):
    """Tag a sentence with the module-level spaCy pipeline ``nlp``.

    Args:
        input_sentence: Text to analyse.

    Returns:
        A list of ``(token_text, coarse_pos_tag)`` tuples, one per spaCy
        token, in document order.
    """
    tagged = []
    for tok in nlp(input_sentence):
        tagged.append((tok.text, tok.pos_))
    return tagged

def _align_tags_to_words(words, tagged_tokens):
    """Return one POS tag per whitespace-delimited word.

    spaCy splits contractions and punctuation into separate tokens, so its
    token list is usually longer than ``str.split()``. Tokens are consumed
    until their combined length covers each word; the word gets the tag of
    its first non-punctuation piece (or of its first piece if all are
    punctuation). A word with no remaining tokens gets ``None``.
    """
    tags = []
    cursor = 0
    total = len(tagged_tokens)
    for word in words:
        covered = 0
        chosen = None
        fallback = None
        while cursor < total and covered < len(word):
            text, tag = tagged_tokens[cursor]
            cursor += 1
            if not text.strip():
                # Whitespace-only tokens are not part of any split() word.
                continue
            covered += len(text)
            if fallback is None:
                fallback = tag
            if chosen is None and tag != 'PUNCT':
                chosen = tag
        tags.append(chosen if chosen is not None else fallback)
    return tags


def create_code_switched_sentence(input_english, transliterated_english):
    """Mix an English sentence with words from its romanized Hindi version.

    Words are paired purely by position: word ``i`` of the English sentence
    may be replaced by word ``i`` of the transliterated sentence. Nouns and
    adjectives are replaced when ``random.random() > 0.3``, proper nouns when
    it is ``> 0.5``; every other word stays in English. If the transliterated
    sentence is longer, its extra words are appended unchanged.

    POS tags are aligned to the whitespace-delimited words before use, so a
    contraction or trailing punctuation no longer shifts the tags or breaks
    the user's original words apart.

    Args:
        input_english: Original English sentence.
        transliterated_english: Romanized Hindi sentence.

    Returns:
        The space-joined code-switched sentence.
    """
    input_words = input_english.split()
    transliterated_words = transliterated_english.split()

    # One tag per whitespace word, not per spaCy token.
    word_tags = _align_tags_to_words(input_words, pos_tagging(input_english))

    mixed_sentence = []
    for i in range(max(len(input_words), len(transliterated_words))):
        if i >= len(input_words):
            # English side is exhausted: keep the remaining transliterated words.
            mixed_sentence.append(transliterated_words[i])
            continue

        word, tag = input_words[i], word_tags[i]
        # Drawn once per English word, as before, so seeded runs stay comparable.
        switch_chance = random.random()
        wants_switch = (
            (tag in ('NOUN', 'ADJ') and switch_chance > 0.3)
            or (tag == 'PROPN' and switch_chance > 0.5)
        )
        if wants_switch and i < len(transliterated_words):
            mixed_sentence.append(transliterated_words[i])
        else:
            mixed_sentence.append(word)

    return ' '.join(mixed_sentence)

# Main function
def main():
    """Prompt for an English sentence and print each pipeline stage.

    Prints the Hindi translation, its romanized form and the code-switched
    sentence. Returns early, printing nothing further, when translation or
    transliteration yields an empty or ``None`` result.
    """
    english = input("Enter a sentence in English: ")

    hindi = translate_to_hindi(english)
    if hindi:
        print("Hindi translation:", hindi)

        romanized = transliterate_to_english(hindi)
        if romanized:
            print("Transliterated English:", romanized)

            mixed = create_code_switched_sentence(english, romanized)
            print("Code-switched sentence:", mixed)

# Run the pipeline when this cell/script is executed directly.
# main() calls input(); the recorded run of this cell failed with
# StdinNotImplementedError because the frontend did not support input requests.
if __name__ == "__main__":
    main()

StdinNotImplementedError: raw_input was called, but this frontend does not support input requests.

In [None]:


!pip install spacy
!python -m spacy download en_core_web_sm
!python -m spacy download xx_ent_wiki_sm

In [None]:
import random
import spacy

# Load spaCy pipelines.
# NOTE(review): neither nlp_en nor nlp_hi is referenced by the functions defined
# in this cell; the POS/NER inputs below are hand-written instead.
nlp_en = spacy.load("en_core_web_sm")
nlp_hi = spacy.load("xx_ent_wiki_sm")  # multilingual pipeline, intended here for Hindi

# Sample bilingual word alignment (bwa): English word -> Hindi (Devanagari) phrase.
# Values may contain several words (see 'going').
bwa = {
    'market': 'बाजार',
    'fruits': 'फल',
    'buy': 'खरीदना',
    'going': 'जा रहा हूँ'
}

# Example parallel sentences: ms is the matrix-language (English) sentence,
# es the embedded-language (Hindi) sentence.
ms = "I am going to the market to buy some fruits."
es = "मैं बाजार जा रहा हूँ फल खरीदने के लिए।"

# Hand-written (word, POS) pairs standing in for a parse tree of `ms`.
# In practice, this would be generated by parsing tools (spaCy or Stanford Parser).
# The final '.' of `ms` has no entry, so len(pt) equals len(ms.split()).
pt = [
    ('I', 'PRON'),
    ('am', 'AUX'),
    ('going', 'VERB'),
    ('to', 'ADP'),
    ('the', 'DET'),
    ('market', 'NOUN'),
    ('to', 'PART'),
    ('buy', 'VERB'),
    ('some', 'DET'),
    ('fruits', 'NOUN')
]
# Same pairs as `pt`, kept as a separate "POS tagging" input.
pos = [('I', 'PRON'), ('am', 'AUX'), ('going', 'VERB'), ('to', 'ADP'), ('the', 'DET'), ('market', 'NOUN'),
       ('to', 'PART'), ('buy', 'VERB'), ('some', 'DET'), ('fruits', 'NOUN')]
# Hand-written (entity, label) pairs. NOTE(review): 'FOOD' is presumably an
# illustrative label rather than one the loaded spaCy models emit - confirm.
ner = [('market', 'GPE'), ('fruits', 'FOOD')]  # Example: NER tags (e.g., GPE for location, FOOD for fruits)

def is_switchable(node, pos_tag):
    """Tell whether a word may be switched, judging by its POS tag alone.

    Args:
        node: The word / parse-tree node (not inspected).
        pos_tag: Coarse POS tag of the word.

    Returns:
        ``True`` for content words (noun, verb, adjective), else ``False``.
    """
    return pos_tag in ('NOUN', 'VERB', 'ADJ')

def generate_code_switched_sentence(bwa, ms, es, pt, pos, ner):
    """Generate a synthetic code-switched sentence from a matrix sentence.

    Each ``(word, tag)`` node of ``pt`` is paired by position with the
    whitespace-delimited words of ``ms``. A word is replaced by its ``bwa``
    entry when ``is_switchable`` accepts its tag and the word is a key of
    ``bwa``. Punctuation attached to the surface word ("fruits.") is kept.

    Args:
        bwa: Bilingual word alignment, matrix word -> embedded phrase.
        ms: Matrix-language sentence.
        es: Embedded-language sentence. Accepted for interface
            compatibility; not used.
        pt: Sequence of ``(word, pos_tag)`` nodes aligned with ``ms.split()``.
        pos: POS tagging input. Accepted for interface compatibility; not used.
        ner: Named-entity pairs. Accepted for interface compatibility; the
            previous entity pass replaced each entity with itself and so had
            no effect.

    Returns:
        The space-joined code-switched sentence.
    """
    ms_words = ms.split()

    code_switched_sentence = []
    for idx, (word, tag) in enumerate(pt):
        if idx >= len(ms_words):
            # The parse has more nodes than whitespace words (e.g. separate
            # punctuation nodes); nothing is left to pair them with.
            break

        surface = ms_words[idx]
        if is_switchable(word, tag) and word in bwa:
            replacement = bwa[word]
            # Swap only the word itself so attached punctuation survives.
            surface = surface.replace(word, replacement, 1) if word in surface else replacement

        code_switched_sentence.append(surface)

    # Keep words the parse did not cover instead of truncating the sentence.
    code_switched_sentence.extend(ms_words[len(code_switched_sentence):])

    return ' '.join(code_switched_sentence)

# Main function to run the algorithm
def main():
    """Run the algorithm on the module-level sample inputs and print the result."""
    print(
        "Code-switched Sentence:",
        generate_code_switched_sentence(bwa, ms, es, pt, pos, ner),
    )

# Run the demo when this cell/script is executed directly.
if __name__ == "__main__":
    main()

In [None]:
import spacy
from googletrans import Translator
import random

# English spaCy pipeline; passed to generate_code_switched_sentence() as nlp_model
nlp_en = spacy.load("en_core_web_sm")

# googletrans client; transliterate_to_english() below calls translator.translate on it
translator = Translator()

# Example parallel sentences (matrix language: English, embedded language: Hindi)
ms = "I am going to the market to buy some fruits."
es = "मैं बाजार जा रहा हूँ फल खरीदने के लिए।"

# Example bilingual word alignment (BWA): English word -> Hindi written in Roman script.
# Keys are lowercase; lookups in this cell lowercase the English word first.
# Note: In practice, this can be fetched using a bilingual dictionary or translation model
bwa = {
    'market': 'bazar',
    'fruits': 'phal',
    'buy': 'khareedna',
    'going': 'ja raha hoon'
}

# Generate synthetic code-switched sentence
def is_switchable(word, pos_tag):
    """Report whether a word may be switched, based only on its POS tag.

    Args:
        word: The word itself (not inspected).
        pos_tag: Coarse POS tag of the word.

    Returns:
        ``True`` when the tag is NOUN, VERB or ADJ; ``False`` otherwise.
    """
    content_word_tags = ('NOUN', 'VERB', 'ADJ')
    return pos_tag in content_word_tags

def transliterate_to_english(hindi_word):
    """Send text through the Google translator from Hindi to English.

    Despite the name, this calls ``translator.translate`` with ``src='hi'``
    and ``dest='en'``, i.e. it requests a translation rather than a
    script-level transliteration.

    Args:
        hindi_word: Text to send to the translator.

    Returns:
        The ``text`` of the translation result, or ``hindi_word`` unchanged
        when any exception is raised (the exception is printed).
    """
    try:
        return translator.translate(hindi_word, src='hi', dest='en').text
    except Exception as e:
        print(f"Error in transliteration: {e}")
    return hindi_word  # best effort: fall back to the input

def generate_code_switched_sentence(bwa, ms, es, nlp_model, translator):
    """Generate a synthetic code-switched sentence from a matrix sentence.

    ``ms`` is parsed with ``nlp_model`` and each spaCy token is considered in
    turn. A token is replaced by its ``bwa`` entry (looked up by the
    lowercased token text) when it is part of a named entity or when
    ``is_switchable`` accepts its POS tag. The original spacing between
    tokens is preserved, so punctuation stays attached ("fruits." -> "phal.").

    The ``bwa`` values are used as-is. They are already the embedded-language
    forms, and sending them through a Hindi -> English translation would turn
    them back into English.

    Args:
        bwa: Bilingual word alignment, lowercase matrix word -> embedded phrase.
        ms: Matrix-language sentence.
        es: Embedded-language sentence. Accepted for interface
            compatibility; not used.
        nlp_model: Callable returning an iterable of spaCy-like tokens
            exposing ``text``, ``pos_``, ``ent_type_`` and ``whitespace_``.
        translator: Accepted for interface compatibility; not used, so no
            network access is needed.

    Returns:
        The code-switched sentence with leading/trailing whitespace removed.
    """
    doc = nlp_model(ms)

    pieces = []
    for token in doc:
        replacement = bwa.get(token.text.lower())
        should_replace = replacement is not None and (
            bool(token.ent_type_) or is_switchable(token.text, token.pos_)
        )
        pieces.append(replacement if should_replace else token.text)
        # Re-attach exactly the whitespace that followed the token in `ms`.
        pieces.append(token.whitespace_)

    return ''.join(pieces).strip()

# Main function to run the algorithm
def main():
    """Run the algorithm on the module-level sample inputs and print the result."""
    print(
        "Code-switched Sentence:",
        generate_code_switched_sentence(bwa, ms, es, nlp_en, translator),
    )

# Run the demo when this cell/script is executed directly.
if __name__ == "__main__":
    main()

In [None]:
import spacy
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# English spaCy pipeline; generate_code_switched_sentence() below reads POS tags from it
nlp = spacy.load("en_core_web_sm")

# Function to check if the word should be replaced based on its POS
def should_switch(word, pos_tag):
    """Decide from the POS tag whether a word is eligible for switching.

    Args:
        word: The word itself (not inspected).
        pos_tag: Coarse POS tag of the word.

    Returns:
        ``True`` for NOUN, VERB and ADJ; ``False`` for every other tag.
    """
    return pos_tag in ('NOUN', 'VERB', 'ADJ')

# Function to transliterate English word to Hindi (Roman script)
def transliterate_word(word):
    """Convert a Roman-script word to Devanagari with indic-transliteration.

    The input is interpreted using the ``sanscript.IAST`` scheme and rendered
    in ``sanscript.DEVANAGARI``; the result is therefore in Devanagari
    script, not Roman script.

    Args:
        word: Word to convert.

    Returns:
        The transliterated word, or ``word`` unchanged when any exception is
        raised (the exception is printed).
    """
    try:
        return transliterate(word, sanscript.IAST, sanscript.DEVANAGARI)
    except Exception as e:
        print(f"Error during transliteration: {e}")
    return word  # best effort: fall back to the input

# Function to generate a code-switched sentence using BWA and POS tagging
def generate_code_switched_sentence(input_sentence):
    """Build a code-switched sentence from an English input.

    The sentence is tokenized with the module-level ``nlp`` pipeline. Tokens
    accepted by ``should_switch`` are passed through ``transliterate_word``;
    all other tokens are kept as they are.

    Args:
        input_sentence: English sentence to process.

    Returns:
        The resulting tokens joined with single spaces.
    """
    return ' '.join(
        transliterate_word(tok.text) if should_switch(tok.text, tok.pos_) else tok.text
        for tok in nlp(input_sentence)
    )

# Main function
def main():
    """Prompt for an English sentence and print its code-switched version."""
    sentence = input("Enter an English sentence: ")
    print("Code-switched Sentence:", generate_code_switched_sentence(sentence))

# Run the interactive demo when this cell/script is executed directly (main() calls input()).
if __name__ == "__main__":
    main()

In [None]:
!pip install ai4bharat-transliteration

# Translation

In [None]:
# Read the English sentence; later cells reuse `input_sentence`.
input_sentence= input("write a sentence")
# `Translator` is not imported in this cell; it relies on the googletrans
# import made in an earlier cell.
translator = Translator()
try:
    # English -> Hindi; `translated` stays defined at module level on success.
    translated = translator.translate(input_sentence, src='en', dest='hi')
    print(translated.text)
except Exception as e:
    # Best effort: report the failure and continue.
    print("Error in translation:", e)
    


# Transliteration


In [None]:
# Import first so a missing package fails before any network call is made.
from ai4bharat.transliteration import XlitEngine

translator = Translator()
# Reset so a failed run cannot leave a romanization from an earlier run in `out`.
out = None
try:
    # English -> Hindi (Devanagari); uses `input_sentence` from the translation cell.
    translated = translator.translate(input_sentence, src='en', dest='hi')
    print(translated.text)
except Exception as err:
    print("Error in translation:", err)
else:
    # Only romanize when the translation actually succeeded; previously this
    # ran unconditionally and hit an undefined or stale `translated`.
    e = XlitEngine(src_script_type="indic", beam_width=10, rescore=False)
    out = e.translit_sentence(translated.text, lang_code="hi")
    print(out)


In [None]:
# Show the English input next to `out`, the romanized Hindi produced by the transliteration cell.
print(input_sentence + "--------->" +out)

In [None]:
import spacy
import random

# Load the spaCy pipelines used by contextual_code_switch() below.
nlp_en = spacy.load('en_core_web_sm')  # English model; supplies the POS tags that drive switching
# NOTE(review): xx_ent_wiki_sm is presumably an NER-only multilingual pipeline, in
# which case its tokens carry no POS tags - confirm before relying on them.
nlp_hi = spacy.load('xx_ent_wiki_sm')  # Multilingual model applied to the romanized Hindi sentence

# Romanized Hindi -> English lookup (expand this as needed).
# Keys must match the transliterated tokens exactly; e.g. the sample sentence
# further down contains "rahaa", which does not match the "rahahaa" key here.
hindi_to_english_dict = {
    "bajar": "market",
    "phal": "fruits",
    "khareedane": "buy",
    "liye": "for",
    "jaa": "go",
    "rahahaa": "going",
    "huun": "am",
    "kuchh": "some",
}

def _first_content_pos_per_word(doc):
    """Collapse spaCy tokens into one POS tag per whitespace-delimited word.

    Tokens are grouped until one is followed by whitespace. Each group gets
    the tag of its first non-punctuation token, or of its first token if the
    group is all punctuation. Whitespace-only tokens end a group and get no tag.
    """
    tags = []
    current = []  # POS tags of the tokens in the word being assembled

    def _flush():
        if current:
            content = [tag for tag in current if tag != 'PUNCT']
            tags.append(content[0] if content else current[0])
            del current[:]

    for token in doc:
        if token.is_space:
            _flush()
            continue
        current.append(token.pos_)
        if token.whitespace_:
            _flush()
    _flush()
    return tags


def contextual_code_switch(input_sentence, transliterated_sentence):
    """Blend an English sentence with its romanized Hindi counterpart.

    Words are paired by position. For each position that exists in both
    sentences, the POS tag of the English word decides the outcome:

    * VERB, AUX, ADP, ADV, DET, PRON: keep the English word.
    * NOUN, PROPN: use the transliterated word, mapped through
      ``hindi_to_english_dict`` when it has an entry there.
    * anything else: 50/50 random choice between the two options above.

    Positions present in only one sentence contribute that sentence's word
    (transliterated words still go through the dictionary).

    POS tags are aligned to whitespace-delimited words, so contractions and
    attached punctuation do not shift the tags. The transliterated sentence
    is no longer parsed with ``nlp_hi`` because its tags were never used.

    Args:
        input_sentence: English sentence.
        transliterated_sentence: Romanized Hindi sentence.

    Returns:
        The space-joined blended sentence.
    """
    input_tokens = input_sentence.split()
    transliterated_tokens = transliterated_sentence.split()
    word_tags = _first_content_pos_per_word(nlp_en(input_sentence))

    def _embedded_word(position):
        # Dictionary hit -> mapped word; otherwise the transliterated token itself.
        token = transliterated_tokens[position]
        return hindi_to_english_dict.get(token) or token

    keep_in_english = ('VERB', 'AUX', 'ADP', 'ADV', 'DET', 'PRON')
    switch_to_embedded = ('NOUN', 'PROPN')

    code_switched_sentence = []
    for i in range(max(len(input_tokens), len(transliterated_tokens))):
        if i >= len(transliterated_tokens):
            # Only the English sentence has words left.
            code_switched_sentence.append(input_tokens[i])
        elif i >= len(input_tokens):
            # Only the transliterated sentence has words left.
            code_switched_sentence.append(_embedded_word(i))
        else:
            tag = word_tags[i] if i < len(word_tags) else None
            if tag in keep_in_english:
                code_switched_sentence.append(input_tokens[i])
            elif tag in switch_to_embedded:
                code_switched_sentence.append(_embedded_word(i))
            elif random.random() < 0.5:
                code_switched_sentence.append(input_tokens[i])
            else:
                code_switched_sentence.append(_embedded_word(i))

    return ' '.join(code_switched_sentence)


# Example usage.
# Note: these assignments overwrite `input_sentence` and `out` set by earlier cells.
input_sentence = "i am going to market to buy some fruits"
out = "main kuchh phal khareedane key liye bajar jaa rahaa huun"

# Blend the two sentences and print the result. Words whose POS falls in the
# random branch can differ between runs because no random seed is set here.
result = contextual_code_switch(input_sentence, out)
print(result)

In [None]:
import spacy
import random

class CustomDictionaryCodeSwitcher:
    """Dictionary-driven code-switcher operating on spaCy tokens."""

    def __init__(self, custom_dictionary=None):
        """Load the English pipeline and store the word mapping.

        Args:
            custom_dictionary (dict): Matrix word -> embedded word. A falsy
                value (``None`` or empty) results in an empty mapping.
        """
        self.nlp = spacy.load("en_core_web_sm")
        self.word_mapping = custom_dictionary if custom_dictionary else {}

    def generate_code_switched_sentence(self, matrix_sentence, switch_probability=0.5):
        """Randomly replace mapped tokens of a sentence.

        Args:
            matrix_sentence (str): Sentence to be code-switched.
            switch_probability (float): Chance of replacing a token whose
                text is a key of the mapping. One random number is drawn per
                such token; unmapped tokens draw none.

        Returns:
            str: Token texts (switched or original) joined by single spaces.
        """
        mapping = self.word_mapping

        def _render(tok):
            surface = tok.text
            if surface in mapping and random.random() < switch_probability:
                return mapping[surface]
            return surface

        return ' '.join(_render(tok) for tok in self.nlp(matrix_sentence))

# Example usage
def main():
    """Demo: code-switch one sample sentence with a small custom dictionary."""
    # Fixed seed so repeated runs print the same sentence
    random.seed(42)

    # English -> romanized Hindi mapping used by the demo
    custom_dict = dict(
        market="bajar",
        fruits="phal",
        going="jaa",
        buying="khareedne",
        to="ko",
        some="kuchh",
    )
    matrix_sentence = "i am going to the market for buying some fruits"

    switcher = CustomDictionaryCodeSwitcher(custom_dict)
    code_switched = switcher.generate_code_switched_sentence(matrix_sentence)

    print("Original Sentence:", matrix_sentence)
    print("Code-Switched Sentence:", code_switched)
    print("\nUsed Dictionary:", custom_dict)

# Run the demo (this call is active: it executes whenever the cell runs)
main()

In [None]:
import spacy
import random
import re

class EnhancedCodeSwitcher:
    """Dictionary-driven code-switcher with POS-weighted switch probabilities."""

    def __init__(self, custom_dictionary=None, nlp_model="en_core_web_sm"):
        """
        Initialize the code-switching generator

        Args:
        custom_dictionary (dict): Custom word mapping (copied, not aliased)
        nlp_model (str): SpaCy language model to use

        Raises:
        OSError: If the spaCy model cannot be loaded (a hint is printed first)
        """
        # Load NLP model
        try:
            self.nlp = spacy.load(nlp_model)
        except OSError:
            print(f"Warning: Model {nlp_model} not found. Please download it.")
            raise

        # Copy so that add_word_mapping() never mutates the caller's dictionary
        self.word_mapping = dict(custom_dictionary or {})

        # Per-POS switching weights; multiplied by the global probability
        self.pos_switch_preferences = {
            'NOUN': 0.7,     # High preference for switching nouns
            'VERB': 0.6,     # Moderate preference for verbs
            'ADJ': 0.5,      # Moderate preference for adjectives
            'PROPN': 0.4,    # Low preference for proper nouns
            'ADV': 0.3,      # Low preference for adverbs
            'default': 0.2   # Low preference for other parts of speech
        }

    def _preprocess_sentence(self, sentence):
        """
        Collapse runs of whitespace and strip the ends

        Args:
        sentence (str): Input sentence

        Returns:
        str: Preprocessed sentence
        """
        return re.sub(r'\s+', ' ', sentence).strip()

    def _get_pos_switch_probability(self, token):
        """
        Determine switching probability based on part of speech

        Args:
        token (spacy.tokens.Token): SpaCy token

        Returns:
        float: Weight for the token's POS, or the 'default' weight
        """
        return self.pos_switch_preferences.get(
            token.pos_, self.pos_switch_preferences['default']
        )

    def generate_code_switched_sentence(
        self,
        matrix_sentence,
        global_switch_probability=0.5,
        context_aware=True
    ):
        """
        Generate a code-switched sentence

        Only tokens whose text is a key of the word mapping can be switched.
        One random number is drawn per such token.

        Args:
        matrix_sentence (str): Original sentence to be code-switched
        global_switch_probability (float): Overall switching likelihood
        context_aware (bool): When True, the POS weight of each token scales
            the global probability. When False, the global probability is
            used on its own (previously False disabled switching entirely).

        Returns:
        str: Token texts (switched or original) joined by single spaces
        """
        matrix_sentence = self._preprocess_sentence(matrix_sentence)
        doc = self.nlp(matrix_sentence)

        code_switched_tokens = []
        for token in doc:
            if context_aware:
                switch_prob = (
                    self._get_pos_switch_probability(token) *
                    global_switch_probability
                )
            else:
                switch_prob = global_switch_probability

            if (token.text in self.word_mapping and
                    random.random() < switch_prob):
                code_switched_tokens.append(self.word_mapping[token.text])
            else:
                code_switched_tokens.append(token.text)

        return ' '.join(code_switched_tokens)

    def add_word_mapping(self, new_mappings):
        """
        Add new word mappings to the existing dictionary

        Args:
        new_mappings (dict): Additional word mappings
        """
        self.word_mapping.update(new_mappings)

    def get_word_mappings(self):
        """
        Retrieve current word mappings

        Returns:
        dict: Current word mappings (the internal dictionary itself)
        """
        return self.word_mapping

# Example usage and demonstration
def main():
    """Demo: code-switch a few sample sentences, then extend the dictionary."""
    # Fixed seed so repeated runs print the same sentences
    random.seed(420)

    # English -> romanized Hindi mapping, grouped by part of speech
    code_switch_dict = {
        # Nouns
        "market": "bajar",
        "fruits": "phal",
        "home": "ghar",
        "books": "kitab",

        # Verbs
        # NOTE: "going to" is a two-word key; the switcher looks up one token
        # text at a time.
        "going to": "jaa rahahu",
        "buy": "khareedne",
        "read": "padhna",

        # Adjectives
        "good": "achcha",
        "big": "bada",

        # Prepositions and other words
        "to": "ko",
        "some": "kuchh",
        "very": "bahut"
    }

    switcher = EnhancedCodeSwitcher(code_switch_dict)

    test_sentences = [
        "i am going to market to buy some fruits, but I also have to buy books",
        "i have a good book at home",
        "he is reading a very big book"
    ]

    print("Code-Switched Sentences:")
    for sentence in test_sentences:
        code_switched = switcher.generate_code_switched_sentence(
            sentence,
            global_switch_probability=0.8
        )
        print(f"Original: {sentence}")
        print(f"Switched: {code_switched}\n")

    # Show that mappings can be added after construction
    extra_mappings = {
        "school": "school",
        "computer": "computer"
    }
    switcher.add_word_mapping(extra_mappings)

# Run the demo (this call is active: it executes whenever the cell runs)


main()

In [None]:
import spacy
import random

class AlgorithmicCodeSwitcher:
    """Rule-based code-switcher driven by a bilingual word alignment."""

    def __init__(self, word_alignments=None):
        """
        Initialize the code-switching generator

        Args:
        word_alignments (dict): Bilingual word alignment dictionary
        """
        # English pipeline used to parse the matrix sentence
        self.nlp_en = spacy.load("en_core_web_sm")

        # Word alignment dictionary
        self.word_alignments = word_alignments or {}

    def _is_switchable_node(self, token):
        """
        Determine if a node is switchable based on its part of speech

        Args:
        token (spacy.tokens.Token): SpaCy token

        Returns:
        bool: True for NOUN, ADJ, VERB and PROPN tokens
        """
        return token.pos_ in ('NOUN', 'ADJ', 'VERB', 'PROPN')

    def generate_code_switched_sentence(
        self,
        matrix_sentence,
        embedded_sentence
    ):
        """
        Generate a code-switched sentence using algorithmic approach

        A token is replaced by its word-alignment entry when the token text is
        a key of the alignment dictionary and the token is either part of a
        named entity or has a switchable part of speech. Replacement no longer
        depends on how many words the embedded sentence has; previously an
        aligned word at position i was skipped whenever the embedded sentence
        had i or fewer words.

        Args:
        matrix_sentence (str): Input sentence in matrix language
        embedded_sentence (str): Input sentence in embedded language.
            Accepted for interface compatibility; replacements come from the
            alignment dictionary only.

        Returns:
        str: Token texts (switched or original) joined by single spaces
        """
        ms_doc = self.nlp_en(matrix_sentence)

        code_switched_tokens = []
        for token in ms_doc:
            has_alignment = token.text in self.word_alignments
            if has_alignment and (token.ent_type_ or self._is_switchable_node(token)):
                code_switched_tokens.append(self.word_alignments[token.text])
            else:
                code_switched_tokens.append(token.text)

        return ' '.join(code_switched_tokens)

# Example usage
def main():
    """Demo: code-switch one sample sentence pair with a small alignment table."""
    # Sample inputs: English matrix sentence and its romanized Hindi counterpart
    matrix_sentence = "i am going to market to buy some fruits"
    embedded_sentence = "main kuchh phal khareedane key liye bajar jaa rahaa huun"

    # Bilingual word alignment dictionary
    word_alignments = {
        # Named entities map to themselves
        "John": "John",
        "New York": "New York",

        # Other word mappings
        "market": "Bajar",
        "going": "ja rahahu",
        "buy": "kharedne",
        "fruits": "phal",
    }

    switcher = AlgorithmicCodeSwitcher(word_alignments)
    code_switched = switcher.generate_code_switched_sentence(
        matrix_sentence,
        embedded_sentence
    )

    print("Matrix Language Sentence:", matrix_sentence)
    print("Embedded Language Sentence:", embedded_sentence)
    print("Code-Switched Sentence:", code_switched)

# Run the demo (this call is active: it executes whenever the cell runs)
main()

In [None]:
import spacy
import random

class NaturalCodeSwitcher:
    """Probabilistic, word-level code-switching generator.

    The matrix-language sentence is parsed with spaCy's ``en_core_web_sm``
    pipeline. Every token whose part of speech is in ``SWITCHABLE_POS`` and
    which is not a stop word is replaced, with probability
    ``switch_probability``, by its entry in the alignment dictionary or,
    failing that, by the embedded-sentence token at the same position.
    """

    # Coarse POS tags that are candidates for switching.
    SWITCHABLE_POS = frozenset({'NOUN', 'ADJ', 'VERB', 'PROPN'})

    def __init__(self, word_alignments=None, switch_probability=0.3, seed=None):
        """
        Initialize the natural code-switching generator.

        Args:
            word_alignments (dict): Bilingual word alignment dictionary
                (matrix-language word -> replacement string). Defaults to an
                empty dictionary.
            switch_probability (float): Probability of switching each
                eligible token.
            seed: Optional seed for a private random generator, making the
                output reproducible. When None (the default) the global
                ``random`` module is used, so an external ``random.seed``
                call keeps working exactly as before.
        """
        # Load English NLP model
        self.nlp_en = spacy.load("en_core_web_sm")

        # Word alignment dictionary
        self.word_alignments = word_alignments or {}

        # Switching probability
        self.switch_probability = switch_probability

        # Source of randomness: the module itself or a dedicated instance;
        # both expose ``.random()``.
        self._rng = random if seed is None else random.Random(seed)

    def _is_switchable_node(self, token):
        """
        Decide whether a token is a candidate for switching.

        Args:
            token (spacy.tokens.Token): SpaCy token.

        Returns:
            bool: True when the token's coarse POS tag is in
            ``SWITCHABLE_POS`` and the token is not a stop word.
        """
        return token.pos_ in self.SWITCHABLE_POS and not token.is_stop

    def _switch_token(self, token, embedded_tokens, index):
        """
        Pick the replacement text for a token.

        Lookup order: exact alignment entry, then the lower-cased alignment
        entry (so a capitalised "Market" still finds "market"), then the
        embedded-sentence token at the same position, then the original text.

        Args:
            token (spacy.tokens.Token): Token to be switched.
            embedded_tokens (list): Whitespace-split embedded-language tokens.
            index (int): Position of ``token`` in the matrix sentence.

        Returns:
            str: Replacement text, or the original token text when nothing
            applies.
        """
        original = token.text
        if original in self.word_alignments:
            return self.word_alignments[original]

        lowered = original.lower()
        if lowered in self.word_alignments:
            return self.word_alignments[lowered]

        # Positional fallback: uses the matrix token's index directly, so
        # it only lines up when both sentences share word order.
        if index < len(embedded_tokens):
            return embedded_tokens[index]
        return original

    def generate_code_switched_sentence(
        self,
        matrix_sentence,
        embedded_sentence
    ):
        """
        Generate a code-switched sentence.

        Args:
            matrix_sentence (str): Input sentence in matrix language.
            embedded_sentence (str): Input sentence in embedded language.

        Returns:
            str: Code-switched sentence. Each output token is followed by the
            whitespace that followed the corresponding matrix token, so
            punctuation stays attached ("winter." rather than "winter .").
        """
        # Parse the matrix language sentence
        ms_doc = self.nlp_en(matrix_sentence)

        # Tokenize embedded sentence
        embedded_tokens = embedded_sentence.split()

        pieces = []
        for i, token in enumerate(ms_doc):
            # The random draw happens only for switchable tokens (short-circuit).
            if (self._is_switchable_node(token)
                    and self._rng.random() < self.switch_probability):
                surface = self._switch_token(token, embedded_tokens, i)
            else:
                surface = token.text
            # Re-attach the token's own trailing whitespace ("" before punctuation).
            pieces.append(surface + token.whitespace_)

        return ''.join(pieces)

# Example usage
def main():
    """Demo: code-switch a multi-sentence English paragraph with NaturalCodeSwitcher.

    Builds an English -> romanized-Hindi alignment table, runs the generator
    with a switch probability of 0.5 on a paragraph pair, and prints both
    inputs followed by the generated text. The output varies between runs
    because switching is random.
    """
    # Bilingual word alignment dictionary.
    # Each key appears exactly once. Where the table previously repeated a key
    # with different values ("going", "to", "is", "he"), the value kept here
    # is the one Python actually used: the last one in the literal.
    word_alignments = {
        "market": "bazar",
        "going": "ja rahe",
        "buy": "kharidne",
        "fruits": "phal",
        "some": "kuchh",
        "to": "ke liye",
        "kids": "bachche",
        "are": "hain",
        "playing": "khel rahe",
        "in": "mein",
        "garden": "bagiche",
        "with": "ke saath",
        "friends": "doston",
        "I": "mujhe",
        "love": "pasand",
        "eating": "khana",
        "spicy": "teekha",
        "food": "khana",
        "during": "ke dinon mein",
        "winter": "sardi",
        "she": "woh",
        "is": "hai",
        "reading": "padh rahi",
        "book": "kitaab",
        "park": "park",
        "he": "usko",
        "went": "gaya",
        "school": "school",
        "learn": "seekhne",
        "mathematics": "ganit",
        "they": "woh",
        "watching": "dekh rahe",
        "movie": "film",
        "at": "par",
        "home": "ghar",
        "tonight": "aaj raat",
        "we": "hum",
        "beach": "samudra tat",
        "enjoy": "maza lene",
        "sunset": "suryast",
        "my": "mere",
        "father": "pita",
        "working": "kaam kar rahe",
        "on": "par",
        "big": "bade",
        "project": "project",
        "office": "daftar",
        "likes": "pasand",
        "traveling": "ghoomna",
        "new": "naye",
        "places": "jagahon",
        "meeting": "milna",
        "people": "logon",
        "dog": "kutta",
        "barking": "bhauk raha",
        "loudly": "zor se",
        "near": "ke paas",
        "house": "ghar",
        "wants": "chahti",
        "bake": "banana",
        "chocolate": "chocolate",
        "cake": "cake",
        "for": "ke liye",
        "friend": "dost",
    }

    # Initialize code-switch generator
    code_switcher = NaturalCodeSwitcher(word_alignments, switch_probability=0.5)

    # Test sentences
    matrix_sentence = "I love eating spicy food during winter. She is reading a book in the park while the kids are playing in the garden with their friends. He went to school to learn mathematics, and my father is working on a big project in his office. They are watching a movie at home tonight. Meanwhile, the dog is barking loudly near the house. We are going to the beach to enjoy the sunset. Later, she wants to bake a chocolate cake for her friend. Tomorrow, I am going to the market to buy some fruits."
    embedded_sentence = "Mujhe tikha khana pasand hai sardi ke dinon mein. Woh park mein ek kitaab padh rahi hai jab bachche bagiche mein apne doston ke saath khel rahe hain. Woh school gaya tha ganit seekhne ke liye, aur mere pita daftar mein ek bade project par kaam kar rahe hain. Woh aaj raat ghar par ek film dekh rahe hain. Isi beech, kutta ghar ke paas zor se bhauk raha hai. Hum samudra tat par suryast ka maza lene ja rahe hain. Baad mein, woh apne dost ke liye ek chocolate cake banana chahti hai. Kal, main bazar ja raha hoon kuchh phal kharidne ke liye.."

    # Generate code-switched sentence
    code_switched = code_switcher.generate_code_switched_sentence(
        matrix_sentence,
        embedded_sentence
    )

    print("Matrix Language Sentence:", matrix_sentence)
    print("Embedded Language Sentence:", embedded_sentence)
    print("Code-Switched Sentence:", code_switched)

# Run the demo; the call is active, so it executes whenever this cell runs.
main()