# Transliteration of Sanskrit into Roman letters

In [1]:
#
# unicode source : https://unicode.org/charts/PDF/U0900.pdf
#

import re

# Lookup tables from Devanagari code points to their romanised form.
# The romanisation follows Harvard-Kyoto-style ASCII conventions
# (upper case for long vowels and retroflex consonants).

# Independent vowels (used when a vowel starts a syllable on its own).
vowels = {"\u0905":"a", "\u0906":"A","\u0907":"i","\u0908":"I","\u0909":"u",
         "\u090A":"U","\u090F":"e","\u0910":"ai","\u0913":"o","\u0914":"au",
         "\u090B":"R","\u0960":"RR","\u090C":"lR","\u0961":"lRR"}

# Consonants, each spelled with its inherent vowel "a". The transliteration
# loop removes that "a" when a vowel sign or a halant follows.
# NOTE(review): U+0931 is DEVANAGARI LETTER RRA in the Unicode chart, yet it
# maps to "la" here, duplicating U+0932 (LA). Left unchanged; confirm whether
# another letter (e.g. U+0933 LLA) was intended.
consonants = {"\u0915":"ka","\u0916":"kha","\u0917":"ga","\u0918":"gha","\u0919":"Ga",
             "\u091A":"ca","\u091B":"cha","\u091C":"ja","\u091D":"jha","\u091E":"Ja",
             "\u091F":"Ta","\u0920":"Tha","\u0921":"Da","\u0922":"Dha","\u0923":"Na",
             "\u0924":"ta","\u0925":"tha","\u0926":"da","\u0927":"dha","\u0928":"na",
             "\u092A":"pa","\u092B":"pha","\u092C":"ba","\u092D":"bha","\u092E":"ma",
             "\u092F":"ya","\u0930":"ra","\u0931":"la","\u0932":"la","\u0935":"va","\u0936":"za",
             "\u0937":"Sa","\u0938":"sa","\u0939":"ha"}

# Dependent vowel signs (matras) that replace a consonant's inherent "a".
# Anusvara (U+0902) and visarga (U+0903) are kept in this table as well, and
# U+094F is mapped to itself (no romanisation is defined for it here).
vowel_signs = {"\u093E":"A","\u093F":"i","\u0940":"I","\u0941":"u",
               "\u0942":"U","\u0943":"R","\u0944":"RR","\u0947":"e",
               "\u0948":"ai","\u094B":"o","\u094C":"au","\u0962":"lR",
               "\u0963":"lRR","\u0902":"M","\u0903":"H","\u094F":"ॏ"}

# Marks and punctuation. Most are passed through as Devanagari characters;
# the avagraha (U+093D) becomes an apostrophe.
special_char = {"\u0945":"ॅ","\u0946":"ॆ","\u0951" :"ऺ","\u0964":"।",
               "\u0965":"॥","\u093D":"'"}

# Devanagari digits. The digit block starts at U+0966, which is ZERO, so the
# ten code points U+0966..U+096F map to "0".."9" in order.
numbers = {"\u0966":"0","\u0967":"1","\u0968":"2","\u0969":"3","\u096A":"4",
           "\u096B":"5","\u096C":"6","\u096D":"7","\u096E":"8","\u096F":"9"}

# Virama: marks that the preceding consonant carries no inherent vowel.
halant = "\u094D"

In [2]:
# Sanskrit text used is Adiparvam from Mahabharata
# Source : http://bombay.indology.info/mahabharata/welcome.html

# Load the whole source text as a list of lines (line endings are kept).
# The encoding is passed explicitly: without it `open` falls back to the
# platform's default encoding, which is not UTF-8 on every system, while the
# transliterated output below is always written as UTF-8.
# NOTE(review): assumes Adiparvan.txt is UTF-8 encoded - confirm against the
# downloaded file.
with open("Adiparvan.txt", encoding="utf-8") as f:
    txt = f.readlines()

# Leading ASCII code on a text line: a run of digits, optionally followed by
# letters, anchored at the start of the line.
ascii_code = re.compile(
    pattern=r"(^[0-9]+[a-zA-Z]*)",
)

# A line counts as a comment when it begins with "%".
comments = re.compile(
    pattern=r"^%",
)



In [3]:

# Anusvara and visarga are stored in `vowel_signs`, but they are added after
# a syllable's vowel and do not replace a consonant's inherent "a".
_KEEPS_INHERENT_A = {"\u0902", "\u0903"}


def _drop_inherent_a(syllable):
    """Return `syllable` without one trailing inherent "a", if it has one."""
    return syllable[:-1] if syllable.endswith("a") else syllable


def _transliterate(sentence):
    """Romanise one line of Devanagari text using the lookup tables.

    Characters found in none of the tables (spaces, newlines, Latin text)
    are copied through unchanged.
    """
    trans = []
    last_index = len(sentence) - 1
    for j, c in enumerate(sentence):
        if c in vowels:
            trans.append(vowels[c])

        elif c in consonants:
            # Drop the inherent "a" only when a real vowel sign follows, to
            # prevent cases like नारायणं = naAraAyaNaM instead of nArAyaNaM.
            # A consonant at the end of the string, or one followed by
            # anusvara/visarga, keeps its "a".
            following = sentence[j + 1] if j < last_index else ""
            if following in vowel_signs and following not in _KEEPS_INHERENT_A:
                trans.append(_drop_inherent_a(consonants[c]))
            else:
                trans.append(consonants[c])

        elif c in vowel_signs:
            trans.append(vowel_signs[c])

        elif c in special_char:
            trans.append(special_char[c])

        elif c in numbers:
            trans.append(numbers[c])

        elif c == halant:
            if trans:
                # The halant cancels the inherent "a" of what precedes it.
                trans.append(_drop_inherent_a(trans.pop()))
            else:
                # Nothing precedes the halant on this line: pass it through
                # rather than fail on an empty list.
                trans.append(c)

        else:
            trans.append(c)
    return "".join(trans)


with open("AdiparvanTransliterated.txt","w",encoding = "utf-8") as f:
    for line in txt:
        if comments.match(line): #ignoring comments
            f.write(line)
        else:
            sentence = ascii_code.sub("",line) #removing ascii code
            f.write(_transliterate(sentence))
                    
                    
            
            