In [3]:
#Importing dependencies
from io import open
from conllu import parse_incr
import pandas as pd
import itertools
from sqlalchemy import create_engine
from config import db_password
import numpy as np


In [34]:
# Importing from file
file = 'TamilTB.v0.1\data\TamilTB.v0.1.utf8.conll'
data_file = open(file, "r", encoding="utf-8")


In [4]:
#Setting precision of dataframe as 0
pd.set_option('precision', 0)

#Setting Dataframe display to max
pd.set_option('display.max_rows', None)

In [35]:
#Converting conll to a dataframe
columns = ['id', 'form', 'lemma', 'upos', 'xpos', 'feats', 'head', 'Morpheme']
df = pd.DataFrame(columns = columns)
for tokenlist in parse_incr(data_file):
    for word in tokenlist:
        df = df.append({'id': word["id"], 
                        'form' : word["form"],
                        'lemma' : word["lemma"],
                        'upos' : word["upos"],
                        'xpos' : word["xpos"],
                        'feats' : word["feats"],
                        'head' : word["head"]},
                        ignore_index = True
                        )
df.head()

Unnamed: 0,id,form,lemma,upos,xpos,feats,head,Morpheme
0,1,சென்னை,சென்னை,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",2,
1,2,அருகே,அருகே,P,PP-------,,18,
2,3,ஸ்ரீ,ஸ்ரீ,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",4,
3,4,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,"{'Cas': 'L', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",18,
4,5,கிரீன்,கிரீன்,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",6,


In [36]:
#Function to extract the morphemes by stripping away the lemma
def morpheme_extract(lemmalist, wordlist):
    if len(lemmalist)<len(wordlist):
        for i in range(len(lemmalist)):
            if (wordlist[i] != lemmalist[i]):
                difference.append(wordlist[i])
        index = i
        for i in range(len(wordlist)):
            if i > index:
                difference.append(wordlist[i])
                morpheme = ''.join(difference)
                row["Morpheme"] = morpheme
    else:
        row["Morpheme"] = ""


In [37]:
# Going through each row in the dataframe and calling the function to strip away the morphemes
for index, row in df.iterrows():
    word = row["form"]
    wordlist = [ch for ch in word] 
    lemma = row["lemma"]
    lemmalist = [ch for ch in lemma] 
    difference = []
    morpheme_extract(lemmalist, wordlist)
    difference[:5]

In [38]:
df.head()

Unnamed: 0,id,form,lemma,upos,xpos,feats,head,Morpheme
0,1,சென்னை,சென்னை,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",2,
1,2,அருகே,அருகே,P,PP-------,,18,
2,3,ஸ்ரீ,ஸ்ரீ,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",4,
3,4,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,"{'Cas': 'L', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",18,ில்
4,5,கிரீன்,கிரீன்,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",6,


In [39]:
#Dropping id column
df.drop(columns='id', inplace=True)
df.head()

Unnamed: 0,form,lemma,upos,xpos,feats,head,Morpheme
0,சென்னை,சென்னை,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",2,
1,அருகே,அருகே,P,PP-------,,18,
2,ஸ்ரீ,ஸ்ரீ,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",4,
3,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,"{'Cas': 'L', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",18,ில்
4,கிரீன்,கிரீன்,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",6,


In [40]:
#Reading sentences csv 
file_path = "Sentence_index.csv"
sentence_df = pd.read_csv(file_path, encoding="utf-8")
sentence_df.dropna(inplace=True)
sentence_df.drop(columns='id', inplace=True)
sentence_df.reset_index(inplace=True)
sentence_df.head()

Unnamed: 0,index,NoSpaceAfter
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


In [57]:
#Adding NoSpaceAfter column to dataframe. The 'no_space_after' will be set to 1 if the following token is part of the current token.
#Whenever the splitting takes place this attribute will be set to 1 for the first token.
#For example, The 'no_space_after' attribute for pATukAkkap  will be 1. Whereas the 'no_space_after' attribute for um will be 0.  
df['NoSpaceAfter'] = sentence_df['NoSpaceAfter']
df.head()

Unnamed: 0,form,lemma,upos,xpos,feats,head,Morpheme,NoSpaceAfter
0,சென்னை,சென்னை,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",2,,0
1,அருகே,அருகே,P,PP-------,,18,,0
2,ஸ்ரீ,ஸ்ரீ,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",4,,0
3,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,"{'Cas': 'L', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",18,ில்,0
4,கிரீன்,கிரீன்,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",6,,0


In [58]:
#Adding postpositions and clitics to morpheme list
for i in range(0, len(df)):
    if df.loc[i,'NoSpaceAfter']==1:
        if df.loc[i+1,'NoSpaceAfter']==1:
            if df.loc[i+2,'NoSpaceAfter']==1:
                if df.loc[i+3,'NoSpaceAfter']==1:
                    df.loc[i+1, 'Morpheme'] = df.loc[i+1, 'form']
                    df.loc[i+2, 'Morpheme'] = df.loc[i+2, 'form']
                    df.loc[i+3, 'Morpheme'] = df.loc[i+2, 'form']
                    df.loc[i+4, 'Morpheme'] = df.loc[i+2, 'form']
                    df.loc[i+1, 'NoSpaceAfter'] = 2
                    df.loc[i+2, 'NoSpaceAfter'] = 2
                    df.loc[i+3, 'NoSpaceAfter'] = 2
                    df.loc[i+4, 'NoSpaceAfter'] = 2
                    print(i)
                else:
                    df.loc[i+1, 'Morpheme'] = df.loc[i+1, 'form']
                    df.loc[i+2, 'Morpheme'] = df.loc[i+2, 'form']
                    df.loc[i+3, 'Morpheme'] = df.loc[i+2, 'form']
                    df.loc[i+1, 'NoSpaceAfter'] = 2
                    df.loc[i+2, 'NoSpaceAfter'] = 2
                    df.loc[i+3, 'NoSpaceAfter'] = 2
            else:
                df.loc[i+1, 'Morpheme'] = df.loc[i+1, 'form']
                df.loc[i+2, 'Morpheme'] = df.loc[i+2, 'form']
                df.loc[i+1, 'NoSpaceAfter'] = 2
                df.loc[i+2, 'NoSpaceAfter'] = 2
        else:
            df.loc[i+1, 'Morpheme'] = df.loc[i+1, 'form']
            df.loc[i+1, 'NoSpaceAfter'] = 2



1561
2954


In [59]:
#Creating dataframe out of the frequencies of the morphemes
value_counts_df = pd.DataFrame(df["Morpheme"].value_counts())
value_counts_df.reset_index(inplace=True)
value_counts_df = value_counts_df.rename(columns = {'index':'Morpheme', 'Morpheme':'Counts'})
value_counts_df["Counts"][0]=0
value_counts_df.head(30)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,Morpheme,Counts
0,,0
1,உம்,234
2,கள்,220
3,யில்,164
4,ம்,141
5,ில்,136
6,ஆக,127
7,ப்,109
8,த்,83
9,ததில்,71


In [60]:
#Merging value counts with the original dataframe
merged_df = pd.merge(df, value_counts_df, on="Morpheme", how = "left")
merged_df.head()

Unnamed: 0,form,lemma,upos,xpos,feats,head,Morpheme,NoSpaceAfter,Counts
0,சென்னை,சென்னை,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",2,,0,0
1,அருகே,அருகே,P,PP-------,,18,,0,0
2,ஸ்ரீ,ஸ்ரீ,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",4,,0,0
3,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,"{'Cas': 'L', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",18,ில்,0,136
4,கிரீன்,கிரீன்,N,NEN-3SN--,"{'Cas': 'N', 'Per': '3', 'Num': 'S', 'Gen': 'N'}",6,,0,0


In [61]:
# Removing feats
merged_df.drop(columns='feats', inplace=True)
merged_df.head()

Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts
0,சென்னை,சென்னை,N,NEN-3SN--,2,,0,0
1,அருகே,அருகே,P,PP-------,18,,0,0
2,ஸ்ரீ,ஸ்ரீ,N,NEN-3SN--,4,,0,0
3,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,18,ில்,0,136
4,கிரீன்,கிரீன்,N,NEN-3SN--,6,,0,0


In [64]:
merged_df.rename(columns = {"Morpheme":"FormWithoutLemma"}, inplace = True)
merged_df.head()

Unnamed: 0,form,lemma,upos,xpos,head,FormWithoutLemma,NoSpaceAfter,Counts
0,சென்னை,சென்னை,N,NEN-3SN--,2,,0,0
1,அருகே,அருகே,P,PP-------,18,,0,0
2,ஸ்ரீ,ஸ்ரீ,N,NEN-3SN--,4,,0,0
3,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,18,ில்,0,136
4,கிரீன்,கிரீன்,N,NEN-3SN--,6,,0,0


In [72]:
#Creating a function to separate a string and store it into the Morpheme Separated column
def separator(input_df):
    input_df['MorphemeSeparated'] = np.nan
    input_df['MorphemeSeparated'] = input_df['MorphemeSeparated'].astype(object)
    for i in range(len(input_df)):
        word = input_df.loc[i, 'form']
        a = [ch for ch in word]
        b = np.empty(1, dtype=object)
        b[0] = a
        input_df.loc[i, 'MorphemeSeparated'] = b
    

In [73]:
separator(merged_df)
merged_df.head()

Unnamed: 0,form,lemma,upos,xpos,head,FormWithoutLemma,NoSpaceAfter,Counts,MorphemeSeparated
0,சென்னை,சென்னை,N,NEN-3SN--,2,,0,0,"[ச, ெ, ன, ், ன, ை]"
1,அருகே,அருகே,P,PP-------,18,,0,0,"[அ, ர, ு, க, ே]"
2,ஸ்ரீ,ஸ்ரீ,N,NEN-3SN--,4,,0,0,"[ஸ, ், ர, ீ]"
3,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,18,ில்,0,136,"[ப, ெ, ர, ு, ம, ், ப, ு, த, ூ, ர, ி, ல, ்]"
4,கிரீன்,கிரீன்,N,NEN-3SN--,6,,0,0,"[க, ி, ர, ீ, ன, ்]"


In [74]:
db_string = f"postgres://postgres:{db_password}@indusscript.cljludlfcgoa.us-east-2.rds.amazonaws.com:5432/postgres"

In [75]:
# Sending df to csv and postgres. 
engine = create_engine(db_string)


In [76]:
# Sending df to csv and postgres - Commenting out because code is complete
merged_df.to_sql(name ='completetamil', con=engine)
#merged_df.to_csv('Complete_words_with_morphemes.csv', encoding='utf-8-sig')

In [20]:
# Making noun and verb morpheme dataframe
noun_verb_df = merged_df
## Removing rows without morphemes
for index, row in noun_verb_df.iterrows():
    if row["Morpheme"] == '':
        noun_verb_df.drop(index, inplace=True)

## Removing everything expect nouns and verbs
for index, row in noun_verb_df.iterrows():
    if (row["upos"] != 'N') and (row["upos"] != 'V'):
        noun_verb_df.drop(index, inplace=True)

## Rearranging columns
noun_verb_df = noun_verb_df[['form', 'Morpheme', 'Counts', 'upos', 'xpos', 'NoSpaceAfter']]

## Drop duplicates but keep copies of no space after = 1 morphemes as well
noun_verb_df = noun_verb_df.drop_duplicates(subset = ['Morpheme'])

## Sort by counts
noun_verb_df = noun_verb_df.sort_values(by='Counts', ascending=False)

## Noun and  excluding postpostions
noun_verb_df = noun_verb_df[noun_verb_df['NoSpaceAfter']<2]
noun_verb_df.reset_index(drop=True, inplace=True)
noun_verb_df.head(20)

Unnamed: 0,form,Morpheme,Counts,upos,xpos,NoSpaceAfter
0,அகதிகள்,கள்,220,N,NNN-3PA--,1
1,வகையில்,யில்,164,N,NNL-3SN--,0
2,படும்,ம்,144,V,VR-F3SNPA,0
3,பெரும்புதூரில்,ில்,136,N,NEL-3SN--,0
4,நிறைவேற்றப்,ப்,109,V,Vu-T---AA,0
5,ஆய்வுத்,த்,83,N,NNN-3SN--,0
6,கர்நாடகத்தில்,ததில்,71,N,NEL-3SN--,1
7,எடுக்கப்,க்கப்,69,V,Vu-T---AA,0
8,வந்து,ந்து,63,V,VT-T---AA,0
9,கருத்துக்,க்,62,N,NNN-3SN--,0


In [77]:
# Making postpositions/clitics morpheme list
columns = ['form', 'Counts','upos', 'xpos', 'NoSpaceAfter']
clitics_df = pd.DataFrame(columns = columns)
clitics_df["form"] = merged_df["form"]
clitics_df["Counts"] = merged_df["Counts"]
clitics_df["upos"] = merged_df["upos"]
clitics_df["xpos"] = merged_df["xpos"]
clitics_df["NoSpaceAfter"] = merged_df["NoSpaceAfter"]
clitics_df = clitics_df[clitics_df['NoSpaceAfter']==2]
clitics_df = clitics_df.drop_duplicates(subset = 'form')
clitics_df


Unnamed: 0,form,Counts,upos,xpos,NoSpaceAfter
11,ஆன,69,T,Tg-------,2
23,உள்ளார்,32,V,VR-T3SHAA,2
50,உம்,234,T,Tv-------,2
79,பட்டு,19,V,VT-T---PA,2
105,ப்பட,7,V,VU-T---PA,2
106,உள்ள,27,J,Jd-T----A,2
126,உள்ளனர்,20,V,VR-T3PAAA,2
183,பகுதியில்,1,N,NNL-3SN--,2
195,இலிருந்து,25,P,PP-------,2
200,ஆக,127,P,PP-------,2


In [79]:
#Sending clitics df to sql - Commenting out because code is complete
clitics_df.to_sql(name ='clitics', con=engine)

# Getting Morpheme list for each type of noun and verb

In [23]:
# Creating function to make morphlist with counts
def morphcount(input_df):
    for i in range(len(input_df)):
        word = input_df.loc[i, 'Morpheme']
        a = [ch for ch in word]
        b= []
        n = len(a)
        for num_splits in range(n):
            for splits in itertools.combinations(range(1, n), num_splits):
                splices = zip([0] + list(splits), list(splits) + [n])
                b.append([a[i:j] for i, j in splices])

        df = pd.DataFrame.from_records(b)
        if i == 0:
            morphs = df[i]
        for j in range(len(df.columns)):
            morphs = morphs.append(df[j])
        
        morphs = morphs.map(lambda x: ''.join(x),na_action='ignore')
        morphs.dropna(inplace=True)


In [25]:
#Creating a function to create a morpheme dataframe
def morphemegen(input_df, splitmorph, re):
    i = len(input_df)
    morph = ''.join(splitmorph)
    input_df.loc[i+1, 'MorphemeSeparated'] = splitmorph
    input_df.loc[i+1, 'Morpheme'] = morph
    input_df.loc[i+1, 'xpos Regex'] = re
    

    

In [26]:
#Creating a function to make add counts to a column
def morphcount(input_df):
    value_counts_df = pd.DataFrame(input_df["Morpheme"].value_counts())
    value_counts_df.reset_index(inplace=True)
    value_counts_df = value_counts_df.rename(columns = {'index':'Morpheme', 'Morpheme':'Counts'})
    value_counts_df["Counts"][0]=0
    input_df = pd.merge(input_df, value_counts_df, on="Morpheme", how = "left")
    return(input_df)

In [27]:
#Creating a function to filter merged dataframe based on case
def casefilter(input_df):
    input_df.reset_index(drop=True, inplace=True)
    input_df.drop(columns='Counts', inplace=True)
    input_df.reset_index(drop=True, inplace=True)
    input_df = morphcount(input_df)
    input_df.drop_duplicates(subset=['Morpheme'], inplace=True)
    for index, row in input_df.iterrows():
        if row["Morpheme"] == '':
            input_df.drop(index, inplace=True)
    input_df.reset_index(drop=True, inplace=True)
    input_df = input_df.sort_values(by='Counts', ascending=False)
    separator(input_df)
    input_df.reset_index(drop=True, inplace=True)
    return(input_df)

## Noun - Accusative Case

Singular

In [462]:
#Creating singular noun accusative case morpheme list
re = '(N.A..S...)'
NAS_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
NAS_df = casefilter(NAS_df)
NAS_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,தமிழகத்தை,தமிழகம்,N,NEA-3SN--,12,ததை,0,19,"[த, த, ை]"
1,ஜெயலலிதாவை,ஜெயலலிதா,N,NEA-3SH--,15,வை,0,12,"[வ, ை]"
2,சட்டத்தைய்,சட்டம்,N,NNA-3SN--,20,ததைய்,1,11,"[த, த, ை, ய, ்]"
3,பண்டிகையைய்,பண்டிகை,N,NNA-3SN--,3,யைய்,1,9,"[ய, ை, ய, ்]"
4,பாகிஸ்தானைச்,பாகிஸ்தான்,N,NEA-3SN--,10,ைச்,0,5,"[ை, ச, ்]"
5,படிப்பைப்,படிப்பு,N,NNA-3SN--,15,ைப்,0,4,"[ை, ப, ்]"
6,போக்கைக்,போக்கு,N,NNA-3SN--,7,ைக்,0,4,"[ை, க, ்]"
7,தோனியைத்,தோனி,N,NEA-3SH--,4,யைத்,1,4,"[ய, ை, த, ்]"
8,அதிரடிப்படையைச்,அதிரடிப்படை,N,NNA-3SN--,3,யைச்,0,3,"[ய, ை, ச, ்]"
9,பெருமையைப்,பெருமை,N,NNA-3SN--,14,யைப்,0,3,"[ய, ை, ப, ்]"


In [473]:
#Creating separate morpheme dataframe
columns = ['MorphemeSeparated', 'Morpheme', 'xpos Regex', 'Morpheme continued']
morph_df = pd.DataFrame(columns=columns)

In [474]:
#Adding an instance manually
splitmorph = NAS_df.loc[14, 'MorphemeSeparated']
ai = splitmorph[0]
ai

'ை'

In [475]:
#Adding morphemes to the dataframe one by one
splitmorph = NAS_df.loc[14, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )
splitmorph = NAS_df.loc[29, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )
morphemegen(morph_df, ai, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),


Plural

In [476]:
#Creating plural noun accusative case morpheme list
re = '(N.A..P...)'
NAP_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
NAP_df = casefilter(NAP_df)
NAP_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,நிலையங்களை,நிலையம்,N,NNA-3PN--,16,ஙகளை,0,14,"[ங, க, ள, ை]"
1,வீரர்களைய்,வீரர்,N,NNA-3PA--,29,களைய்,1,11,"[க, ள, ை, ய, ்]"
2,உரிமைகளைப்,உரிமை,N,NNA-3PN--,12,களைப்,0,7,"[க, ள, ை, ப, ்]"
3,குறைகளைக்,குறை,N,NNA-3PN--,16,களைக்,0,5,"[க, ள, ை, க, ்]"
4,அடையாளங்களைப்,அடையாளம்,N,NNA-3PN--,3,ஙகளைப்,0,4,"[ங, க, ள, ை, ப, ்]"
5,உபகரணங்களைய்,உபகரணம்,N,NNA-3PN--,16,ஙகளைய்,1,3,"[ங, க, ள, ை, ய, ்]"
6,வாழ்த்துகளைத்,வாழ்த்து,N,NNA-3PN--,9,களைத்,0,2,"[க, ள, ை, த, ்]"
7,தமிழர்களைச்,தமிழர்,N,NEA-3PA--,13,களைச்,0,2,"[க, ள, ை, ச, ்]"
8,எமெலேக்களை,எமெலே,N,NEA-3PA--,15,க்களை,0,2,"[க, ், க, ள, ை]"
9,குழுவினரைத்,குழுவினர்,N,NNA-3PA--,9,ைத்,0,1,"[ை, த, ்]"


## Noun - Dative Case

Singular

In [477]:
re = '(N.D..S...)'
NDS_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
NDS_df = casefilter(NDS_df)
NDS_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,இந்தியாவுக்கு,இந்தியா,N,NED-3SN--,9,வுக்கு,0,14,"[வ, ு, க, ், க, ு]"
1,இலங்கைக்க்,இலங்கை,N,NED-3SN--,3,க்க்,1,14,"[க, ், க, ்]"
2,நுழைவாயிலுக்கு,நுழைவாயில்,N,NND-3SN--,4,ுக்கு,0,11,"[ு, க, ், க, ு]"
3,நிலையத்துக்கு,நிலையம்,N,NND-3SN--,15,ததுக்கு,0,11,"[த, த, ு, க, ், க, ு]"
4,மக்களுக்க்,மக்கள்,N,NND-3SN--,16,ுக்க்,1,8,"[ு, க, ், க, ்]"
5,லட்சத்துக்க்,லட்சம்,N,NND-3SN--,8,ததுக்க்,1,6,"[த, த, ு, க, ், க, ்]"
6,அமெரிக்காவுக்க்,அமெரிக்கா,N,NED-3SN--,13,வுக்க்,1,3,"[வ, ு, க, ், க, ்]"
7,ஊருக்குப்,ஊர்,N,NND-3SN--,9,ுக்குப்,0,3,"[ு, க, ், க, ு, ப, ்]"
8,பாகிஸ்தானுக்குச்,பாகிஸ்தான்,N,NED-3SN--,6,ுக்குச்,0,2,"[ு, க, ், க, ு, ச, ்]"
9,நூற்றுக்க்,நூறு,N,NND-3SN--,19,்றுக்க்,1,2,"[், ற, ு, க, ், க, ்]"


In [478]:
#Adding morphemes to the dataframe one by one
splitmorph = NDS_df.loc[1, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),


In [479]:
morph_df.loc[4, 'Morpheme continued'] =1
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0


In [480]:
#Adding an instance manually
splitmorph = NDS_df.loc[0, 'MorphemeSeparated']
kku = splitmorph
kku.pop(0)
kku.pop(0)
kku

['க', '்', 'க', 'ு']

In [481]:
#Adding an instance manually
splitmorph = NDS_df.loc[24, 'MorphemeSeparated']
ku = splitmorph
ku.pop(0)
ku.pop(0)
ku.pop(0)
ku.pop(0)
ku

['க', 'ு']

In [482]:
morphemegen(morph_df, kku, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),


In [483]:
morphemegen(morph_df, ku, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),


Plural

In [484]:
re = '(N.D..P...)'
NDP_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
NDP_df = casefilter(NDP_df)
NDP_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,வதந்திகளுக்க்,வதந்தி,N,NND-3PN--,12,களுக்க்,1,14,"[க, ள, ு, க, ், க, ்]"
1,மீறுவோருக்கு,மீறுவோர்,N,NPDF3PH-A,10,ுக்கு,0,7,"[ு, க, ், க, ு]"
2,மாதங்களுக்கு,மாதம்,N,NND-3PN--,10,ஙகளுக்கு,0,5,"[ங, க, ள, ு, க, ், க, ு]"
3,இடங்களுக்க்,இடம்,N,NND-3PN--,16,ஙகளுக்க்,1,4,"[ங, க, ள, ு, க, ், க, ்]"
4,ஆண்டுகளுக்குப்,ஆண்டு,N,NND-3PN--,4,களுக்குப்,0,3,"[க, ள, ு, க, ், க, ு, ப, ்]"
5,எம்பிக்களுக்க்,எம்பி,N,NED-3PA--,4,க்களுக்க்,1,2,"[க, ், க, ள, ு, க, ், க, ்]"
6,நிலையங்களுக்குத்,நிலையம்,N,NND-3PN--,8,ஙகளுக்குத்,0,2,"[ங, க, ள, ு, க, ், க, ு, த, ்]"
7,எம்பிக்களுக்கு,எம்பி,N,NED-3PA--,8,க்களுக்கு,0,1,"[க, ், க, ள, ு, க, ், க, ு]"
8,வாழ்வாதரங்களுக்கு,வாழ்வாதாரம்,N,NND-3PN--,8,ரங்களுக்கு,0,1,"[ர, ங, ், க, ள, ு, க, ், க, ு]"
9,தமிழர்களுக்குத்,தமிழர்,N,NED-3PA--,5,களுக்குத்,0,1,"[க, ள, ு, க, ், க, ு, த, ்]"


In [485]:
#Adding an instance manually
splitmorph = NDP_df.loc[0, 'MorphemeSeparated']
kalu = splitmorph
kalu.pop(3)
kalu.pop(3)
kalu.pop(3)
kalu.pop(3)
kalu

['க', 'ள', 'ு']

In [486]:
morphemegen(morph_df, kalu, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),


## Noun - Instrumental Case

In [487]:
re = '(N.I......)'
NIP_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
NIP_df = casefilter(NIP_df)
NIP_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,சிவாச்சாரியர்களால்,சிவாச்சாரியர்,N,NEI-3PA--,43,களால்,0,3,"[க, ள, ா, ல, ்]"
1,முயற்சியால்,முயற்சி,N,NNI-3SN--,8,யால்,0,3,"[ய, ா, ல, ்]"
2,போரினால்,போர்,N,NNI-3SN--,3,ினால்,0,1,"[ி, ன, ா, ல, ்]"
3,நோயால்,நோய்,N,NNI-3SN--,11,ால்,0,1,"[ா, ல, ்]"
4,காரணத்தால்,காரணம்,N,NNI-3SN--,11,ததால்,0,0,"[த, த, ா, ல, ்]"


In [488]:
#Adding an instance manually
splitmorph = NIP_df.loc[0, 'MorphemeSeparated']
al = splitmorph
al.pop(0)
al.pop(0)
al

['ா', 'ல', '்']

In [489]:
morphemegen(morph_df, al, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),


## Noun - Genitive Case

In [490]:
re = '(N.G.*)'
NG_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
NG_df = casefilter(NG_df)
NG_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,சட்டத்தின்,சட்டம்,N,NNG-3SN--,3,ததின்,0,28,"[த, த, ி, ன, ்]"
1,ஆய்வுத்துறையின்,ஆய்வுத்துறை,N,NNG-3SN--,7,யின்,0,26,"[ய, ி, ன, ்]"
2,இந்தியாவின்,இந்தியா,N,NEG-3SN--,3,வின்,0,19,"[வ, ி, ன, ்]"
3,அவர்களின்,அவர்,N,NNG-3PA--,15,களின்,0,13,"[க, ள, ி, ன, ்]"
4,வீட்டின்,வீடு,N,NNG-3SN--,32,்டின்,0,8,"[், ட, ி, ன, ்]"
5,பதக்கங்களின்,பதக்கம்,N,NNG-3PN--,7,ஙகளின்,0,3,"[ங, க, ள, ி, ன, ்]"
6,சட்டத்தின்,சட்டம்,N,NNG-3SN--,30,சட்டத்தின்,2,1,"[ச, ட, ், ட, த, ், த, ி, ன, ்]"
7,தீட்சித்தின்,தீட்சித்,N,NEG-3SH--,13,தின்,0,1,"[த, ி, ன, ்]"
8,அரசினுடைய,அரசு,N,NNG-3SN--,4,ினுடைய,0,1,"[ி, ன, ு, ட, ை, ய]"
9,பிறருடைய,பிறர்,N,NNG-3SH--,2,ுடைய,0,1,"[ு, ட, ை, ய]"


In [491]:
splitmorph = NG_df.loc[10, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )
splitmorph = NG_df.loc[11, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


## Noun - Locative Case

In [492]:
re = '(N.L.*)'
NL_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
NL_df = casefilter(NL_df)
NL_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,பெரும்புதூரில்,பெரும்புதூர்,N,NEL-3SN--,18,ில்,0,110,"[ி, ல, ்]"
1,கர்நாடகத்தில்,கர்நாடகம்,N,NEL-3SN--,5,ததில்,1,66,"[த, த, ி, ல, ்]"
2,மொழிகளில்,மொழி,N,NNL-3PN--,9,களில்,1,44,"[க, ள, ி, ல, ்]"
3,இந்தியாவில்,இந்தியா,N,NEL-3SN--,6,வில்,0,25,"[வ, ி, ல, ்]"
4,நகரங்களில்,நகரம்,N,NNL-3PN--,13,ஙகளில்,0,20,"[ங, க, ள, ி, ல, ்]"
5,நாட்டில்,நாடு,N,NNL-3SN--,21,்டில்,0,10,"[், ட, ி, ல, ்]"
6,வாசகர்களிடம்,வாசகர்,N,NNL-3PN--,27,களிடம்,0,5,"[க, ள, ி, ட, ம, ்]"
7,மக்களிடம்,மக்கள்,N,NNL-3PA--,6,ிடம்,0,3,"[ி, ட, ம, ்]"
8,உள்ளிட்டவற்றில்,உள்ளிட்டவை,N,NNL-3PN--,14,ற்றில்,0,2,"[ற, ், ற, ி, ல, ்]"
9,கிரிக்கெட்டில்,கிரிக்கெட்,N,NNL-3SN--,4,டில்,0,2,"[ட, ி, ல, ்]"


In [493]:
splitmorph = NL_df.loc[0, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )


In [494]:

splitmorph = NL_df.loc[7, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


## Noun - Sociative Case

In [496]:
re = '(N.S.*)'
NS_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
NS_df = casefilter(NS_df)
NS_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,துணையோடு,துணை,N,NNS-3SN--,24,யோடு,0,3,"[ய, ோ, ட, ு]"
1,மனிதாபிமானத்தோடு,மனிதாபிமானம்,N,NNS-3SN--,10,ததோடு,0,3,"[த, த, ோ, ட, ு]"
2,தங்கப்பதக்கத்துடன்,தங்கப்பதக்கம்,N,NNS-3SN--,5,ததுடன்,0,2,"[த, த, ு, ட, ன, ்]"
3,மோசடியுடன்,மோசடி,N,NNS-3SN--,9,யுடன்,0,1,"[ய, ு, ட, ன, ்]"
4,அமைப்புடன்,அமைப்பு,N,NNS-3SN--,20,டன்,0,1,"[ட, ன, ்]"
5,அவருடன்,அவர்,N,NNS-3SN--,16,ுடன்,0,0,"[ு, ட, ன, ்]"


In [497]:
#Adding an instance manually
splitmorph = NS_df.loc[1, 'MorphemeSeparated']
ootu = splitmorph
ootu.pop(0)
ootu.pop(0)
ootu

['ோ', 'ட', 'ு']

In [498]:
morphemegen(morph_df, ootu, re )

In [499]:
splitmorph = NS_df.loc[5, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


# Noun - Plural

In [501]:
re = '(N....P...)'
Np_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
Np_df = casefilter(Np_df)
Np_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,படிகளை,படி,N,NNA-3PN--,19,களை,0,46,"[க, ள, ை]"
1,மொழிகளில்,மொழி,N,NNL-3PN--,9,களில்,1,43,"[க, ள, ி, ல, ்]"
2,திட்டங்கள்,திட்டம்,N,NNN-3PN--,18,ஙகள்,0,36,"[ங, க, ள, ்]"
3,நகரங்களில்,நகரம்,N,NNL-3PN--,13,ஙகளில்,0,20,"[ங, க, ள, ி, ல, ்]"
4,அவர்களின்,அவர்,N,NNG-3PA--,15,களின்,0,20,"[க, ள, ி, ன, ்]"
5,ஆண்டுகளுக்கு,ஆண்டு,N,NND-3PN--,13,களுக்கு,0,16,"[க, ள, ு, க, ், க, ு]"
6,நிலையங்களை,நிலையம்,N,NNA-3PN--,16,ஙகளை,0,14,"[ங, க, ள, ை]"
7,வதந்திகளுக்க்,வதந்தி,N,NND-3PN--,12,களுக்க்,1,14,"[க, ள, ு, க, ், க, ்]"
8,வீரர்களைய்,வீரர்,N,NNA-3PA--,29,களைய்,1,11,"[க, ள, ை, ய, ்]"
9,நாட்கள்,நாள்,N,NNN-3PN--,5,டகள்,0,8,"[ட, க, ள, ்]"


In [502]:
splitmorph = Np_df.loc[55, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


In [503]:
# Sending noun morphemes to SQL. Commenting out because code is complete
#morph_df.to_sql(name ='nounmorphemes', con=engine)

# Pronouns

In [507]:
P_df = merged_df[merged_df['upos']== 'R']
P_df = P_df[P_df['NoSpaceAfter']== 0]
P_df.reset_index(inplace=True)
P_df.drop_duplicates(subset = 'lemma', inplace=True)
P_df

Unnamed: 0,index,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts
0,13,யாருக்கும்,யார்,R,RBD-3SA--,15,ுக்கும்,0,2
1,25,இது,இது,R,RpN-3SN--,2,,0,0
2,28,அவர்,அவர்,R,RpN-3SH--,6,,0,0
3,142,அதை,அது,R,RpA-3SN--,16,,0,0
6,276,தங்களின்,தன்,R,RhG-3PA--,9,ஙகளின்,0,4
7,322,அவற்றை,அவை,R,RpA-3PN--,14,ற்றை,0,2
8,527,நீங்கள்,நீங்கள்,R,RpN-2SH--,18,,0,0
17,1013,அனைவரும்,அனைவர்,R,RpN-3PA--,9,ும்,0,16
25,1323,தான்,தான்,R,RpN-3SA--,3,,0,0
44,2031,நான்,நான்,R,RpN-1SA--,11,,0,0


In [61]:
# Sending pronouns to SQL. Commenting out because code is complete
#P_df.to_sql(name ='pronouns', con=engine)

# Verbs - Indicative

### 1s

In [508]:
re = '(V...1S...)'
V1s_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
V1s_df = casefilter(V1s_df)
V1s_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,மாட்டேன்,மாட்டு,V,VR-T1SAAA,11,ேன்,0,2,"[ே, ன, ்]"
1,புறக்கணிக்கிறேன்,புறக்கணி,V,Vr-P1SAAA,0,க்கிறேன்,0,2,"[க, ், க, ி, ற, ே, ன, ்]"
2,உள்ளேன்,உள்,V,VR-T1SAAA,14,உள்ளேன்,2,2,"[உ, ள, ், ள, ே, ன, ்]"
3,உள்ளேன்,உள்,V,Vr-T1SAAA,0,ளேன்,0,1,"[ள, ே, ன, ்]"
4,கேட்டுக்கொள்கிறேன்,கேள்,V,Vr-P1SAAA,0,டடுக்கொள்கிறேன்,0,1,"[ட, ட, ு, க, ், க, ொ, ள, ், க, ி, ற, ே, ன, ்]"
5,போனேன்,போ,V,VR-D1SAAA,5,னேன்,0,1,"[ன, ே, ன, ்]"
6,மேற்கொண்டேன்,மேற்கொள்,V,Vr-D1SAAA,0,ணடேன்,0,1,"[ண, ட, ே, ன, ்]"
7,கருதுகிறேன்,கருது,V,Vr-P1SAAA,16,கிறேன்,0,0,"[க, ி, ற, ே, ன, ்]"


In [509]:
splitmorph = V1s_df.loc[0, 'MorphemeSeparated']
morphemegen(morph_df, splitmorph, re )


In [510]:
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


In [511]:
#Adding an instance manually
splitmorph = V1s_df.loc[1, 'MorphemeSeparated']
kir = splitmorph
kir.pop(0)
kir.pop(0)
kir.pop(3)
kir.pop(3)
kir.pop(3)
kir

['க', 'ி', 'ற']

In [512]:
morphemegen(morph_df, kir, re )


In [513]:
#Adding an instance manually
splitmorph = V1s_df.loc[6, 'MorphemeSeparated']
nt = splitmorph
nt.pop(2)
nt.pop(2)
nt.pop(2)

nt

['ண', 'ட']

In [514]:
morphemegen(morph_df, nt, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


In [516]:
re = '(V...1S...)'
V1s_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
V1s_df = casefilter(V1s_df)

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [519]:
splitmorph = V1s_df.loc[1, 'MorphemeSeparated']
splitmorph = splitmorph[0:5]
splitmorph

['க', '்', 'க', 'ி', 'ற']

In [520]:
morphemegen(morph_df, splitmorph, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


In [522]:
splitmorph = V1s_df.loc[5, 'MorphemeSeparated']
splitmorph = splitmorph[0:1]
splitmorph

['ன']

In [524]:
morphemegen(morph_df, splitmorph, re )
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
16,"[ே, ன, ்]",ேன்,(V...1S...),
17,"[க, ி, ற]",கிற,(V...1S...),
18,"[ண, ட]",ணட,(V...1S...),
19,"[க, ், க, ி, ற]",க்கிற,(V...1S...),
20,[ன],ன,(V...1S...),


### 2s

In [525]:
re = '(V...2S...)'
V2s_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
V2s_df = casefilter(V2s_df)
V2s_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,கவலைப்படாதீர்கள்,கவலைப்படு,V,Vr-T2SH-N,0,ாதீர்கள்,0,0,"[ா, த, ீ, ர, ், க, ள, ்]"


In [526]:
splitmorph = V2s_df.loc[0, 'MorphemeSeparated']
irkal = splitmorph
irkal.pop(0)
irkal.pop(0)
irkal

['ீ', 'ர', '்', 'க', 'ள', '்']

In [527]:
morphemegen(morph_df, irkal, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


### 3sm

In [65]:
V3SM_df = merged_df[merged_df['xpos'].str.contains(r'(V...3SM.)')]
V3SM_df.reset_index(inplace=True)
V3SM_df.drop_duplicates(subset = 'lemma', inplace=True)
V3SM_df

  return func(self, *args, **kwargs)


Unnamed: 0,index,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts


### 3sf

In [66]:
V3SF_df = merged_df[merged_df['xpos'].str.contains(r'(V...3SF.)')]
V3SF_df.reset_index(inplace=True)
V3SF_df.drop_duplicates(subset = 'lemma', inplace=True)
V3SF_df

  return func(self, *args, **kwargs)


Unnamed: 0,index,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts


### 3sn

Past

In [529]:
re = '(V.-D3SN.)'
VD3SN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VD3SN_df = casefilter(VD3SN_df)
VD3SN_df


  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,இருந்தது,இரு,V,VR-D3SNAA,22,ந்தது,0,16,"[ந, ், த, த, ு]"
1,தொடங்கியது,தொடங்கு,V,Vr-D3SNAA,0,ியது,0,12,"[ி, ய, த, ு]"
2,தக்கது,தகு,V,VR-D3SNAA,25,்கது,0,6,"[், க, த, ு]"
3,நடைபெற்றது,நடைபெறு,V,Vr-D3SNAA,0,்றது,0,6,"[், ற, த, ு]"
4,அறிவித்தது,அறிவி,V,Vr-D3SNAA,0,த்தது,0,5,"[த, ், த, த, ு]"
5,விட்டது,விடு,V,VR-D3SNAA,10,்டது,0,4,"[், ட, த, ு]"
6,இருந்தது,இரு,V,VR-D3SNAA,16,இருந்தது,2,3,"[இ, ர, ு, ந, ், த, த, ு]"
7,செய்தது,செய்,V,Vr-D3SNAA,27,தது,0,2,"[த, த, ு]"
8,வென்றது,வெல்,V,Vr-D3SNAA,0,னறது,0,2,"[ன, ற, த, ு]"
9,கொண்டது,கொள்,V,VR-D3SNAA,16,ணடது,0,2,"[ண, ட, த, ு]"


In [530]:
atu = VD3SN_df.loc[1, 'MorphemeSeparated']
atu.pop(0)
atu.pop(0)
atu

['த', 'ு']

In [531]:
morphemegen(morph_df, atu, re )
morph_df

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
1,"[ை, ய, ்]",ைய்,(N.D..S...),
2,"[ய, ை]",யை,(N.D..S...),
3,ை,ை,(N.D..S...),
4,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
5,"[க, ், க, ு]",க்கு,(N.D..S...),
6,"[க, ு]",கு,(N.D..S...),
7,"[க, ள, ு]",களு,(N.D..P...),
8,"[ா, ல, ்]",ால்,(N.I......),
9,"[த, ு]",து,(N.G.*),
10,"[ி, ன, ்]",ின்,(N.G.*),


In [532]:
nt = VD3SN_df.loc[0, 'MorphemeSeparated']
nt.pop(3)
nt.pop(3)
nt

['ந', '்', 'த']

In [533]:
morphemegen(morph_df, nt, re )
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
19,"[க, ், க, ி, ற]",க்கிற,(V...1S...),
20,[ன],ன,(V...1S...),
21,"[ீ, ர, ், க, ள, ்]",ீர்கள்,(V...2S...),
22,"[த, ு]",து,(V.-D3SN.),
23,"[ந, ், த]",ந்த,(V.-D3SN.),


In [537]:
re = '(V.-D3SN.)'
VD3SN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VD3SN_df = casefilter(VD3SN_df)

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [539]:
iy = VD3SN_df.loc[1, 'MorphemeSeparated']
iy = iy[0:2]
iy

['ி', 'ய']

In [540]:
morphemegen(morph_df, iy, re )
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
20,[ன],ன,(V...1S...),
21,"[ீ, ர, ், க, ள, ்]",ீர்கள்,(V...2S...),
22,"[த, ு]",து,(V.-D3SN.),
23,"[ந, ், த]",ந்த,(V.-D3SN.),
24,"[ி, ய]",ிய,(V.-D3SN.),


In [543]:
t = VD3SN_df.loc[4, 'MorphemeSeparated']
t = t[0:3]
t

['த', '்', 'த']

In [544]:
morphemegen(morph_df, t, re )
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
21,"[ீ, ர, ், க, ள, ்]",ீர்கள்,(V...2S...),
22,"[த, ு]",து,(V.-D3SN.),
23,"[ந, ், த]",ந்த,(V.-D3SN.),
24,"[ி, ய]",ிய,(V.-D3SN.),
25,"[த, ், த]",த்த,(V.-D3SN.),


In [546]:
t = VD3SN_df.loc[5, 'MorphemeSeparated']
t = t[0:2]
t

['்', 'ட']

In [547]:
morphemegen(morph_df, t, re )
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
22,"[த, ு]",து,(V.-D3SN.),
23,"[ந, ், த]",ந்த,(V.-D3SN.),
24,"[ி, ய]",ிய,(V.-D3SN.),
25,"[த, ், த]",த்த,(V.-D3SN.),
26,"[், ட]",்ட,(V.-D3SN.),


In [549]:
t = VD3SN_df.loc[7, 'MorphemeSeparated']
t = t[0:1]
t

['த']

In [550]:
morphemegen(morph_df, t, re )
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
23,"[ந, ், த]",ந்த,(V.-D3SN.),
24,"[ி, ய]",ிய,(V.-D3SN.),
25,"[த, ், த]",த்த,(V.-D3SN.),
26,"[், ட]",்ட,(V.-D3SN.),
27,[த],த,(V.-D3SN.),


In [551]:
n = VD3SN_df.loc[8, 'MorphemeSeparated']
n = n[0:1]
n

['ன']

In [552]:
morphemegen(morph_df, n, re )
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
24,"[ி, ய]",ிய,(V.-D3SN.),
25,"[த, ், த]",த்த,(V.-D3SN.),
26,"[், ட]",்ட,(V.-D3SN.),
27,[த],த,(V.-D3SN.),
28,[ன],ன,(V.-D3SN.),


In [554]:
nt = VD3SN_df.loc[9, 'MorphemeSeparated']
nt = nt[0:2]
nt

['ண', 'ட']

In [555]:
morphemegen(morph_df, nt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
24,"[த, ், த]",த்த,(V.-D3SN.),
25,"[், ட]",்ட,(V.-D3SN.),
26,[த],த,(V.-D3SN.),
27,[ன],ன,(V.-D3SN.),
28,"[ண, ட]",ணட,(V.-D3SN.),


In [557]:
t = VD3SN_df.loc[12, 'MorphemeSeparated']
t = t[1:4]
t

['ட', '்', 'ட']

In [558]:
morphemegen(morph_df, t, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
25,"[், ட]",்ட,(V.-D3SN.),
26,[த],த,(V.-D3SN.),
27,[ன],ன,(V.-D3SN.),
28,"[ண, ட]",ணட,(V.-D3SN.),
29,"[ட, ், ட]",ட்ட,(V.-D3SN.),


Present

In [559]:
re = '(V..P3SN.)'
VP3SN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VP3SN_df = casefilter(VP3SN_df)
VP3SN_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,அறிவிக்கிறது,அறிவி,V,Vr-P3SNAA,0,க்கிறது,0,8,"[க, ், க, ி, ற, த, ு]"
1,இருக்கிறது,இரு,V,Vr-P3SNAA,6,இருக்கிறது,2,4,"[இ, ர, ு, க, ், க, ி, ற, த, ு]"
2,கூறுகிறார்,கூறு,V,VzNP3SNAA,5,கிறார்,0,1,"[க, ி, ற, ா, ர, ்]"
3,நடைபெறுகிறது,நடைபெறு,V,Vr-P3SNAA,0,கிறது,0,0,"[க, ி, ற, த, ு]"


In [560]:
kir = VP3SN_df.loc[0, 'MorphemeSeparated']
kir.pop(5)
kir.pop(5)
kir

['க', '்', 'க', 'ி', 'ற']

In [561]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
26,[த],த,(V.-D3SN.),
27,[ன],ன,(V.-D3SN.),
28,"[ண, ட]",ணட,(V.-D3SN.),
29,"[ட, ், ட]",ட்ட,(V.-D3SN.),
30,"[க, ், க, ி, ற]",க்கிற,(V..P3SN.),


In [562]:
kir.pop(0)
kir.pop(0)
kir

['க', 'ி', 'ற']

In [563]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
27,[ன],ன,(V.-D3SN.),
28,"[ண, ட]",ணட,(V.-D3SN.),
29,"[ட, ், ட]",ட்ட,(V.-D3SN.),
30,"[க, ி, ற]",க்கிற,(V..P3SN.),
31,"[க, ி, ற]",கிற,(V..P3SN.),


In [564]:
atu = VP3SN_df.loc[3, 'MorphemeSeparated']
atu.pop(0)
atu.pop(0)
atu.pop(0)
atu

['த', 'ு']

In [565]:
morphemegen(morph_df, atu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
28,"[ண, ட]",ணட,(V.-D3SN.),
29,"[ட, ், ட]",ட்ட,(V.-D3SN.),
30,"[க, ி, ற]",க்கிற,(V..P3SN.),
31,"[க, ி, ற]",கிற,(V..P3SN.),
32,"[த, ு]",து,(V..P3SN.),


Future

In [566]:
re = '(V.-F3SN.)'
VF3SN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VF3SN_df = casefilter(VF3SN_df)
VF3SN_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,ப்படும்,படு,V,VR-F3SNPA,13,்படும்,0,12,"[், ப, ட, ு, ம, ்]"
1,செய்யும்,செய்,V,Vr-F3SNAA,15,யும்,0,8,"[ய, ு, ம, ்]"
2,எடுக்கும்,எடு,V,Vr-F3SNAA,6,க்கும்,0,7,"[க, ், க, ு, ம, ்]"
3,அவசியமாகும்,அவசியம்,V,Vr-F3SNAA,0,ாகும்,0,1,"[ா, க, ு, ம, ்]"
4,துறையாகும்,துறை,V,Vr-F3SNAA,0,யாகும்,0,1,"[ய, ா, க, ு, ம, ்]"
5,ஆராயப்படும்,ஆராய்,V,Vr-F3SNAA,0,ப்படும்,0,1,"[ப, ், ப, ட, ு, ம, ்]"
6,ஏற்கும்,ஏல்,V,Vr-F3SNAA,8,றகும்,0,1,"[ற, க, ு, ம, ்]"
7,மேற்கொள்ளும்,மேற்கொள்,V,Vr-F3SNAA,15,ளும்,0,1,"[ள, ு, ம, ்]"
8,படும்,படு,V,VR-F3SNPA,18,ம்,0,0,"[ம, ்]"


In [567]:
kk = VF3SN_df.loc[2, 'MorphemeSeparated']
kk.pop(3)
kk.pop(3)
kk.pop(3)
kk

['க', '்', 'க']

In [568]:
morphemegen(morph_df, kk, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
29,"[ட, ், ட]",ட்ட,(V.-D3SN.),
30,"[க, ி, ற]",க்கிற,(V..P3SN.),
31,"[க, ி, ற]",கிற,(V..P3SN.),
32,"[த, ு]",து,(V..P3SN.),
33,"[க, ், க]",க்க,(V.-F3SN.),


In [569]:
k = VF3SN_df.loc[3, 'MorphemeSeparated']
k.pop(0)
k.pop(1)
k.pop(1)
k.pop(1)
k

['க']

In [570]:
morphemegen(morph_df, k, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
30,"[க, ி, ற]",க்கிற,(V..P3SN.),
31,"[க, ி, ற]",கிற,(V..P3SN.),
32,"[த, ு]",து,(V..P3SN.),
33,"[க, ், க]",க்க,(V.-F3SN.),
34,[க],க,(V.-F3SN.),


In [571]:
um = VF3SN_df.loc[7, 'MorphemeSeparated']
um.pop(0)
um

['ு', 'ம', '்']

In [572]:
morphemegen(morph_df, um, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
31,"[க, ி, ற]",கிற,(V..P3SN.),
32,"[த, ு]",து,(V..P3SN.),
33,"[க, ், க]",க்க,(V.-F3SN.),
34,[க],க,(V.-F3SN.),
35,"[ு, ம, ்]",ும்,(V.-F3SN.),


Tenseless - has negatives

In [573]:
re = '(V.-T3SN.)'
VT3SN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VT3SN_df = casefilter(VT3SN_df)
VT3SN_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,உள்ளது,உள்,V,VR-T3SNAA,28,ளது,0,41,"[ள, த, ு]"
1,மாட்டாது,மாட்டு,V,VR-T3SN-N,7,ாது,0,6,"[ா, த, ு]"
2,முடியாது,முடி,V,VR-T3SN-N,10,யாது,0,4,"[ய, ா, த, ு]"
3,ஆம்,ஆகு,V,VR-T3SNAA,8,ஆம்,2,4,"[ஆ, ம, ்]"
4,பெற்றுள்ளது,பெறு,V,VR-T3SNAA,7,்றுள்ளது,0,2,"[், ற, ு, ள, ், ள, த, ு]"
5,வேண்டாம்,வேண்டு,V,VR-T3SN-N,1,ாம்,0,1,"[ா, ம, ்]"
6,உள்ளது,உள்,V,VR-T3SNAA,14,உள்ளது,2,0,"[உ, ள, ், ள, த, ு]"


In [574]:
tu = VT3SN_df.loc[0, 'MorphemeSeparated']
tu.pop(0)
tu

['த', 'ு']

In [575]:
morphemegen(morph_df, tu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
32,"[த, ு]",து,(V..P3SN.),
33,"[க, ், க]",க்க,(V.-F3SN.),
34,[க],க,(V.-F3SN.),
35,"[ு, ம, ்]",ும்,(V.-F3SN.),
36,"[த, ு]",து,(V.-T3SN.),


Negative verbs

In [576]:
re = '(V.......N)'
VN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VN_df = casefilter(VN_df)
VN_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,பெறாமல்,பெறு,V,Vt-T----N,9,ாமல்,0,4,"[ா, ம, ல, ்]"
1,முடியாது,முடி,V,VR-T3SN-N,10,யாது,0,4,"[ய, ா, த, ு]"
2,ஏற்றுக்கொள்ளாமல்,ஏற்றுக்கொள்,V,Vt-T----N,28,ளாமல்,0,2,"[ள, ா, ம, ல, ்]"
3,இல்லாமல்,இல்,V,Vt-T----N,28,லாமல்,0,2,"[ல, ா, ம, ல, ்]"
4,கவலைப்படாதீர்கள்,கவலைப்படு,V,Vr-T2SH-N,0,ாதீர்கள்,0,1,"[ா, த, ீ, ர, ், க, ள, ்]"
5,வேண்டாம்,வேண்டு,V,VR-T3SN-N,1,ாம்,0,1,"[ா, ம, ்]"
6,செய்யப்படாமல்,செய்,V,Vt-T----N,11,யப்படாமல்,0,1,"[ய, ப, ், ப, ட, ா, ம, ல, ்]"
7,முடியாதது,முடி,V,VzNT3SN-N,8,யாதது,0,1,"[ய, ா, த, த, ு]"
8,முடியாமல்,முடி,V,Vt-T----N,6,யாமல்,0,1,"[ய, ா, ம, ல, ்]"
9,மாட்டாது,மாட்டு,V,VR-T3SN-N,7,ாது,0,0,"[ா, த, ு]"


In [577]:
aa = VN_df.loc[5, 'MorphemeSeparated']
aa.pop(1)
aa.pop(1)
aa

['ா']

In [578]:
morphemegen(morph_df, aa, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
33,"[க, ், க]",க்க,(V.-F3SN.),
34,[க],க,(V.-F3SN.),
35,"[ு, ம, ்]",ும்,(V.-F3SN.),
36,"[த, ு]",து,(V.-T3SN.),
37,[ா],ா,(V.......N),


### 3sh

In [579]:
re = '(V...3SH.)'
V3SH_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
V3SH_df = casefilter(V3SH_df)
V3SH_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,உள்ளார்,உள்,V,VR-T3SHAA,23,உள்ளார்,2,31,"[உ, ள, ், ள, ா, ர, ்]"
1,பேசினார்,பேசு,V,Vr-D3SHAA,15,ினார்,0,27,"[ி, ன, ா, ர, ்]"
2,என்றார்,என்,V,Vr-D3SHAA,0,றார்,0,16,"[ற, ா, ர, ்]"
3,வந்தார்,வா,V,Vr-D3SHAA,0,ந்தார்,0,12,"[ந, ், த, ா, ர, ்]"
4,பார்வையிட்டார்,பார்வையிடு,V,Vr-D3SHAA,0,்டார்,0,6,"[், ட, ா, ர, ்]"
5,இருந்தார்,இரு,V,VR-D3SHAA,10,இருந்தார்,2,5,"[இ, ர, ு, ந, ், த, ா, ர, ்]"
6,கொண்டார்,கொள்,V,VR-D3SHAA,9,ணடார்,0,4,"[ண, ட, ா, ர, ்]"
7,பெற்றார்,பெறு,V,Vr-D3SHAA,0,்றார்,0,3,"[், ற, ா, ர, ்]"
8,கொள்வார்,கொள்,V,VR-F3SHAA,23,வார்,0,3,"[வ, ா, ர, ்]"
9,இருக்கிறார்,இரு,V,VR-P3SHAA,11,க்கிறார்,0,3,"[க, ், க, ி, ற, ா, ர, ்]"


In [580]:
ar = V3SH_df.loc[2, 'MorphemeSeparated']
ar.pop(0)
ar

['ா', 'ர', '்']

In [581]:
morphemegen(morph_df, ar, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
34,[க],க,(V.-F3SN.),
35,"[ு, ம, ்]",ும்,(V.-F3SN.),
36,"[த, ு]",து,(V.-T3SN.),
37,[ா],ா,(V.......N),
38,"[ா, ர, ்]",ார்,(V...3SH.),


In [582]:
inf = V3SH_df.loc[1, 'MorphemeSeparated']
inf.pop(2)
inf.pop(2)
inf.pop(2)
inf

['ி', 'ன']

In [583]:
morphemegen(morph_df, inf, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
35,"[ு, ம, ்]",ும்,(V.-F3SN.),
36,"[த, ு]",து,(V.-T3SN.),
37,[ா],ா,(V.......N),
38,"[ா, ர, ்]",ார்,(V...3SH.),
39,"[ி, ன]",ின,(V...3SH.),


In [584]:
nt = V3SH_df.loc[3, 'MorphemeSeparated']
nt.pop(3)
nt.pop(3)
nt.pop(3)
nt

['ந', '்', 'த']

In [585]:
morphemegen(morph_df, nt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
36,"[த, ு]",து,(V.-T3SN.),
37,[ா],ா,(V.......N),
38,"[ா, ர, ்]",ார்,(V...3SH.),
39,"[ி, ன]",ின,(V...3SH.),
40,"[ந, ், த]",ந்த,(V...3SH.),


In [586]:
t = V3SH_df.loc[4, 'MorphemeSeparated']
t.pop(2)
t.pop(2)
t.pop(2)
t

['்', 'ட']

In [587]:
morphemegen(morph_df, t, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
37,[ா],ா,(V.......N),
38,"[ா, ர, ்]",ார்,(V...3SH.),
39,"[ி, ன]",ின,(V...3SH.),
40,"[ந, ், த]",ந்த,(V...3SH.),
41,"[், ட]",்ட,(V...3SH.),


In [588]:
nt = V3SH_df.loc[6, 'MorphemeSeparated']
nt.pop(2)
nt.pop(2)
nt.pop(2)
nt

['ண', 'ட']

In [589]:
morphemegen(morph_df, nt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
38,"[ா, ர, ்]",ார்,(V...3SH.),
39,"[ி, ன]",ின,(V...3SH.),
40,"[ந, ், த]",ந்த,(V...3SH.),
41,"[், ட]",்ட,(V...3SH.),
42,"[ண, ட]",ணட,(V...3SH.),


In [590]:
v = V3SH_df.loc[8, 'MorphemeSeparated']
v.pop(1)
v.pop(1)
v.pop(1)
v

['வ']

In [591]:
morphemegen(morph_df, v, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
39,"[ி, ன]",ின,(V...3SH.),
40,"[ந, ், த]",ந்த,(V...3SH.),
41,"[், ட]",்ட,(V...3SH.),
42,"[ண, ட]",ணட,(V...3SH.),
43,[வ],வ,(V...3SH.),


In [592]:
kir = V3SH_df.loc[9, 'MorphemeSeparated']
kir.pop(5)
kir.pop(5)
kir.pop(5)
kir

['க', '்', 'க', 'ி', 'ற']

In [593]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
40,"[ந, ், த]",ந்த,(V...3SH.),
41,"[், ட]",்ட,(V...3SH.),
42,"[ண, ட]",ணட,(V...3SH.),
43,[வ],வ,(V...3SH.),
44,"[க, ், க, ி, ற]",க்கிற,(V...3SH.),


In [594]:
kir = V3SH_df.loc[12, 'MorphemeSeparated']
kir.pop(3)
kir.pop(3)
kir.pop(3)
kir

['க', 'ி', 'ற']

In [595]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
41,"[், ட]",்ட,(V...3SH.),
42,"[ண, ட]",ணட,(V...3SH.),
43,[வ],வ,(V...3SH.),
44,"[க, ், க, ி, ற]",க்கிற,(V...3SH.),
45,"[க, ி, ற]",கிற,(V...3SH.),


In [596]:
a = V3SH_df.loc[13, 'MorphemeSeparated']
a.pop(0)
a.pop(3)
a.pop(3)
a.pop(3)
a

['ட', '்', 'ட']

In [597]:
morphemegen(morph_df, a, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
42,"[ண, ட]",ணட,(V...3SH.),
43,[வ],வ,(V...3SH.),
44,"[க, ், க, ி, ற]",க்கிற,(V...3SH.),
45,"[க, ி, ற]",கிற,(V...3SH.),
46,"[ட, ், ட]",ட்ட,(V...3SH.),


In [598]:
b = V3SH_df.loc[18, 'MorphemeSeparated']
b.pop(1)
b.pop(1)
b.pop(1)
b

['த']

In [599]:
morphemegen(morph_df, b, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
43,[வ],வ,(V...3SH.),
44,"[க, ், க, ி, ற]",க்கிற,(V...3SH.),
45,"[க, ி, ற]",கிற,(V...3SH.),
46,"[ட, ், ட]",ட்ட,(V...3SH.),
47,[த],த,(V...3SH.),


In [600]:
c = V3SH_df.loc[20, 'MorphemeSeparated']
c.pop(3)
c.pop(3)
c.pop(3)
c

['த', '்', 'த']

In [601]:
morphemegen(morph_df, c, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
44,"[க, ், க, ி, ற]",க்கிற,(V...3SH.),
45,"[க, ி, ற]",கிற,(V...3SH.),
46,"[ட, ், ட]",ட்ட,(V...3SH.),
47,[த],த,(V...3SH.),
48,"[த, ், த]",த்த,(V...3SH.),


### 1pl

In [607]:
re = '(V...1P..)'
V1P_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
V1P_df = casefilter(V1P_df)
V1P_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,வற்புறுத்துகிறோம்,வற்புறுத்து,V,Vr-P1P-AA,0,கிறோம்,0,5,"[க, ி, ற, ோ, ம, ்]"
1,உள்ளோம்,உள்,V,VR-T1PAAA,11,ளோம்,0,4,"[ள, ோ, ம, ்]"
2,இருக்கிறோம்,இரு,V,VR-P1PAAA,33,க்கிறோம்,0,2,"[க, ், க, ி, ற, ோ, ம, ்]"
3,வாழ்ந்தோம்,வாழ்,V,Vr-D1P-AA,7,ந்தோம்,0,2,"[ந, ், த, ோ, ம, ்]"
4,உள்ளோம்,உள்,V,VR-T1PAAA,11,உள்ளோம்,2,1,"[உ, ள, ், ள, ோ, ம, ்]"
5,வைப்போம்,வை,V,Vr-F1P-AA,8,ப்போம்,0,1,"[ப, ், ப, ோ, ம, ்]"
6,கட்டுவோம்,கட்டு,V,Vr-F1P-AA,11,வோம்,0,0,"[வ, ோ, ம, ்]"


In [608]:
c = V1P_df.loc[0, 'MorphemeSeparated']
c = c[3:]
c

['ோ', 'ம', '்']

In [604]:
morphemegen(morph_df, c, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
45,"[க, ி, ற]",கிற,(V...3SH.),
46,"[ட, ், ட]",ட்ட,(V...3SH.),
47,[த],த,(V...3SH.),
48,"[த, ், த]",த்த,(V...3SH.),
49,"[ோ, ம, ்]",ோம்,(V...1P..),


In [610]:
kir= V1P_df.loc[0, 'MorphemeSeparated']
kir = kir[0:3]
kir

['க', 'ி', 'ற']

In [611]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
46,"[ட, ், ட]",ட்ட,(V...3SH.),
47,[த],த,(V...3SH.),
48,"[த, ், த]",த்த,(V...3SH.),
49,"[ோ, ம, ்]",ோம்,(V...1P..),
50,"[க, ி, ற]",கிற,(V...1P..),


In [612]:
kir= V1P_df.loc[2, 'MorphemeSeparated']
kir.pop(5)
kir.pop(5)
kir.pop(5)
kir

['க', '்', 'க', 'ி', 'ற']

In [613]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
47,[த],த,(V...3SH.),
48,"[த, ், த]",த்த,(V...3SH.),
49,"[ோ, ம, ்]",ோம்,(V...1P..),
50,"[க, ி, ற]",கிற,(V...1P..),
51,"[க, ், க, ி, ற]",க்கிற,(V...1P..),


In [614]:
nt= V1P_df.loc[3, 'MorphemeSeparated']
nt.pop(3)
nt.pop(3)
nt.pop(3)
nt

['ந', '்', 'த']

In [615]:
morphemegen(morph_df, nt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
48,"[த, ், த]",த்த,(V...3SH.),
49,"[ோ, ம, ்]",ோம்,(V...1P..),
50,"[க, ி, ற]",கிற,(V...1P..),
51,"[க, ், க, ி, ற]",க்கிற,(V...1P..),
52,"[ந, ், த]",ந்த,(V...1P..),


In [616]:
p= V1P_df.loc[5, 'MorphemeSeparated']
p.pop(3)
p.pop(3)
p.pop(3)
p

['ப', '்', 'ப']

In [617]:
morphemegen(morph_df, p, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
49,"[ோ, ம, ்]",ோம்,(V...1P..),
50,"[க, ி, ற]",கிற,(V...1P..),
51,"[க, ், க, ி, ற]",க்கிற,(V...1P..),
52,"[ந, ், த]",ந்த,(V...1P..),
53,"[ப, ், ப]",ப்ப,(V...1P..),


In [618]:
v= V1P_df.loc[6, 'MorphemeSeparated']
v.pop(1)
v.pop(1)
v.pop(1)
v

['வ']

In [619]:
morphemegen(morph_df, v, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
50,"[க, ி, ற]",கிற,(V...1P..),
51,"[க, ், க, ி, ற]",க்கிற,(V...1P..),
52,"[ந, ், த]",ந்த,(V...1P..),
53,"[ப, ், ப]",ப்ப,(V...1P..),
54,[வ],வ,(V...1P..),


### 2pl

In [620]:
re = '(V...2P..)'
V2P_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
V2P_df = casefilter(V2P_df)
V2P_df



  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,இருங்கள்,இரு,V,Vj-T2PAAA,0,ங்கள்,0,1,"[ங, ், க, ள, ்]"
1,இருக்கிறீர்கள்,இரு,V,VR-P2PHAA,4,இருக்கிறீர்கள்,2,1,"[இ, ர, ு, க, ், க, ி, ற, ீ, ர, ், க, ள, ்]"
2,விரும்புகிறீர்கள்,விரும்பு,V,Vr-P2PHAA,25,கிறீர்கள்,0,0,"[க, ி, ற, ீ, ர, ், க, ள, ்]"


In [622]:
kir= V2P_df.loc[1, 'MorphemeSeparated']
kir = kir[3:8]
kir

['க', '்', 'க', 'ி', 'ற']

In [624]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
51,"[க, ், க, ி, ற]",க்கிற,(V...1P..),
52,"[ந, ், த]",ந்த,(V...1P..),
53,"[ப, ், ப]",ப்ப,(V...1P..),
54,[வ],வ,(V...1P..),
55,"[க, ், க, ி, ற]",க்கிற,(V...2P..),


In [625]:
kir= V2P_df.loc[2, 'MorphemeSeparated']
kir.pop(3)
kir.pop(3)
kir.pop(3)
kir.pop(3)
kir.pop(3)
kir.pop(3)
kir

['க', 'ி', 'ற']

In [626]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
52,"[ந, ், த]",ந்த,(V...1P..),
53,"[ப, ், ப]",ப்ப,(V...1P..),
54,[வ],வ,(V...1P..),
55,"[க, ், க, ி, ற]",க்கிற,(V...2P..),
56,"[க, ி, ற]",கிற,(V...2P..),


In [627]:
irkal= V2P_df.loc[1, 'MorphemeSeparated']
irkal = irkal[8:]
irkal

['ீ', 'ர', '்', 'க', 'ள', '்']

In [628]:
morphemegen(morph_df, irkal, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
53,"[ப, ், ப]",ப்ப,(V...1P..),
54,[வ],வ,(V...1P..),
55,"[க, ், க, ி, ற]",க்கிற,(V...2P..),
56,"[க, ி, ற]",கிற,(V...2P..),
57,"[ீ, ர, ், க, ள, ்]",ீர்கள்,(V...2P..),


### 3plmf

Past

In [639]:
re = '(V..D3PH.)'
VD3PH_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VD3PH_df = casefilter(VD3PH_df)
VD3PH_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,தெரிவித்தனர்,தெரிவி,V,Vr-D3PHAA,0,த்தனர்,0,8,"[த, ், த, ன, ர, ்]"
1,விட்டனர்,விடு,V,VR-D3PHAA,12,்டனர்,0,6,"[், ட, ன, ர, ்]"
2,சென்றனர்,செல்,V,Vr-D3PHAA,9,னறனர்,0,3,"[ன, ற, ன, ர, ்]"
3,வரவேற்றனர்,வரவேல்,V,Vr-D3PHAA,0,றறனர்,0,2,"[ற, ற, ன, ர, ்]"
4,தயாராகினர்,தயாராகு,V,Vr-D3PHAA,0,ினர்,0,2,"[ி, ன, ர, ்]"
5,செய்தனர்,செய்,V,Vr-D3PHAA,11,தனர்,0,2,"[த, ன, ர, ்]"
6,கொண்டனர்,கொள்,V,VR-D3PHAA,23,ணடனர்,0,2,"[ண, ட, ன, ர, ்]"
7,கேட்டனர்,கேள்,V,Vr-D3PHAA,0,டடனர்,0,1,"[ட, ட, ன, ர, ்]"
8,என்றனர்,என்,V,Vr-D3PHAA,0,றனர்,0,1,"[ற, ன, ர, ்]"
9,வந்தனர்,வா,V,VR-D3PHAA,11,ந்தனர்,0,0,"[ந, ், த, ன, ர, ்]"


In [640]:
n= VD3PH_df.loc[1, 'MorphemeSeparated']
n.pop(0)
n.pop(0)
n.pop(1)
n.pop(1)
n

['ன']

In [643]:
morphemegen(morph_df, n, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
55,"[க, ், க, ி, ற]",க்கிற,(V...2P..),
56,"[க, ி, ற]",கிற,(V...2P..),
57,"[ீ, ர, ், க, ள, ்]",ீர்கள்,(V...2P..),
58,"[த, ், த]",த்த,(V..D3PH.),
59,[ன],ன,(V..D3PH.),


In [644]:
ar= VD3PH_df.loc[2, 'MorphemeSeparated']
ar.pop(0)
ar.pop(0)
ar.pop(0)
ar

['ர', '்']

In [645]:
morphemegen(morph_df, ar, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
56,"[க, ி, ற]",கிற,(V...2P..),
57,"[ீ, ர, ், க, ள, ்]",ீர்கள்,(V...2P..),
58,"[த, ், த]",த்த,(V..D3PH.),
59,[ன],ன,(V..D3PH.),
60,"[ர, ்]",ர்,(V..D3PH.),


Present

In [669]:
re = '(V..P3PH.)'
VP3PH_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VP3PH_df = casefilter(VP3PH_df)
VP3PH_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,தெரிவிக்கின்றனர்,தெரிவி,V,Vr-P3PHAA,0,க்கின்றனர்,0,3,"[க, ், க, ி, ன, ், ற, ன, ர, ்]"
1,இருக்கினறனர்,இரு,V,VR-P3PHAA,16,இருக்கினறனர்,2,1,"[இ, ர, ு, க, ், க, ி, ன, ற, ன, ர, ்]"
2,இருக்கிறார்கள்,இரு,V,VR-P3PHAA,12,க்கிறார்கள்,0,1,"[க, ், க, ி, ற, ா, ர, ், க, ள, ்]"
3,வழிபடுகிறார்கள்,வழிபடு,V,Vr-P3PHAA,0,கிறார்கள்,0,1,"[க, ி, ற, ா, ர, ், க, ள, ்]"
4,படுகின்றனர்,படு,V,VR-P3PHPA,13,கின்றனர்,0,0,"[க, ி, ன, ், ற, ன, ர, ்]"


In [670]:
kinr= VP3PH_df.loc[0, 'MorphemeSeparated']
kinr.pop(7)
kinr.pop(7)
kinr.pop(7)
kinr

['க', '்', 'க', 'ி', 'ன', '்', 'ற']

In [671]:
morphemegen(morph_df, kinr, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
56,"[க, ி, ற]",கிற,(V...2P..),
57,"[ீ, ர, ், க, ள, ்]",ீர்கள்,(V...2P..),
58,[ன],ன,(V..D3PH.),
59,"[ர, ்]",ர்,(V..D3PH.),
60,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PH.),


In [672]:
kir= VP3PH_df.loc[2, 'MorphemeSeparated']
kir.pop(5)
kir.pop(5)
kir.pop(5)
kir.pop(5)
kir.pop(5)
kir.pop(5)
kir

['க', '்', 'க', 'ி', 'ற']

In [673]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
57,"[ீ, ர, ், க, ள, ்]",ீர்கள்,(V...2P..),
58,[ன],ன,(V..D3PH.),
59,"[ர, ்]",ர்,(V..D3PH.),
60,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PH.),
61,"[க, ், க, ி, ற]",க்கிற,(V..P3PH.),


In [674]:
kir= VP3PH_df.loc[3, 'MorphemeSeparated']
kir = kir[0:3]
kir

['க', 'ி', 'ற']

In [675]:
morphemegen(morph_df, kir, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
58,[ன],ன,(V..D3PH.),
59,"[ர, ்]",ர்,(V..D3PH.),
60,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PH.),
61,"[க, ், க, ி, ற]",க்கிற,(V..P3PH.),
62,"[க, ி, ற]",கிற,(V..P3PH.),


In [676]:
ar= VP3PH_df.loc[1, 'MorphemeSeparated']
ar = ar[10:]
ar

['ர', '்']

In [677]:
morphemegen(morph_df, ar, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
59,"[ர, ்]",ர்,(V..D3PH.),
60,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PH.),
61,"[க, ், க, ி, ற]",க்கிற,(V..P3PH.),
62,"[க, ி, ற]",கிற,(V..P3PH.),
63,"[ர, ்]",ர்,(V..P3PH.),


In [678]:
arkal= VP3PH_df.loc[3, 'MorphemeSeparated']
arkal.pop(0)
arkal.pop(0)
arkal.pop(0)
arkal

['ா', 'ர', '்', 'க', 'ள', '்']

In [679]:
morphemegen(morph_df, arkal, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
60,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PH.),
61,"[க, ், க, ி, ற]",க்கிற,(V..P3PH.),
62,"[க, ி, ற]",கிற,(V..P3PH.),
63,"[ர, ்]",ர்,(V..P3PH.),
64,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..P3PH.),


Future

In [680]:

re = '(V..F3PH.)'
VF3PH_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VF3PH_df = casefilter(VF3PH_df)
VF3PH_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,நடத்துவர்,நடத்து,V,Vr-F3PHAA,0,வர்,0,1,"[வ, ர, ்]"
1,படுவார்கள்,படு,V,VR-F3PHPA,39,வார்கள்,0,0,"[வ, ா, ர, ், க, ள, ்]"


In [681]:
morphemegen(morph_df, arkal, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
61,"[க, ், க, ி, ற]",க்கிற,(V..P3PH.),
62,"[க, ி, ற]",கிற,(V..P3PH.),
63,"[ர, ்]",ர்,(V..P3PH.),
64,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..P3PH.),
65,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..F3PH.),


In [682]:
morphemegen(morph_df, ar, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
62,"[க, ி, ற]",கிற,(V..P3PH.),
63,"[ர, ்]",ர்,(V..P3PH.),
64,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..P3PH.),
65,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..F3PH.),
66,"[ர, ்]",ர்,(V..F3PH.),


In [683]:
var= VF3PH_df.loc[0, 'MorphemeSeparated']
var.pop(1)
var.pop(1)
var


['வ']

In [684]:
morphemegen(morph_df, var, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
63,"[ர, ்]",ர்,(V..P3PH.),
64,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..P3PH.),
65,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..F3PH.),
66,"[ர, ்]",ர்,(V..F3PH.),
67,[வ],வ,(V..F3PH.),


Tenseless

In [685]:
re = '(V..T3PH.)'
VT3PH_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VT3PH_df = casefilter(VT3PH_df)
VT3PH_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,மாட்டார்கள்,மாட்டு,V,VR-T3PHAA,9,ார்கள்,0,0,"[ா, ர, ், க, ள, ்]"


In [686]:
morphemegen(morph_df, arkal, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
64,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..P3PH.),
65,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..F3PH.),
66,"[ர, ்]",ர்,(V..F3PH.),
67,[வ],வ,(V..F3PH.),
68,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..T3PH.),


### 3pln

Past

In [687]:
re = '(V..D3PN.)'
VD3PN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VD3PN_df = casefilter(VD3PN_df)
VD3PN_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,தெரிவித்தன,தெரிவி,V,Vr-D3PNAA,0,த்தன,0,7,"[த, ், த, ன]"
1,பட்டன,படு,V,VR-D3PNPA,25,்டன,0,3,"[், ட, ன]"
2,பதிவாயின,பதிவாகு,V,Vr-D3PNAA,0,யின,0,1,"[ய, ி, ன]"
3,வந்தன,வா,V,VR-D3PNAA,15,ந்தன,0,0,"[ந, ், த, ன]"


In [688]:
tt= VD3PN_df.loc[0, 'MorphemeSeparated']
tt = tt[0:3]
tt

['த', '்', 'த']

In [689]:
morphemegen(morph_df, tt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
65,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..F3PH.),
66,"[ர, ்]",ர்,(V..F3PH.),
67,[வ],வ,(V..F3PH.),
68,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..T3PH.),
69,"[த, ், த]",த்த,(V..D3PN.),


In [690]:
tt= VD3PN_df.loc[1, 'MorphemeSeparated']
tt = tt[0:2]
tt

['்', 'ட']

In [691]:
morphemegen(morph_df, tt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
66,"[ர, ்]",ர்,(V..F3PH.),
67,[வ],வ,(V..F3PH.),
68,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..T3PH.),
69,"[த, ், த]",த்த,(V..D3PN.),
70,"[், ட]",்ட,(V..D3PN.),


In [692]:
inn= VD3PN_df.loc[2, 'MorphemeSeparated']
inn = inn[1:2]
inn

['ி']

In [693]:
morphemegen(morph_df, inn, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
67,[வ],வ,(V..F3PH.),
68,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..T3PH.),
69,"[த, ், த]",த்த,(V..D3PN.),
70,"[், ட]",்ட,(V..D3PN.),
71,[ி],ி,(V..D3PN.),


In [694]:
nt= VD3PN_df.loc[3, 'MorphemeSeparated']
nt = nt[0:3]
nt

['ந', '்', 'த']

In [695]:
morphemegen(morph_df, nt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
68,"[ா, ர, ், க, ள, ்]",ார்கள்,(V..T3PH.),
69,"[த, ், த]",த்த,(V..D3PN.),
70,"[், ட]",்ட,(V..D3PN.),
71,[ி],ி,(V..D3PN.),
72,"[ந, ், த]",ந்த,(V..D3PN.),


In [696]:
n= VD3PN_df.loc[0, 'MorphemeSeparated']
n = n[3:]
n

['ன']

In [697]:
morphemegen(morph_df, n, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
69,"[த, ், த]",த்த,(V..D3PN.),
70,"[், ட]",்ட,(V..D3PN.),
71,[ி],ி,(V..D3PN.),
72,"[ந, ், த]",ந்த,(V..D3PN.),
73,[ன],ன,(V..D3PN.),


Present

In [698]:
re = '(V..P3PN.)'
VP3PN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VP3PN_df = casefilter(VP3PN_df)
VP3PN_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,இருக்கின்றன,இரு,V,VR-P3PNAA,20,க்கின்றன,0,1,"[க, ், க, ி, ன, ், ற, ன]"
1,படுகின்றன,படு,V,VR-P3PNPA,18,கின்றன,0,0,"[க, ி, ன, ், ற, ன]"


In [699]:
kinr= VP3PN_df.loc[0, 'MorphemeSeparated']
kinr = kinr[0:7]
kinr

['க', '்', 'க', 'ி', 'ன', '்', 'ற']

In [700]:
morphemegen(morph_df, kinr, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
70,"[், ட]",்ட,(V..D3PN.),
71,[ி],ி,(V..D3PN.),
72,"[ந, ், த]",ந்த,(V..D3PN.),
73,[ன],ன,(V..D3PN.),
74,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PN.),


In [701]:
kinr= VP3PN_df.loc[1, 'MorphemeSeparated']
kinr = kinr[0:5]
kinr

['க', 'ி', 'ன', '்', 'ற']

In [702]:
morphemegen(morph_df, kinr, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
71,[ி],ி,(V..D3PN.),
72,"[ந, ், த]",ந்த,(V..D3PN.),
73,[ன],ன,(V..D3PN.),
74,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PN.),
75,"[க, ி, ன, ், ற]",கின்ற,(V..P3PN.),


In [703]:
morphemegen(morph_df, n, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
72,"[ந, ், த]",ந்த,(V..D3PN.),
73,[ன],ன,(V..D3PN.),
74,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PN.),
75,"[க, ி, ன, ், ற]",கின்ற,(V..P3PN.),
76,[ன],ன,(V..P3PN.),


Future

In [704]:
re = '(V..F3PN.)'
VF3PN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VF3PN_df = casefilter(VF3PN_df)
VF3PN_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated


Tenseless

In [705]:
re = '(V..T3PN.)'
VT3PN_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VT3PN_df = casefilter(VT3PN_df)
VT3PN_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,உள்ளன,உள்,V,VR-T3PNAA,12,ளன,0,13,"[ள, ன]"
1,இல்லை,இல்,V,Vr-T3PNAA,17,லை,0,7,"[ல, ை]"
2,உள்ளன,உள்,V,VR-T3PNAA,0,உள்ளன,2,3,"[உ, ள, ், ள, ன]"
3,கிடைக்காது,கிடை,V,Vr-T3PNAA,0,க்காது,0,2,"[க, ், க, ா, த, ு]"
4,அல்ல,அல்,V,VR-T3PNAA,0,ல,0,2,[ல]
5,பலனில்லை,பலன்,V,Vr-T3PNAA,0,ில்லை,0,2,"[ி, ல, ், ல, ை]"
6,கட்டுப்படுத்தாது,கட்டுப்பாடு,V,Vr-T3PNAA,0,டுத்தாது,0,1,"[ட, ு, த, ், த, ா, த, ு]"
7,வேண்டாம்,வேண்டு,V,Vr-T3PNAA,0,ாம்,0,1,"[ா, ம, ்]"
8,சரியில்லை,சரி,V,Vr-T3PNAA,0,யில்லை,0,1,"[ய, ி, ல, ், ல, ை]"
9,இல்லை,இல்,V,VR-T3PNAA,3,இல்லை,2,0,"[இ, ல, ், ல, ை]"


# Verb - Infinitive

In [335]:
re = '(Vu.......)'
Vu_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
Vu_df = casefilter(Vu_df)
Vu_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,நிறைவேற்றப்,நிறைவேற்று,V,Vu-T---AA,0,ப்,0,49,"[ப, ்]"
1,அமைக்க,அமை,V,Vu-T---AA,9,க்க,1,48,"[க, ், க]"
2,தெரிய,தெரி,V,Vu-T---AA,0,ய,0,19,[ய]
3,செய்யப்,செய்,V,Vu-T---AA,17,யப்,1,15,"[ய, ப, ்]"
4,விரிவுபடுத்தவ்,விரிவுபடுத்து,V,Vu-T---AA,24,வ்,1,11,"[வ, ்]"
5,குறிப்பிடத்,குறிப்பிடு,V,Vu-T---AA,0,த்,0,9,"[த, ்]"
6,அமைக்கவ்,அமை,V,Vu-T---AA,24,க்கவ்,1,8,"[க, ், க, வ, ்]"
7,ஏற்க,ஏல்,V,Vu-T---AA,29,றக,0,5,"[ற, க]"
8,இடம்தரக்,இடம்தரு,V,Vu-T---AA,6,க்,0,5,"[க, ்]"
9,ஏற்படுத்த,ஏற்படு,V,Vu-T---AA,12,த்த,0,4,"[த, ், த]"


# Verb - Participle

In [706]:
re = '(VT.......)'
VT_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VT_df = casefilter(VT_df)
VT_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,பட்டு,படு,V,VT-T---PA,16,பட்டு,2,19,"[ப, ட, ், ட, ு]"
1,கொண்டு,கொள்,V,VT-T---AA,12,ணடு,0,7,"[ண, ட, ு]"
2,விட்டு,வீடு,V,VT-T---AA,9,ி்டு,0,4,"[ி, ், ட, ு]"
3,வந்து,வா,V,VT-T---AA,28,ந்து,0,3,"[ந, ், த, ு]"
4,கொண்ட்,கொள்,V,VT-T---AA,14,ணட்,1,3,"[ண, ட, ்]"
5,வந்த்,வா,V,VT-T---AA,12,ந்த்,1,3,"[ந, ், த, ்]"
6,ப்பட்ட்,படு,V,VT-T---PA,3,்பட்ட்,1,2,"[், ப, ட, ், ட, ்]"
7,பிடித்த்,பிடு,V,VT-T---AA,25,ித்த்,1,1,"[ி, த, ், த, ்]"
8,இருந்து,இரு,V,VT-T---AA,7,இருந்து,2,1,"[இ, ர, ு, ந, ், த, ு]"
9,வைத்து,வை,V,VT-T---AA,37,த்து,0,1,"[த, ், த, ு]"


In [707]:
tu= VT_df.loc[0, 'MorphemeSeparated']
tu = tu[3:]
tu

['ட', 'ு']

In [708]:
morphemegen(morph_df, tu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
73,[ன],ன,(V..D3PN.),
74,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PN.),
75,"[க, ி, ன, ், ற]",கின்ற,(V..P3PN.),
76,[ன],ன,(V..P3PN.),
77,"[ட, ு]",டு,(VT.......),


In [709]:
ntu= VT_df.loc[3, 'MorphemeSeparated']
ntu

['ந', '்', 'த', 'ு']

In [710]:
morphemegen(morph_df, ntu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
74,"[க, ், க, ி, ன, ், ற]",க்கின்ற,(V..P3PN.),
75,"[க, ி, ன, ், ற]",கின்ற,(V..P3PN.),
76,[ன],ன,(V..P3PN.),
77,"[ட, ு]",டு,(VT.......),
78,"[ந, ், த, ு]",ந்து,(VT.......),


In [711]:
tu= VT_df.loc[9, 'MorphemeSeparated']
tu

['த', '்', 'த', 'ு']

In [712]:
morphemegen(morph_df, tu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
75,"[க, ி, ன, ், ற]",கின்ற,(V..P3PN.),
76,[ன],ன,(V..P3PN.),
77,"[ட, ு]",டு,(VT.......),
78,"[ந, ், த, ு]",ந்து,(VT.......),
79,"[த, ், த, ு]",த்து,(VT.......),


In [713]:
tu= VT_df.loc[10, 'MorphemeSeparated']
tu

['த', 'ு']

In [714]:
morphemegen(morph_df, tu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
76,[ன],ன,(V..P3PN.),
77,"[ட, ு]",டு,(VT.......),
78,"[ந, ், த, ு]",ந்து,(VT.......),
79,"[த, ், த, ு]",த்து,(VT.......),
80,"[த, ு]",து,(VT.......),


In [715]:
tu= VT_df.loc[4, 'MorphemeSeparated']
tu = tu[1:]
tu

['ட', '்']

In [716]:
morphemegen(morph_df, tu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
77,"[ட, ு]",டு,(VT.......),
78,"[ந, ், த, ு]",ந்து,(VT.......),
79,"[த, ், த, ு]",த்து,(VT.......),
80,"[த, ு]",து,(VT.......),
81,"[ட, ்]",ட்,(VT.......),


In [717]:
morph_df.loc[81, 'Morpheme continued']=1
morph_df.tail()


Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
77,"[ட, ு]",டு,(VT.......),
78,"[ந, ், த, ு]",ந்து,(VT.......),
79,"[த, ், த, ு]",த்து,(VT.......),
80,"[த, ு]",து,(VT.......),
81,"[ட, ்]",ட்,(VT.......),1.0


In [718]:
tu= VT_df.loc[5, 'MorphemeSeparated']
tu = tu[2:]
tu

['த', '்']

In [719]:
morphemegen(morph_df, tu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
78,"[ந, ், த, ு]",ந்து,(VT.......),
79,"[த, ், த, ு]",த்து,(VT.......),
80,"[த, ு]",து,(VT.......),
81,"[ட, ்]",ட்,(VT.......),1.0
82,"[த, ்]",த்,(VT.......),


In [720]:
morph_df.loc[82, 'Morpheme continued']=1
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
78,"[ந, ், த, ு]",ந்து,(VT.......),
79,"[த, ், த, ு]",த்து,(VT.......),
80,"[த, ு]",து,(VT.......),
81,"[ட, ்]",ட்,(VT.......),1.0
82,"[த, ்]",த்,(VT.......),1.0


In [724]:
tu= VT_df.loc[0, 'MorphemeSeparated']
tu = tu[1:]
tu


['ட', '்', 'ட', 'ு']

In [725]:
morphemegen(morph_df, tu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
79,"[த, ், த, ு]",த்து,(VT.......),
80,"[த, ு]",து,(VT.......),
81,"[ட, ்]",ட்,(VT.......),1.0
82,"[த, ்]",த்,(VT.......),1.0
83,"[ட, ், ட, ு]",ட்டு,(VT.......),


In [729]:
ntu= VT_df.loc[1, 'MorphemeSeparated']
ntu
ntu


['ண', 'ட', 'ு']

In [730]:
morphemegen(morph_df, ntu, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
80,"[த, ு]",து,(VT.......),
81,"[ட, ்]",ட்,(VT.......),1.0
82,"[த, ்]",த்,(VT.......),1.0
83,"[ட, ், ட, ு]",ட்டு,(VT.......),
84,"[ண, ட, ு]",ணடு,(VT.......),


In [736]:
nt= VT_df.loc[4, 'MorphemeSeparated']
nt

['ண', 'ட', '்']

In [737]:
morphemegen(morph_df, nt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
81,"[ட, ்]",ட்,(VT.......),1.0
82,"[த, ்]",த்,(VT.......),1.0
83,"[ட, ், ட, ு]",ட்டு,(VT.......),
84,"[ண, ட, ு]",ணடு,(VT.......),
85,"[ண, ட, ்]",ணட்,(VT.......),


In [738]:
morph_df.loc[85, 'Morpheme continued'] = 1
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
81,"[ட, ்]",ட்,(VT.......),1.0
82,"[த, ்]",த்,(VT.......),1.0
83,"[ட, ், ட, ு]",ட்டு,(VT.......),
84,"[ண, ட, ு]",ணடு,(VT.......),
85,"[ண, ட, ்]",ணட்,(VT.......),1.0


In [739]:
nt= VT_df.loc[5, 'MorphemeSeparated']
nt

['ந', '்', 'த', '்']

In [740]:
morphemegen(morph_df, nt, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
82,"[த, ்]",த்,(VT.......),1.0
83,"[ட, ், ட, ு]",ட்டு,(VT.......),
84,"[ண, ட, ு]",ணடு,(VT.......),
85,"[ண, ட, ்]",ணட்,(VT.......),1.0
86,"[ந, ், த, ்]",ந்த்,(VT.......),


In [741]:
morph_df.loc[86, 'Morpheme continued'] = 1
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
82,"[த, ்]",த்,(VT.......),1.0
83,"[ட, ், ட, ு]",ட்டு,(VT.......),
84,"[ண, ட, ு]",ணடு,(VT.......),
85,"[ண, ட, ்]",ணட்,(VT.......),1.0
86,"[ந, ், த, ்]",ந்த்,(VT.......),1.0


In [747]:
t= VT_df.loc[6, 'MorphemeSeparated']
t = t[2:]
t

['ட', '்', 'ட', '்']

In [748]:
morphemegen(morph_df, t, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
84,"[ண, ட, ு]",ணடு,(VT.......),
85,"[ண, ட, ்]",ணட்,(VT.......),1.0
86,"[ந, ், த, ்]",ந்த்,(VT.......),1.0
87,"[த, ், த, ்]",த்த்,(VT.......),
88,"[ட, ், ட, ்]",ட்ட்,(VT.......),


In [745]:
t= VT_df.loc[7, 'MorphemeSeparated']
t = t[1:]
t

['த', '்', 'த', '்']

In [None]:
morphemegen(morph_df, t, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

In [749]:
morph_df.loc[87, 'Morpheme continued'] = 1
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
84,"[ண, ட, ு]",ணடு,(VT.......),
85,"[ண, ட, ்]",ணட்,(VT.......),1.0
86,"[ந, ், த, ்]",ந்த்,(VT.......),1.0
87,"[த, ், த, ்]",த்த்,(VT.......),1.0
88,"[ட, ், ட, ்]",ட்ட்,(VT.......),


In [750]:
morph_df.loc[88, 'Morpheme continued'] = 1
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
84,"[ண, ட, ு]",ணடு,(VT.......),
85,"[ண, ட, ்]",ணட்,(VT.......),1.0
86,"[ந, ், த, ்]",ந்த்,(VT.......),1.0
87,"[த, ், த, ்]",த்த்,(VT.......),1.0
88,"[ட, ், ட, ்]",ட்ட்,(VT.......),1.0


# Verb - Conditional

In [751]:
re = '(VW.......)'
VW_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VW_df = casefilter(VW_df)
VW_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,வந்தால்,வா,V,VW-T---AA,10,ந்தால்,0,2,"[ந, ், த, ா, ல, ்]"
1,இருந்தால்,இரு,V,VW-T---AA,5,இருந்தால்,2,2,"[இ, ர, ு, ந, ், த, ா, ல, ்]"
2,இருப்பின்,இரு,V,VW-T---AA,9,ப்பின்,0,1,"[ப, ், ப, ி, ன, ்]"
3,வைத்தால்,வை,V,VW-T---AA,11,த்தால்,1,1,"[த, ், த, ா, ல, ்]"
4,விடில்,விடு,V,VW-T---AA,7,ில்,0,1,"[ி, ல, ்]"
5,பட்டால்,படு,V,VW-T---PA,3,்டால்,0,0,"[், ட, ா, ல, ்]"


In [752]:
aal= VW_df.loc[5, 'MorphemeSeparated']
aal = aal[2:]
aal

['ா', 'ல', '்']

In [753]:
morphemegen(morph_df, aal, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
85,"[ண, ட, ்]",ணட்,(VT.......),1.0
86,"[ந, ், த, ்]",ந்த்,(VT.......),1.0
87,"[த, ், த, ்]",த்த்,(VT.......),1.0
88,"[ட, ், ட, ்]",ட்ட்,(VT.......),1.0
89,"[ா, ல, ்]",ால்,(VW.......),


# Verb - Adjectival Participle

In [84]:
Vd_df = merged_df[merged_df['xpos'].str.contains(r'(.d.......)')]
Vd_df.reset_index(inplace=True)
Vd_df.drop_duplicates(subset = 'lemma', inplace=True)
Vd_df

  return func(self, *args, **kwargs)


Unnamed: 0,index,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts


# Verb - Verbal nouns

In [85]:
Vz_df = merged_df[merged_df['xpos'].str.contains(r'(Vz.......)')]
Vz_df.reset_index(inplace=True)
Vz_df.drop_duplicates(subset = 'lemma', inplace=True)
Vz_df.head()

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,index,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts
0,141,உள்ளதால்,உள்,V,VzLT3SNAA,24,ளதால்,0,2
1,180,உருவாவதற்கு,உருவாகு,V,VzDF3SNAA,6,வதற்கு,0,11
2,323,இடிப்பதற்க்,இடி,V,VzDF3SNAA,17,ப்பதற்க்,1,3
3,345,வாழ்வதற்கு,வாழ்,V,VzDF3SNAA,7,வதற்கு,0,11
4,365,போராடியதைப்,போராடு,V,VzAD3SNAA,11,ியதைப்,0,1


# Verb - Participal Nouns

In [755]:
re = '(NP.......)'
VP_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VP_df = casefilter(VP_df)
VP_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,சேர்ந்தவர்,சேர்,N,NPND3SH-A,0,ந்தவர்,0,3,"[ந, ், த, வ, ர, ்]"
1,என்பவர்,என்,N,NPNF3SH-A,8,பவர்,0,3,"[ப, வ, ர, ்]"
2,மீறுவோருக்கு,மீறுவோர்,N,NPDF3PH-A,10,ுக்கு,0,1,"[ு, க, ், க, ு]"
3,செய்தவர்களில்,செய்,N,NPLF3PH-A,6,தவர்களில்,0,1,"[த, வ, ர, ், க, ள, ி, ல, ்]"
4,செய்தவர்கள்,செய்,N,NPNF3PH-A,39,தவர்கள்,1,1,"[த, வ, ர, ், க, ள, ்]"
5,வாழ்ந்தவர்கள்,வாழ்,N,NPND3PH-A,11,ந்தவர்கள்,0,0,"[ந, ், த, வ, ர, ், க, ள, ்]"


In [756]:
t= VP_df.loc[0, 'MorphemeSeparated']
t = t[2:3]
t

['த']

In [757]:
morphemegen(morph_df, t, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
86,"[ந, ், த, ்]",ந்த்,(VT.......),1.0
87,"[த, ், த, ்]",த்த்,(VT.......),1.0
88,"[ட, ், ட, ்]",ட்ட்,(VT.......),1.0
89,"[ா, ல, ்]",ால்,(VW.......),
90,[த],த,(NP.......),


In [758]:
var= VP_df.loc[0, 'MorphemeSeparated']
var = var[3:5]
var

['வ', 'ர']

In [759]:
morphemegen(morph_df, var, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
87,"[த, ், த, ்]",த்த்,(VT.......),1.0
88,"[ட, ், ட, ்]",ட்ட்,(VT.......),1.0
89,"[ா, ல, ்]",ால்,(VW.......),
90,[த],த,(NP.......),
91,"[வ, ர]",வர,(NP.......),


In [760]:
kal= VP_df.loc[5, 'MorphemeSeparated']
kal = kal[6:8]
kal

['க', 'ள']

In [761]:
morphemegen(morph_df, kal, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
88,"[ட, ், ட, ்]",ட்ட்,(VT.......),1.0
89,"[ா, ல, ்]",ால்,(VW.......),
90,[த],த,(NP.......),
91,"[வ, ர]",வர,(NP.......),
92,"[க, ள]",கள,(NP.......),


In [763]:
p= VP_df.loc[1, 'MorphemeSeparated']
p = p[0:1]
p

['ப']

In [764]:
morphemegen(morph_df, p, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
89,"[ா, ல, ்]",ால்,(VW.......),
90,[த],த,(NP.......),
91,"[வ, ர]",வர,(NP.......),
92,"[க, ள]",கள,(NP.......),
93,[ப],ப,(NP.......),


# Verb Accusative

In [766]:
re = '(V.A......)'
VA_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VA_df = casefilter(VA_df)
VA_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,போராடியதைப்,போராடு,V,VzAD3SNAA,11,ியதைப்,0,1,"[ி, ய, த, ை, ப, ்]"
1,முடிவடைந்ததைத்,முடிவடை,V,VzAD3SNAA,9,ந்ததைத்,0,1,"[ந, ், த, த, ை, த, ்]"
2,ஓய்வுபெற்றதைத்,ஓய்வுபெறு,V,VzAD3SNAA,7,்றதைத்,0,1,"[், ற, த, ை, த, ்]"
3,உள்ளதைய்,உள்,V,VZAT3SNAA,5,ளதைய்,1,1,"[ள, த, ை, ய, ்]"
4,செலுத்துவதை,செலுத்து,V,VzAF3SNAA,11,வதை,0,1,"[வ, த, ை]"
5,கொள்வதைய்,கொள்,V,VZAF3SNAA,12,வதைய்,1,0,"[வ, த, ை, ய, ்]"


In [772]:
ai= VA_df.loc[4, 'MorphemeSeparated']
ai = ai[2:]
ai

['ை']

In [773]:
morphemegen(morph_df, ai, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
90,[த],த,(NP.......),
91,"[வ, ர]",வர,(NP.......),
92,"[க, ள]",கள,(NP.......),
93,[ப],ப,(NP.......),
94,[ை],ை,(V.A......),


# Verb - Dative

In [774]:
re = '(V.D......)'
VD_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VD_df = casefilter(VD_df)
VD_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,உருவாவதற்கு,உருவாகு,V,VzDF3SNAA,6,வதற்கு,0,11,"[வ, த, ற, ், க, ு]"
1,பட்டதற்கு,படு,V,VZDD3SNPA,5,்டதற்கு,0,3,"[், ட, த, ற, ், க, ு]"
2,இடிப்பதற்க்,இடி,V,VzDF3SNAA,17,ப்பதற்க்,1,2,"[ப, ், ப, த, ற, ், க, ்]"
3,தெரிவித்ததற்க்,தெரிவி,V,VzDD3SNAA,11,த்ததற்க்,1,1,"[த, ், த, த, ற, ், க, ்]"
4,பங்கேற்பதற்கு,பங்கேல்,V,VzDF3SNAA,11,றபதற்கு,0,1,"[ற, ப, த, ற, ், க, ு]"
5,கொண்டதற்க்,கொள்,V,VZDD3SNAA,2,ணடதற்க்,1,1,"[ண, ட, த, ற, ், க, ்]"
6,எதிர்கொள்வதற்க்,எதிர்கொள்,V,VzDF3SNAA,19,வதற்க்,1,0,"[வ, த, ற, ், க, ்]"


In [778]:
ku= VD_df.loc[0, 'MorphemeSeparated']
ku = ku[4:]
ku

['க', 'ு']

In [779]:
morphemegen(morph_df, ku, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
91,"[வ, ர]",வர,(NP.......),
92,"[க, ள]",கள,(NP.......),
93,[ப],ப,(NP.......),
94,[ை],ை,(V.A......),
95,"[க, ு]",கு,(V.D......),


In [781]:
ku= VD_df.loc[2, 'MorphemeSeparated']
ku = ku[6:]
ku

['க', '்']

In [782]:
morphemegen(morph_df, ku, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
92,"[க, ள]",கள,(NP.......),
93,[ப],ப,(NP.......),
94,[ை],ை,(V.A......),
95,"[க, ு]",கு,(V.D......),
96,"[க, ்]",க்,(V.D......),


In [783]:
morph_df.loc[96, 'Morpheme continued'] = 1
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
92,"[க, ள]",கள,(NP.......),
93,[ப],ப,(NP.......),
94,[ை],ை,(V.A......),
95,"[க, ு]",கு,(V.D......),
96,"[க, ்]",க்,(V.D......),1.0


# Verb - Instrumental

In [784]:
re = '(V.I......)'
VI_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VI_df = casefilter(VI_df)
VI_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,உள்ளதால்,உள்,V,VZIT3SNAA,8,உள்ளதால்,2,1,"[உ, ள, ், ள, த, ா, ல, ்]"
1,போட்டியிடுவதால்,போட்டியிடு,V,VzIF3SNAA,19,வதால்,0,1,"[வ, த, ா, ல, ்]"
2,ஆகியதால்,ஆகு,V,VzID3SNAA,5,ஆகியதால்,2,1,"[ஆ, க, ி, ய, த, ா, ல, ்]"
3,உள்ளதால்,உள்,V,VZIT3SNAA,6,ளதால்,0,1,"[ள, த, ா, ல, ்]"
4,இல்லாததால்,இல்,V,VzIT3SNAA,9,லாததால்,0,1,"[ல, ா, த, த, ா, ல, ்]"
5,அளிக்காததால்,அளி,V,VzIT3SNAA,12,க்காததால்,0,1,"[க, ், க, ா, த, த, ா, ல, ்]"
6,புறக்கணிப்பதால்,புறக்கணி,V,VzIF3SNAA,29,ப்பதால்,0,1,"[ப, ், ப, த, ா, ல, ்]"


In [787]:
aal= VI_df.loc[1, 'MorphemeSeparated']
aal = aal[2:]
aal

['ா', 'ல', '்']

In [788]:
morphemegen(morph_df, aal, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
93,[ப],ப,(NP.......),
94,[ை],ை,(V.A......),
95,"[க, ு]",கு,(V.D......),
96,"[க, ்]",க்,(V.D......),1.0
97,"[ா, ல, ்]",ால்,(V.I......),


# Verb - Sociative

In [789]:
re = '(V.S......)'
VS_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VS_df = casefilter(VS_df)
VS_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,NoSpaceAfter,Morpheme,Counts,MorphemeSeparated


# Verb - Genitive

In [790]:
re = '(V.G......)'
VG_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VG_df = casefilter(VG_df)
VG_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,விடுத்ததன்,விடு,V,VzGD3SNAA,9,த்ததன்,0,0,"[த, ், த, த, ன, ்]"


# Verb - Locative

In [791]:
re = '(V.L......)'
VL_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
VL_df = casefilter(VL_df)
VL_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,தொகுத்ததில்,தொகு,V,VzLD3SNAA,23,த்ததில்,0,3,"[த, ், த, த, ி, ல, ்]"
1,உள்ளதால்,உள்,V,VzLT3SNAA,24,ளதால்,0,1,"[ள, த, ா, ல, ்]"
2,விபத்துக்குள்ளானதில்,விபத்துக்குள்ளாகு,V,VzLD3SNAA,18,னதில்,0,1,"[ன, த, ி, ல, ்]"
3,மோதியதில்,மோது,V,VzLD3SNAA,10,ியதில்,0,1,"[ி, ய, த, ி, ல, ்]"
4,சுத்தப்படுத்துவதில்,சுத்தப்படுத்து,V,VzLF3SNAA,28,வதில்,0,0,"[வ, த, ி, ல, ்]"


In [794]:
il= VL_df.loc[0, 'MorphemeSeparated']
il = il[4:]
il

['ி', 'ல', '்']

In [795]:
morphemegen(morph_df, il, re )
morph_df.reset_index(drop=True, inplace=True)
morph_df.tail()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
94,[ை],ை,(V.A......),
95,"[க, ு]",கு,(V.D......),
96,"[க, ்]",க்,(V.D......),1.0
97,"[ா, ல, ்]",ால்,(V.I......),
98,"[ி, ல, ்]",ில்,(V.L......),


In [797]:
morph_df.to_sql(name ='morphemes', con=engine)

# Cardinals

In [801]:
re = '(.x.......)'
C_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
C_df = casefilter(C_df)
C_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,லட்சத்து,லட்சம்,U,Ux-------,9,தது,0,6,"[த, த, ு]"
1,ஐந்தில்,ஐந்து,U,UxL-3SN-A,7,ில்,0,2,"[ி, ல, ்]"
2,கோடியே,கோடி,U,Ux-------,13,யே,0,2,"[ய, ே]"
3,ஆயிரத்த்,ஆயிரம்,U,UxL-3SN-A,9,தத்,1,1,"[த, த, ்]"
4,ஒன்றுக்கு,ஒன்று,U,UxD-3SN-A,20,க்கு,0,1,"[க, ், க, ு]"


# Personal Pronouns

In [803]:
re = '(.p.......)'
p_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
p_df = casefilter(p_df)
p_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,எனது,என்,R,RpG-1SA--,10,து,0,11,"[த, ு]"
1,அவர்கள்,அவர்,R,RpN-3PA--,7,கள்,0,10,"[க, ள, ்]"
2,அனைவரும்,அனைவர்,R,RpN-3PA--,9,ும்,0,10,"[ு, ம, ்]"
3,அவர்களை,அவர்,R,RpA-3PA--,9,களை,0,6,"[க, ள, ை]"
4,இதன்,இது,R,RpG-3SN--,2,ன்,0,5,"[ன, ்]"
5,அவர்களுக்கு,அவர்,R,RpD-3PA--,16,களுக்கு,0,5,"[க, ள, ு, க, ், க, ு]"
6,அவருக்கு,அவர்,R,RpD-3SH--,22,ுக்கு,0,5,"[ு, க, ், க, ு]"
7,அதில்,அது,R,RpL-3SN--,14,ில்,0,4,"[ி, ல, ்]"
8,அவர்களது,அவர்,R,RpG-3PA--,6,களது,0,4,"[க, ள, த, ு]"
9,தங்கள்,தன்,R,RpG-3PA--,8,ஙகள்,0,3,"[ங, க, ள, ்]"


# Ordinals

In [802]:
re = '(.y.......)'
O_df = merged_df[merged_df['xpos'].str.contains(rf'{re}')]
O_df = casefilter(O_df)
O_df

  return func(self, *args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,form,lemma,upos,xpos,head,Morpheme,NoSpaceAfter,Counts,MorphemeSeparated
0,78வது,78,U,Uy-------,8,வது,0,8,"[வ, த, ு]"
1,இரண்டாவது,இரண்டு,U,Uy-------,31,ாவது,0,5,"[ா, வ, த, ு]"
2,9ம்,9,U,Uy-------,12,ம்,0,1,"[ம, ்]"
3,இரண்டாம்,இரண்டு,U,Uy-------,17,ாம்,0,1,"[ா, ம, ்]"
4,2009-2010ஆம்,2009-2010,U,Uy-------,13,ஆம்,0,1,"[ஆ, ம, ்]"
5,2005-வது,2005,U,Uy-------,16,-வது,0,1,"[-, வ, த, ு]"
6,28-ம்,28,U,Uy-------,3,-ம்,0,0,"[-, ம, ்]"


In [809]:
morph_df.drop([0,1], axis=0, inplace=True)
morph_df.head()

Unnamed: 0,MorphemeSeparated,Morpheme,xpos Regex,Morpheme continued
2,ை,ை,(N.A..S...),
3,"[க, ், க, ்]",க்க்,(N.D..S...),1.0
4,"[க, ், க, ு]",க்கு,(N.D..S...),
5,"[க, ு]",கு,(N.D..S...),
6,"[க, ள, ு]",களு,(N.D..P...),


In [811]:
morph_df.to_sql(name ='morphemes', con=engine)

In [817]:
merged_df.tail()

Unnamed: 0,form,lemma,upos,xpos,head,FormWithoutLemma,NoSpaceAfter,Counts
9576,வேண்டும்,வேண்டு,V,VR-F3SNAA,9,ம்,0,144
9577,என்பது,என்,T,TQ-------,13,பது,0,8
9578,தான்,தான்,T,Tq-------,11,,0,0
9579,அது,அது,R,RpN-3SN--,0,,0,0
9580,.,.,Z,Z#-------,0,,0,0
