# Example-Based Model

## Initialization of the Datasets


### Source Dataset

#### Storing the Tagalog Part of Speech Data Set

In [None]:
import pandas as pd

# Read the Tagalog POS dataset from its JSON file into a DataFrame.
# Later cells read its 'POS' and 'Tokenized' columns.
tl_pos_data = pd.read_json('src/json data/tl_pos.json')

# Preview the first rows as the cell output.
tl_pos_data.head()

#### Storing the Ilokano Part of Speech Data Set

In [None]:
# Read the Ilokano POS dataset from its JSON file into a DataFrame.
# Later cells read its 'POS' and 'Tokenized' columns.
il_pos_data = pd.read_json('src/json data/il_pos.json')

# Preview the first rows as the cell output.
il_pos_data.head()

#### Storing the Tagalog Part of Speech Structure

In [None]:
# One-column frame holding the Tagalog POS value of every sentence, keeping
# the index of the source dataset.
dict_sen_poss = tl_pos_data['POS'].to_frame(name='Tagalog POS')

#### Storing the Ilokano Part of Speech Structure

In [None]:
# Put the Ilokano POS column next to the Tagalog one so that each row holds
# the POS structure of the same sentence position in both datasets.
dict_sen_poss['Ilokano POS'] = il_pos_data['POS']

# Preview up to the first 50 rows as the cell output.
dict_sen_poss.head(50)

### Tagalog to Ilokano Verb, Noun, and Adjective Dictionaries

In [None]:
# Per-sentence POS structures of each language, taken straight from the
# combined frame (these are pandas Series, one entry per sentence).
il_sen_poss_list = dict_sen_poss['Ilokano POS']
tl_sen_poss_list = dict_sen_poss['Tagalog POS']

# Empty Tagalog -> Ilokano dictionaries, one per part of speech.
dict_tl_il_vb = pd.DataFrame(columns=['Tagalog Verb', 'Ilokano Verb'])
dict_tl_il_nn = pd.DataFrame(columns=['Tagalog Noun', 'Ilokano Noun'])
dict_tl_il_jj = pd.DataFrame(columns=['Tagalog Adjective', 'Ilokano Adjective'])

## Appending in the List

### For Verb List

In [None]:
def append_vb_list(tl_verb, tl_verb_list, il_verb, il_verb_list, curr_il_pos, next_il_pos, next2_il_pos, matched, sp_index, wp_index):
    """Record the Ilokano counterpart of one Tagalog verb.

    The Ilokano entry is chosen from the Ilokano POS tags at the same word
    position and up to two positions ahead:

    * ``VB``        -> the token at ``wp_index``
    * ``DT VB``     -> determiner and verb joined by a space
                       (e.g. Nilalang : ti Aramid)
    * ``NN VB``     -> the token at ``wp_index + 1``
    * ``DT NN VB``  -> the token at ``wp_index + 2``
    * anything else -> the placeholder string ``'None'``

    A branch is taken only when ``wp_index`` and every Ilokano position the
    branch would consume are not yet in ``matched``, so one Ilokano token is
    never assigned to two Tagalog verbs of the same sentence.

    Args:
        tl_verb: Tagalog verb being recorded.
        tl_verb_list: Tagalog verbs recorded so far; ``tl_verb`` is appended
            when it is new.
        il_verb: List that receives the chosen Ilokano entry. Its first
            element is what gets merged into an existing row.
        il_verb_list: List of Ilokano-entry lists, parallel to
            ``tl_verb_list``.
        curr_il_pos: Ilokano POS tag at ``wp_index``.
        next_il_pos: Ilokano POS tag at ``wp_index + 1``.
        next2_il_pos: Ilokano POS tag at ``wp_index + 2``.
        matched: Ilokano word positions already consumed in this sentence;
            extended in place.
        sp_index: Sentence index used to read ``il_pos_data['Tokenized']``.
        wp_index: Word position of the Tagalog verb within the sentence.

    Returns:
        None. All results are delivered by mutating the list arguments.
    """
    # Look the verb up before appending it, so a first-time verb is not
    # mistaken for an existing dictionary row.
    if tl_verb in tl_verb_list:
        known_row = tl_verb_list.index(tl_verb)
    else:
        known_row = None
        tl_verb_list.append(tl_verb)

    # Availability of each position a branch may want to claim.
    curr_free = wp_index not in matched
    next_free = (wp_index + 1) not in matched
    next2_free = (wp_index + 2) not in matched

    if curr_il_pos == 'VB' and curr_free:
        # VB : VB
        il_verb.append(il_pos_data['Tokenized'][sp_index][wp_index])
        matched.append(wp_index)

    elif curr_il_pos == 'DT' and next_il_pos == 'VB' and curr_free and next_free:
        # VB : DT VB -- keep the determiner together with the verb.
        il_tokens = il_pos_data['Tokenized'][sp_index]
        il_verb.append(il_tokens[wp_index] + ' ' + il_tokens[wp_index + 1])
        matched.append(wp_index)
        matched.append(wp_index + 1)

    elif curr_il_pos == 'NN' and next_il_pos == 'VB' and curr_free and next_free:
        # VB : NN VB -- the verb is one position ahead. Without the
        # next_free guard, a verb already claimed through the DT NN VB
        # pattern by the previous Tagalog verb would be matched a second time.
        il_verb.append(il_pos_data['Tokenized'][sp_index][wp_index + 1])
        matched.append(wp_index + 1)

    elif (curr_il_pos == 'DT' and next_il_pos == 'NN' and next2_il_pos == 'VB'
            and curr_free and next2_free):
        # VB DT NN : DT NN VB -- the verb is two positions ahead.
        il_verb.append(il_pos_data['Tokenized'][sp_index][wp_index + 2])
        matched.append(wp_index + 2)

    else:
        # VB : other POS -- no usable Ilokano verb at this position.
        il_verb.append('None')
        matched.append(wp_index)

    if known_row is None:
        # New Tagalog verb: its Ilokano list becomes a new row.
        il_verb_list.append(il_verb)
    elif il_verb[0] not in il_verb_list[known_row]:
        # Known Tagalog verb: add only an entry that is not recorded yet.
        il_verb_list[known_row].append(il_verb[0])
            

### For Noun List

### For Adjective List

In [None]:
def append_jj_list(tl_adj, tl_adj_list, il_adj, il_adj_list, curr_il_pos, next_il_pos, next2_il_pos, matched, sp_index, wp_index):
    """Record the Ilokano counterpart of one Tagalog adjective.

    The Ilokano entry depends on the Ilokano POS tags at the same position:

    * ``JJ``        -> the token at ``wp_index``
    * ``DT JJ``     -> determiner and adjective joined by a space
                       (e.g. mabubuting : ken naimbag)
    * anything else -> the placeholder string ``'None'``

    The entry is appended to ``il_adj``. A new Tagalog adjective adds
    ``il_adj`` as a new row of ``il_adj_list``; a known one only gains
    ``il_adj[0]`` when that value is not in its row yet. ``next2_il_pos``
    and ``matched`` are accepted but not used here.
    """
    seen_before = tl_adj in tl_adj_list
    if seen_before:
        row = tl_adj_list.index(tl_adj)
    else:
        tl_adj_list.append(tl_adj)

    # Pick the Ilokano entry for this position.
    if curr_il_pos == 'JJ':
        il_entry = il_pos_data['Tokenized'][sp_index][wp_index]
    elif curr_il_pos == 'DT' and next_il_pos == 'JJ':
        il_tokens = il_pos_data['Tokenized'][sp_index]
        il_entry = il_tokens[wp_index] + ' ' + il_tokens[wp_index + 1]
    else:
        il_entry = 'None'
    il_adj.append(il_entry)

    # Store it: new row for a new adjective, de-duplicated merge otherwise.
    if not seen_before:
        il_adj_list.append(il_adj)
        return

    known_entries = il_adj_list[row]
    if il_adj[0] not in known_entries:
        known_entries.append(il_adj[0])

### Tagalog to Ilokano Matcher

In [None]:

# Module-level word-position placeholder. match_tl_il_pos assigns its own
# local wp_index, so this value is not what the matcher iterates with.
wp_index = None

def _il_pos_at(il_sen, index):
    """Return the POS tag at *index* of *il_sen*, or the string 'None' past its end."""
    try:
        return il_sen[index]
    except IndexError:
        return 'None'


def match_tl_il_pos():
    """Fill the verb and adjective dictionaries from the aligned POS data.

    Walks every sentence of ``tl_sen_poss_list`` together with the entry at
    the same index of ``il_sen_poss_list``. For each Tagalog word tagged
    ``'VB'`` or ``'JJ'``, the Ilokano POS tags at the same word position and
    the next two positions are handed to ``append_vb_list`` or
    ``append_jj_list``, which pick the Ilokano counterpart.

    The collected lists are then written into the module-level frames
    ``dict_tl_il_vb`` ('Tagalog Verb' / 'Ilokano Verb') and ``dict_tl_il_jj``
    ('Tagalog Adjective' / 'Ilokano Adjective'). Nouns are not handled here.

    Returns:
        None. Results are stored in the module-level DataFrames.
    """
    tl_verb_list = []
    il_verb_list = []
    tl_adj_list = []
    il_adj_list = []

    for sp_index, tl_sen_pos in enumerate(tl_sen_poss_list):
        # tl_sen_pos is the POS sequence of one sentence,
        # e.g. ['VB', 'DT', 'NN', 'DT', 'NN'].
        matched = []  # Ilokano word positions already consumed in this sentence
        il_sen = il_sen_poss_list[sp_index]
        # Looked up once per sentence instead of once per word.
        tl_tokens = tl_pos_data['Tokenized'][sp_index]

        for wp_index, tl_word_pos in enumerate(tl_sen_pos):
            tl_word = tl_tokens[wp_index]

            # The Ilokano sentence may be shorter than the Tagalog one, so
            # positions past its end fall back to the 'None' tag.
            curr_il_pos = _il_pos_at(il_sen, wp_index)
            next_il_pos = _il_pos_at(il_sen, wp_index + 1)
            next2_il_pos = _il_pos_at(il_sen, wp_index + 2)

            # Each call gets a fresh list to receive the Ilokano entry.
            if tl_word_pos == 'VB':
                append_vb_list(tl_word, tl_verb_list, [], il_verb_list, curr_il_pos, next_il_pos, next2_il_pos, matched, sp_index, wp_index)
            elif tl_word_pos == 'JJ':
                append_jj_list(tl_word, tl_adj_list, [], il_adj_list, curr_il_pos, next_il_pos, next2_il_pos, matched, sp_index, wp_index)

    dict_tl_il_vb['Tagalog Verb'] = tl_verb_list
    dict_tl_il_vb['Ilokano Verb'] = il_verb_list
    dict_tl_il_jj['Tagalog Adjective'] = tl_adj_list
    dict_tl_il_jj['Ilokano Adjective'] = il_adj_list
    
    
# Populate dict_tl_il_vb and dict_tl_il_jj, then preview their first rows.
# NOTE(review): a notebook cell renders only its last bare expression, so the
# verb preview is presumably not displayed -- confirm whether that is intended.
match_tl_il_pos()
dict_tl_il_vb.head(7)
dict_tl_il_jj.head(7)



In [None]:
# Inspect the tokenized Ilokano sentences that the matcher reads from.
il_pos_data['Tokenized'].head(7)


In [None]:
# Inspect the Ilokano entries collected for the Tagalog verb in row 3.
dict_tl_il_vb['Ilokano Verb'][3]