In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from tqdm import tqdm

Mounted at /content/drive


## Objective - MLE Wordwise Training and Parameter Optimization

In [None]:
from camel_tools.tokenizers.word import simple_word_tokenize

In [None]:
## imports for training data

base_path_aligned = '../../data/readability_data'
dev_aligned = pd.read_csv(base_path_aligned + '/dev_pnx.tsv', sep = '\t')
test_aligned = pd.read_csv(base_path_aligned + '/test_pnx.tsv', sep = '\t')
train_aligned = pd.read_csv(base_path_aligned + '/train_pnx.tsv', sep = '\t')

base_path = '../../data/splits/levelled_fragments/'


In [None]:
frag_train = pd.read_csv('../data/all_train_aligned.csv')
frag_dev = pd.read_csv('../data/all_dev_aligned.csv')
frag_test = pd.read_csv('../data/all_test_aligned.csv')


frag_train = frag_train[frag_train.apply(lambda x: type(x['0']) == str, axis = 1)]
frag_dev = frag_dev[frag_dev.apply(lambda x: type(x['0']) == str, axis = 1)]
frag_test = frag_test[frag_test.apply(lambda x: type(x['0']) == str, axis = 1)]

### Strategy

In [None]:
def get_mle_counts_aligned(words, levels):
  dict_levels = {}
  for word, level in zip(words, levels):
      try:
          #assume every entry of dict_levels : {3: int, 4: int, 5: int}
          dict_levels[word][level] += 1
      except:
          dict_levels[word] = {3: 0, 4: 0, 5: 0}
          dict_levels[word][level] += 1
  return dict_levels

def max_frequency_strategy(dict_levels):
  dict_levels_max = {}
  for token in dict_levels.keys():
    dict_levels_max[token] = max(dict_levels[token].items(), key = lambda x: x[1])[0]
  return dict_levels_max

def weighted_average_strategy(dict_levels):
  dict_levels_avg = {}
  for token in dict_levels.keys():
    dict_levels_avg[token] = np.average(list(dict_levels[token].keys()), weights = list(dict_levels[token].values()))
  return dict_levels_avg

### Setups

In [None]:


def mle_training_pipeline_aligned(data, strategy):
  counts = get_mle_counts_aligned(data['Word'], data['Label'])
  return strategy(counts)

def get_rl(token, model, oov_level = 0):
    try:
        return model[token]
    except:
        return oov_level


def mle_levels_inference_pipeline(fragment, model, backoff_freq = False):
  tokens = [t.split('#')[0] for t in fragment.split(' ')]
  levels = [get_rl(token, model, 0) for token in tokens]
  levels = [round(a) if a > 3 else 3 for a in levels]
  return levels

In [None]:
highest_aligned_model = mle_training_pipeline_aligned(train_aligned, max_frequency_strategy)
weighted_aligned_model = mle_training_pipeline_aligned(train_aligned, weighted_average_strategy)

In [37]:
def get_gt_levels(fragment):
  return [int(t.split('#')[1]) for t in fragment.split(' ')]

gt_levels = np.concatenate([get_gt_levels(f) for f in frag_test['0']])

#### Only eight experiments. Given that we are checking on word level, aggregation is not needed as an experimental variable

In [None]:
res = [ np.concatenate([mle_levels_inference_pipeline(a, highest_aligned_model) for a in frag_test['0']])
]

In [None]:

print(classification_report(gt_levels, results_high_max_aligned))


--------------------ALIGNED
--------------------high-max
              precision    recall  f1-score   support

           3       0.93      1.00      0.96     23851
           4       0.91      0.45      0.60      2032
           5       0.83      0.24      0.37      1040

    accuracy                           0.93     26923
   macro avg       0.89      0.56      0.64     26923
weighted avg       0.92      0.93      0.91     26923

--------------------weight-max
              precision    recall  f1-score   support

           3       0.93      0.99      0.96     23851
           4       0.86      0.46      0.60      2032
           5       0.84      0.24      0.37      1040

    accuracy                           0.92     26923
   macro avg       0.88      0.56      0.64     26923
weighted avg       0.92      0.92      0.91     26923

--------------------NOT ALIGNED
--------------------high-max
              precision    recall  f1-score   support

           3       0.93      0.99 