# Imports/Installs

In [37]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.61.13-py3-none-any.whl (221 kB)
[?25l[K     |█▌                              | 10 kB 23.6 MB/s eta 0:00:01[K     |███                             | 20 kB 26.9 MB/s eta 0:00:01[K     |████▍                           | 30 kB 30.7 MB/s eta 0:00:01[K     |██████                          | 40 kB 32.7 MB/s eta 0:00:01[K     |███████▍                        | 51 kB 33.4 MB/s eta 0:00:01[K     |████████▉                       | 61 kB 34.5 MB/s eta 0:00:01[K     |██████████▍                     | 71 kB 33.0 MB/s eta 0:00:01[K     |███████████▉                    | 81 kB 33.2 MB/s eta 0:00:01[K     |█████████████▎                  | 92 kB 18.6 MB/s eta 0:00:01[K     |██████████████▉                 | 102 kB 19.7 MB/s eta 0:00:01[K     |████████████████▎               | 112 kB 19.7 MB/s eta 0:00:01[K     |█████████████████▊              | 122 kB 19.7 MB/s eta 0:00:01[K     |███████████████████▎            

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
import torch
import pickle
import sklearn

from simpletransformers.classification import MultiLabelClassificationModel, MultiLabelClassificationArgs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import classification_report, multilabel_confusion_matrix, precision_recall_fscore_support, hamming_loss, accuracy_score



# Extract data

In [4]:
data = pd.read_csv('/content/drive/MyDrive/criminal_lvl2_under200.csv', index_col=0)  

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2520 entries, 0 to 2519
Data columns (total 23 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  2520 non-null   int64 
 1   year                2520 non-null   int64 
 2   gr_no_x             2519 non-null   object
 3   text                2520 non-null   object
 4   text_clean          2511 non-null   object
 5   lemma_separated     2520 non-null   object
 6   lvl2_labels_counts  2520 non-null   object
 7   lvl2_labels         2520 non-null   object
 8   chastity            2520 non-null   int64 
 9   civil_status        2520 non-null   int64 
 10  drugs               2520 non-null   int64 
 11  fundamental_laws    2520 non-null   int64 
 12  honor               2520 non-null   int64 
 13  liberty             2520 non-null   int64 
 14  national_security   2520 non-null   int64 
 15  persons             2520 non-null   int64 
 16  property            2520

In [6]:
data.head()

Unnamed: 0,id,year,gr_no_x,text,text_clean,lemma_separated,lvl2_labels_counts,lvl2_labels,chastity,civil_status,drugs,fundamental_laws,honor,liberty,national_security,persons,property,public_interest,public_morals,public_officers,public_order,quasi_offense,lvl2_max
0,189096,2002,G. R. No. 144340-42,Before this for automatic review[] is the Join...,automatic review joint promulgated h lower cou...,['automatic review joint promulgated h lower c...,"{'national_security': 0, 'fundamental_laws': 0...","['public_interest', 'public_officers', 'person...",1,1,0,0,0,1,0,1,0,1,0,1,0,1,chastity
1,187362,2001,G. R. No. 131203,"SECOND In its decision promulgated on , ,[] th...",second decision promulgated found accused appe...,['second decision promulgated found accused ap...,"{'national_security': 0, 'fundamental_laws': 0...","['public_interest', 'persons', 'liberty', 'cha...",1,1,0,0,0,1,0,1,0,1,0,0,0,1,chastity
2,178138,1992,G.R. Nos.102409-10,"de appeals from the dated , in and of the , of...",de appeals dated convicting crime committed tw...,['de appeal dated convicting crime committed t...,"{'national_security': 0, 'fundamental_laws': 0...","['public_interest', 'public_officers', 'person...",1,1,0,0,1,0,0,1,0,1,0,1,0,1,chastity
3,189434,2003,G.R. No. 133926,"For automatic review is the dated , , of the ,...",automatic review dated convicting appellant st...,['automatic review dated convicting appellant ...,"{'national_security': 0, 'fundamental_laws': 0...","['public_interest', 'public_officers', 'libert...",1,1,0,0,1,1,0,0,0,1,0,1,0,1,chastity
4,189153,2002,G. R. No. 139231,"On appeal is the decision[] of the , in ,-, co...",appeal decision convicting rape sentencing rec...,['appeal decision convicting rape sentencing r...,"{'national_security': 0, 'fundamental_laws': 2...","['fundamental_laws', 'public_interest', 'drugs...",1,1,1,1,0,1,0,0,0,1,0,1,0,1,chastity


# Preprocessing

In [7]:
# Names of the binary indicator columns in `data`. The order of this list fixes the
# position of each label inside the multi-hot vectors built from these columns.
lvl2_labels = [
    'chastity', 'civil_status', 'drugs', 'fundamental_laws',
    'honor', 'liberty', 'national_security', 'persons',
    'property', 'public_interest', 'public_morals', 'public_officers',
    'public_order', 'quasi_offense',
]

In [8]:
# Build one multi-hot list per row from the indicator columns named in lvl2_labels
# (list order == column order). A single column assignment replaces the row-by-row
# chained assignment `data['labels'][x] = ...`, which raised SettingWithCopyWarning
# and is not guaranteed to write into `data` itself.
data['labels'] = data[lvl2_labels].values.tolist()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [9]:
data.head()

Unnamed: 0,id,year,gr_no_x,text,text_clean,lemma_separated,lvl2_labels_counts,lvl2_labels,chastity,civil_status,drugs,fundamental_laws,honor,liberty,national_security,persons,property,public_interest,public_morals,public_officers,public_order,quasi_offense,lvl2_max,labels
0,189096,2002,G. R. No. 144340-42,Before this for automatic review[] is the Join...,automatic review joint promulgated h lower cou...,['automatic review joint promulgated h lower c...,"{'national_security': 0, 'fundamental_laws': 0...","['public_interest', 'public_officers', 'person...",1,1,0,0,0,1,0,1,0,1,0,1,0,1,chastity,"[1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]"
1,187362,2001,G. R. No. 131203,"SECOND In its decision promulgated on , ,[] th...",second decision promulgated found accused appe...,['second decision promulgated found accused ap...,"{'national_security': 0, 'fundamental_laws': 0...","['public_interest', 'persons', 'liberty', 'cha...",1,1,0,0,0,1,0,1,0,1,0,0,0,1,chastity,"[1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1]"
2,178138,1992,G.R. Nos.102409-10,"de appeals from the dated , in and of the , of...",de appeals dated convicting crime committed tw...,['de appeal dated convicting crime committed t...,"{'national_security': 0, 'fundamental_laws': 0...","['public_interest', 'public_officers', 'person...",1,1,0,0,1,0,0,1,0,1,0,1,0,1,chastity,"[1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1]"
3,189434,2003,G.R. No. 133926,"For automatic review is the dated , , of the ,...",automatic review dated convicting appellant st...,['automatic review dated convicting appellant ...,"{'national_security': 0, 'fundamental_laws': 0...","['public_interest', 'public_officers', 'libert...",1,1,0,0,1,1,0,0,0,1,0,1,0,1,chastity,"[1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1]"
4,189153,2002,G. R. No. 139231,"On appeal is the decision[] of the , in ,-, co...",appeal decision convicting rape sentencing rec...,['appeal decision convicting rape sentencing r...,"{'national_security': 0, 'fundamental_laws': 2...","['fundamental_laws', 'public_interest', 'drugs...",1,1,1,1,0,1,0,0,0,1,0,1,0,1,chastity,"[1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1]"


In [10]:
df = data[['text','labels']]

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2520 entries, 0 to 2519
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    2520 non-null   object
 1   labels  2520 non-null   object
dtypes: object(2)
memory usage: 139.1+ KB


In [12]:
train_df, eval_df = train_test_split(df, test_size=0.2, random_state=42)

In [13]:
print(train_df.shape, eval_df.shape)

(2016, 2) (504, 2)


# Model Training (xlnet-base-cased)

In [39]:
cuda_available = torch.cuda.is_available()

In [40]:
# Fine-tuning hyperparameters; overwrite_output_dir lets the cell be re-run without
# failing on an existing output directory.
model_args = MultiLabelClassificationArgs(
    learning_rate=4e-5,
    adam_epsilon=1e-08,
    num_train_epochs=5,
    overwrite_output_dir=True,
)

# XLNet base (cased) with a multi-label head. The number of outputs is derived from
# lvl2_labels so the head always matches the label vectors built from that list.
model = MultiLabelClassificationModel(
    'xlnet',
    'xlnet-base-cased',
    num_labels=len(lvl2_labels),
    use_cuda=cuda_available,
    args=model_args,
)

Downloading:   0%|          | 0.00/760 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForMultiLabelSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForMultiLabelSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForMultiLabelSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForMultiLabelSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream tas

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

In [41]:
model.train_model(train_df)

  0%|          | 0/2016 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/252 [00:00<?, ?it/s]

  model.parameters(), args.max_grad_norm


Running Epoch 1 of 5:   0%|          | 0/252 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/252 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/252 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/252 [00:00<?, ?it/s]

(1260, 0.38969997898453757)

In [42]:
result, model_outputs, wrong_predictions = model.eval_model(eval_df,verbose=True)

  0%|          | 0/504 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/63 [00:00<?, ?it/s]

In [43]:
result

{'LRAP': 0.8215965277745437, 'eval_loss': 0.4127577443917592}

In [44]:
model_outputs

array([[0.9921875 , 0.64453125, 0.12487793, ..., 0.36914062, 0.0216217 ,
        0.94482422],
       [0.01548767, 0.16833496, 0.08959961, ..., 0.93945312, 0.01312256,
        0.79345703],
       [0.22485352, 0.81347656, 0.65478516, ..., 0.85693359, 0.065979  ,
        0.82861328],
       ...,
       [0.13256836, 0.14599609, 0.21569824, ..., 0.640625  , 0.03927612,
        0.65625   ],
       [0.03085327, 0.23498535, 0.39501953, ..., 0.95068359, 0.06536865,
        0.96923828],
       [0.05706787, 0.40795898, 0.75146484, ..., 0.93359375, 0.04577637,
        0.94970703]])

# Save model

In [48]:
def save_model(fname, model):
    """
        Serialize an object to a pickle file.

        fname: path/filename.pkl to write (opened in binary write mode, so an
               existing file is overwritten)
        model: object to pickle

        Returns None. Errors from open() or pickle.dump() propagate to the caller.
    """
    # The context manager closes the handle even when pickling fails part-way.
    with open(fname, 'wb') as file:
        pickle.dump(model, file)

def load_model(fname):
    """
        Load an object previously written with pickle.

        fname: path/filename.pkl to read

        Returns the unpickled object. Errors from open() or pickle.load()
        propagate to the caller.

        NOTE: unpickling can execute arbitrary code; only load files you created
        yourself or otherwise trust.
    """
    # The context manager closes the handle even when unpickling fails.
    with open(fname, 'rb') as file:
        return pickle.load(file)

In [49]:
save_model('/content/drive/MyDrive/xlnet.pkl',model)

In [50]:
drive.flush_and_unmount()
print('All changes made in this colab session should now be visible in Drive.')

All changes made in this colab session should now be visible in Drive.


In [51]:
drive.mount('/content/drive')

Mounted at /content/drive


# Metrics

In [36]:
# from google.colab import files
# uploaded = files.upload()

In [52]:
xlnet = load_model('/content/drive/MyDrive/xlnet.pkl')
#/content/drive/MyDrive/xlnet.pkl

In [53]:
result, model_outputs, wrong_predictions = xlnet.eval_model(eval_df,verbose=True)

  0%|          | 0/504 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/63 [00:00<?, ?it/s]

In [54]:
eval_text = list(eval_df['text'])

In [55]:
pred, pred_outputs = xlnet.predict(eval_text)

  0%|          | 0/504 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

In [56]:
y_pred = list(pred)
y_pred

[[1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1],
 [0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1],
 [0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1],
 [1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0],
 [0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
 [0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1],
 [0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1],
 [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1],
 [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1],
 [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0,

In [57]:
y_pred_outputs = list(pred_outputs)
#y_pred_outputs

In [62]:
from sklearn.metrics import coverage_error
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.metrics import label_ranking_loss

In [63]:
y_true = list(eval_df['labels'])
#y_true

In [64]:
print(coverage_error(y_true, y_pred_outputs),  '\n',
      label_ranking_average_precision_score(y_true, y_pred_outputs), '\n',
      label_ranking_loss(y_true, y_pred_outputs))

6.882936507936508 
 0.8215965277745437 
 0.1211791397378132


In [None]:
pred_outputs

In [None]:
y_true = list(eval_df['labels'])
y_true

In [None]:
print(precision_recall_fscore_support(y_true, y_pred, average='macro'),  '\n',
      precision_recall_fscore_support(y_true, y_pred, average='micro'), '\n',
      precision_recall_fscore_support(y_true, y_pred, average='weighted'), '\n',
      precision_recall_fscore_support(y_true, y_pred, average='samples', zero_division=1))

In [None]:
accuracy_score(y_true,y_pred)

In [None]:
top_k = tf.keras.metrics.TopKCategoricalAccuracy(k=2)

In [None]:
# Feed the evaluation labels and predictions into the top-k metric and read the result.
# NOTE(review): y_pred holds the thresholded 0/1 predictions, not scores, so the
# top-k ranking is taken over tied values; presumably the raw scores (pred_outputs)
# were intended. y_true is also multi-hot, while the metric name suggests it expects
# one-hot targets. TODO confirm before reporting this number.
top_k.update_state(y_true,y_pred)
top_k.result().numpy()

# Apply to the Dataframe

In [None]:
# One row per evaluated text, one integer-named column per output position;
# reset_index() adds the row position as an explicit 'index' column.
y_hats = pd.DataFrame(pred_outputs).reset_index()
y_hats

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0,0.990234,0.818848,0.222412,0.208496,0.718750,0.898438,0.017517,0.835938,0.152832,0.470703,0.053009,0.517090,0.024429,0.950195
1,1,0.013901,0.138550,0.069397,0.088013,0.779785,0.093872,0.019058,0.085083,0.156250,0.465820,0.017380,0.910645,0.012337,0.806152
2,2,0.425781,0.846680,0.828125,0.892578,0.440430,0.978516,0.061890,0.832520,0.306152,0.111267,0.053406,0.898926,0.084473,0.868164
3,3,0.093994,0.920410,0.362061,0.243774,0.492676,0.795898,0.008644,0.979004,0.521484,0.246338,0.017044,0.524414,0.018875,0.974121
4,4,0.118774,0.469482,0.500000,0.293213,0.463379,0.905273,0.014786,0.639160,0.964355,0.185181,0.048676,0.817383,0.014786,0.916992
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,499,0.090881,0.574219,0.557129,0.882324,0.426758,0.826660,0.544922,0.755859,0.611328,0.078369,0.022583,0.862305,0.333496,0.443115
500,500,0.034180,0.033203,0.176636,0.044586,0.032776,0.168091,0.028442,0.015961,0.040619,0.040161,0.110474,0.057373,0.021240,0.092346
501,501,0.083130,0.116760,0.109497,0.024750,0.948242,0.217285,0.022675,0.071960,0.058563,0.099976,0.123230,0.487549,0.019455,0.434814
502,502,0.016403,0.129639,0.477539,0.226562,0.445068,0.257080,0.046478,0.234741,0.641113,0.323975,0.047882,0.942871,0.037750,0.938965


In [None]:
sample_df = eval_df.reset_index()
sample_df

Unnamed: 0,index,text,labels
0,196,Not only did the accused rape his daughter-in-...,"[1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1]"
1,986,", I S I , filed a sworn complaint with the aga...","[0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1]"
2,1090,For our resolution is a petition for certiorar...,"[0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1]"
3,387,THIRD This is an appeal from the decision of t...,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]"
4,2221,"SECOND No. , , A. OF I S I was charged with sl...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]"
...,...,...,...
499,1027,This is an appeal from the decision of the of ...,"[0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1]"
500,1730,"""Los esposos y son compadres hace cinco anos d...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]"
501,911,This is a prosecution for a violation of that ...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
502,2225,In this petition for certiorari and prohibitio...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"


In [None]:
data.columns

Index(['id', 'year', 'gr_no_x', 'text', 'text_clean', 'lemma_separated',
       'lvl2_labels_counts', 'lvl2_labels', 'chastity', 'civil_status',
       'drugs', 'fundamental_laws', 'honor', 'liberty', 'national_security',
       'persons', 'property', 'public_interest', 'public_morals',
       'public_officers', 'public_order', 'quasi_offense', 'lvl2_max',
       'labels'],
      dtype='object')

In [None]:
# Pair each evaluation row with its predicted scores by row position (index-on-index
# merge), then name score column i after lvl2_labels[i]. The mapping is derived from
# lvl2_labels instead of being retyped, and rename() is chained rather than applied
# in place so re-running the cell always starts from sample_df and y_hats.
merged_df = (
    sample_df
    .merge(y_hats, left_index=True, right_index=True, how="left")
    .rename(columns=dict(enumerate(lvl2_labels)))
)
merged_df

Unnamed: 0,index_x,text,labels,index_y,chastity,civil_status,drugs,fundamental_laws,honor,liberty,national_security,persons,property,public_interest,public_morals,public_officers,public_order,quasi_offense
0,196,Not only did the accused rape his daughter-in-...,"[1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1]",0,0.990234,0.818848,0.222412,0.208496,0.718750,0.898438,0.017517,0.835938,0.152832,0.470703,0.053009,0.517090,0.024429,0.950195
1,986,", I S I , filed a sworn complaint with the aga...","[0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1]",1,0.013901,0.138550,0.069397,0.088013,0.779785,0.093872,0.019058,0.085083,0.156250,0.465820,0.017380,0.910645,0.012337,0.806152
2,1090,For our resolution is a petition for certiorar...,"[0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1]",2,0.425781,0.846680,0.828125,0.892578,0.440430,0.978516,0.061890,0.832520,0.306152,0.111267,0.053406,0.898926,0.084473,0.868164
3,387,THIRD This is an appeal from the decision of t...,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]",3,0.093994,0.920410,0.362061,0.243774,0.492676,0.795898,0.008644,0.979004,0.521484,0.246338,0.017044,0.524414,0.018875,0.974121
4,2221,"SECOND No. , , A. OF I S I was charged with sl...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]",4,0.118774,0.469482,0.500000,0.293213,0.463379,0.905273,0.014786,0.639160,0.964355,0.185181,0.048676,0.817383,0.014786,0.916992
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,1027,This is an appeal from the decision of the of ...,"[0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1]",499,0.090881,0.574219,0.557129,0.882324,0.426758,0.826660,0.544922,0.755859,0.611328,0.078369,0.022583,0.862305,0.333496,0.443115
500,1730,"""Los esposos y son compadres hace cinco anos d...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]",500,0.034180,0.033203,0.176636,0.044586,0.032776,0.168091,0.028442,0.015961,0.040619,0.040161,0.110474,0.057373,0.021240,0.092346
501,911,This is a prosecution for a violation of that ...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]",501,0.083130,0.116760,0.109497,0.024750,0.948242,0.217285,0.022675,0.071960,0.058563,0.099976,0.123230,0.487549,0.019455,0.434814
502,2225,In this petition for certiorari and prohibitio...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]",502,0.016403,0.129639,0.477539,0.226562,0.445068,0.257080,0.046478,0.234741,0.641113,0.323975,0.047882,0.942871,0.037750,0.938965


In [None]:
# For every row, record the name of the label column holding the highest score.
# The columns are selected through lvl2_labels so this cell cannot drift from the
# label list defined in the preprocessing section.
merged_df['Max'] = merged_df[lvl2_labels].idxmax(axis=1)
merged_df

Unnamed: 0,index_x,text,labels,index_y,chastity,civil_status,drugs,fundamental_laws,honor,liberty,national_security,persons,property,public_interest,public_morals,public_officers,public_order,quasi_offense,Max
0,196,Not only did the accused rape his daughter-in-...,"[1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1]",0,0.990234,0.818848,0.222412,0.208496,0.718750,0.898438,0.017517,0.835938,0.152832,0.470703,0.053009,0.517090,0.024429,0.950195,chastity
1,986,", I S I , filed a sworn complaint with the aga...","[0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1]",1,0.013901,0.138550,0.069397,0.088013,0.779785,0.093872,0.019058,0.085083,0.156250,0.465820,0.017380,0.910645,0.012337,0.806152,public_officers
2,1090,For our resolution is a petition for certiorar...,"[0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1]",2,0.425781,0.846680,0.828125,0.892578,0.440430,0.978516,0.061890,0.832520,0.306152,0.111267,0.053406,0.898926,0.084473,0.868164,liberty
3,387,THIRD This is an appeal from the decision of t...,"[0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]",3,0.093994,0.920410,0.362061,0.243774,0.492676,0.795898,0.008644,0.979004,0.521484,0.246338,0.017044,0.524414,0.018875,0.974121,persons
4,2221,"SECOND No. , , A. OF I S I was charged with sl...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]",4,0.118774,0.469482,0.500000,0.293213,0.463379,0.905273,0.014786,0.639160,0.964355,0.185181,0.048676,0.817383,0.014786,0.916992,property
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,1027,This is an appeal from the decision of the of ...,"[0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1]",499,0.090881,0.574219,0.557129,0.882324,0.426758,0.826660,0.544922,0.755859,0.611328,0.078369,0.022583,0.862305,0.333496,0.443115,fundamental_laws
500,1730,"""Los esposos y son compadres hace cinco anos d...","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]",500,0.034180,0.033203,0.176636,0.044586,0.032776,0.168091,0.028442,0.015961,0.040619,0.040161,0.110474,0.057373,0.021240,0.092346,drugs
501,911,This is a prosecution for a violation of that ...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]",501,0.083130,0.116760,0.109497,0.024750,0.948242,0.217285,0.022675,0.071960,0.058563,0.099976,0.123230,0.487549,0.019455,0.434814,honor
502,2225,In this petition for certiorari and prohibitio...,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]",502,0.016403,0.129639,0.477539,0.226562,0.445068,0.257080,0.046478,0.234741,0.641113,0.323975,0.047882,0.942871,0.037750,0.938965,public_officers


In [None]:
# Case number and text for exactly the rows held out in eval_df, in the same order.
# Selecting by eval_df.index avoids running train_test_split a second time, which
# previously overwrote train_df with a frame that has no 'labels' column.
eval_df_2 = data.loc[eval_df.index, ["gr_no_x", "text"]].reset_index()
eval_df_2

Unnamed: 0,index,gr_no_x,text
0,196,G.R. Nos. 146352-56,Not only did the accused rape his daughter-in-...
1,986,A.M. No. MTJ-90-400,", I S I , filed a sworn complaint with the aga..."
2,1090,G.R. Nos. 154150-51,For our resolution is a petition for certiorar...
3,387,G.R. No. 89278,THIRD This is an appeal from the decision of t...
4,2221,A.C. No. 1753,"SECOND No. , , A. OF I S I was charged with sl..."
...,...,...,...
499,1027,G.R. No. L-8297,This is an appeal from the decision of the of ...
500,1730,G.R. No. 47721,"""Los esposos y son compadres hace cinco anos d..."
501,911,G.R. No. 26408,This is a prosecution for a violation of that ...
502,2225,G.R. Nos. 151380-91,In this petition for certiorari and prohibitio...


In [None]:
# Attach the case number of every evaluation document to its prediction. Both frames
# carry a fresh 0..n-1 index, so the index-on-index merge pairs rows by position; the
# shared 'text' column comes back as text_x / text_y.
xlnet_predict_df = merged_df.merge(eval_df_2, left_index=True, right_index=True, how="left")

# rename() without inplace returns a new frame, so nothing is written into a slice of
# xlnet_predict_df (the cause of the SettingWithCopyWarning).
final_xlnet_predict_df = xlnet_predict_df[["gr_no_x", "text_x", "Max"]].rename(
    columns={"gr_no_x": "gr_no_", "text_x": "text", "Max": "XLNET_Max"}
)
final_xlnet_predict_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,gr_no_,text,XLNET_Max
0,G.R. Nos. 146352-56,Not only did the accused rape his daughter-in-...,chastity
1,A.M. No. MTJ-90-400,", I S I , filed a sworn complaint with the aga...",public_officers
2,G.R. Nos. 154150-51,For our resolution is a petition for certiorar...,liberty
3,G.R. No. 89278,THIRD This is an appeal from the decision of t...,persons
4,A.C. No. 1753,"SECOND No. , , A. OF I S I was charged with sl...",property
...,...,...,...
499,G.R. No. L-8297,This is an appeal from the decision of the of ...,fundamental_laws
500,G.R. No. 47721,"""Los esposos y son compadres hace cinco anos d...",drugs
501,G.R. No. 26408,This is a prosecution for a violation of that ...,honor
502,G.R. Nos. 151380-91,In this petition for certiorari and prohibitio...,public_officers


In [None]:
final_xlnet_predict_df.columns

Index(['gr_no_', 'text', 'XLNET_Max'], dtype='object')

In [None]:
final_xlnet_predict_df_2 = final_xlnet_predict_df[["gr_no_", "XLNET_Max"]]
final_xlnet_predict_df_2

Unnamed: 0,gr_no_,XLNET_Max
0,G.R. Nos. 146352-56,chastity
1,A.M. No. MTJ-90-400,public_officers
2,G.R. Nos. 154150-51,liberty
3,G.R. No. 89278,persons
4,A.C. No. 1753,property
...,...,...
499,G.R. No. L-8297,fundamental_laws
500,G.R. No. 47721,drugs
501,G.R. No. 26408,honor
502,G.R. Nos. 151380-91,public_officers


In [None]:
final_xlnet_predict_df_2.to_csv("final_xlnet_predict_df_2.csv")

In [None]:
!cp final_xlnet_predict_df_2.csv "drive/My Drive/"

# Sample Prediction

In [None]:
sample_text = ['Avelino Ordono was charged in the municipal court of San Gabriel, La Union with having raped his daughter, Leonora, on October 11, 1970. The verified complaint dated November 7, 1973 was signed by the twenty-four year old victim (Criminal Case No. 104).In support of that complaint, Catalina Balanon Ordono, the mother of Leonora, executed a sworn statement wherein she disclosed that on that same date, October 11th, Leonora had apprised her of the outrage but no denunciation was filed because Avelino Ordono threatened to kill Leonora and Catalina (his daughter and wife, respectively) if they reported the crime to the police.Catalina Ordono in her sworn statement further revealed that her husband had also raped their other daughter, Rosa, on March 25 and April 7, 1973. He was charged in court with that offense.Catalina Ordono said that the rape committed by Avelino Ordono against Leonora was mentioned during the investigation and trial of Avelino Ordono for the rape committed against Rosa Ordono. Catalina\'s statement on this point is as follows:  "Q - Why did you not file the complaint against your husband concerning the incident involving Leonora Ordono?"A - We also narrated the incident during the investigation in the Fiscal\'s Office and also when I testified in court in the case of my daughter Rosa Ordono but then my daughter Leonora Ordono was still in Manila, sir."During the preliminary investigation of the rape committed against Leonora, Catalina manifested that she was no longer afraid to denounce Avelino Ordono because he was already in jail for having raped Rosa Ordono.The case against Avelino Ordono, where Leonora Ordono was the complainant, was elevated to the Court of First Instance of La Union, San Fernando Branch (Criminal Case No. 356). On May 29, 1974 the Fiscal presented Catalina Ordono as the second prosecution witness. After she had stated her personal circumstances, the defense counsel objected to her competency. 
He invoked the marital disqualification rule found in Rule 130 of the Rules of Court which provides:"SEC. 20.  Disqualification by reason of interest or relationship. — The following persons cannot testify as to matters in which they are interested, directly or indirectly, as herein enumerated:x x x x x x x x x(b) A husband cannot be examined for or against his wife without her consent; nor a wife for or against her husband without his consent, except in a civil case by one against the other, or in a criminal case for a crime committed by one against the other;x x x x x x x x xCounsel claimed that Avelino Ordono had not consented expressly or impliedly to his wife\'s testifying against him.The trial court overruled the objection. After the denial of Avelino Ordono\'s motion for the reconsideration of the adverse ruling, he filed the instant action for certiorari and prohibition. He was allowed to sue in forma pauperis.The issue is whether the rape committed by the husband against his daughter is a crime committed by him against his wife within the meaning of the exception found in the marital disqualification rule.Should the phrase "in a criminal case for a crime committed by one against the other" be restricted to crimes committed by one spouse against the other, such as physical injuries, bigamy, adultery or concubinage, or should it be given a latitudinarian interpretation as referring to any offense causing marital discord?There is a dictum that "where the marital and domestic relations are so strained that there is no more harmony to be preserved nor peace and tranquility which may be disturbed, the reason based upon such harmony and tranquility fails. In such a case identity of interests disappears and the consequent danger of perjury based on that identity is nonexistent. 
Likewise, in such a situation, the security and confidences of private life which the law aims at protecting will be nothing but ideals which, through their absence, merely leave a void in the unhappy home" (People vs. Francisco, 78 Phil. 694, 704).In the Francisco case, the wife, as a rebuttal witness, was allowed to testify against the husband who was charged with having killed his son and who testified that it was the wife who killed their son.We think that the correct rule, which may be adopted in this jurisdiction, is that laid down in Cargill vs. State, 35 ALR 133, 220 Pac. 64 25 Okl. 314, wherein the court said:"The rule that the injury must amount to a physical wrong upon the person is too narrow; and the rule that any offense remotely or indirectly affecting domestic harmony comes within the exception is too broad. The better rule is that, when an offense directly attacks, or directly and vitally impairs, the conjugal relation, it comes within the exception to the statute that one shall not be a witness against the other except in a criminal prosecution for a crime committed (by) one against the other".Using the criterion thus judiciously enunciated in the Cargill case, it can be conclude that in the law of evidence the rape perpetrated by the father against his daughter is a crime committed by him against his wife (the victim\'s mother).[*]That conclusion is in harmony with the practices and traditions of the Filipino family where, normally, the daughter is close to the mother who, having breast-fed and reared her offspring, is always ready to render her counsel and assistance in time of need. Indeed, when the daughter is in distress or suffers moral or physical pain, she usually utters the word Inay (Mother) before she invokes the name of the Lord. 
(Madre mia, Dios mio)Thus, in this case, when Avelino Ordono, after having raped his daughter Leonora in the early morning of October 11, 1970, tried to repeat the beastly act in the evening of that date, Leonora shouted "Mother" and, on hearing that word, Avelino desisted.That the rape of the daughter by the father, an undeniably abominable and revolting crime with incestuous implications, positively undermines the connubial relationship, is a proposition too obvious to require much elucidation.In Wilkinson vs. People, 282 Pac. 257, it was held that the wife was a competent witness against the husband in a prosecution for rape committed by the husband against his stepdaughter, who is the wife\'s natural daughter because the crime was "an outrage upon nature in its dearest and tenderest relations as well as a crime against humanity itself". The court adopted the interpretation that "a criminal action or proceeding for a crime committed by one against the other" may refer to a crime where the wife is the individual particularly and directly injured or affected by the crime for which the husband is being prosecuted (See Dill vs. People, 19 Colo. 469, 475, 36 Pac. 229, 232).In State vs. Chambers, 87 Iowa 1, 53 N.W. 1090, it was held under the statutory provision that husband or wife shall in no case be a witness for or against the other, except in a criminal proceeding for a crime committed by one against the other, that the wife was competent to testify against the husband in a case where he was prosecuted for incest committed against his stepdaughter.In State vs. Shultz, 177 Iowa 321, 158 N.W. 539, it was held that the wife may testify against the husband in a case where he was prosecuted for incest committed against their eleven-year old daughter because incest is a "crime committed against the wife". (See Owens vs. State, 32 Neb. 167, 49 N.W. 226; Lord vs. State, 23 N.W. 507, 17 Neb. 526; People vs. Segura, 60 Phil. 
933).The trial court did not err in holding that Catalina Ordono could testify against her husband, Avelino Ordono, in the case where he is being tried for having raped their daughter, Leonora.WHEREFORE, the petition for certiorari and prohibition is dismissed. No costs.SO ORDERED.Makalintal, C.J., Castro, Fernando, Teehankee, Barredo, Makasiar, Esguerra, Fernandez, and Muñoz Palma, JJ., concur.Antonio, J., is on sick leave.[*] As noted by Wigmore, the marital disqualification rule consists of two parts: (1) the incapacity of one spouse to testify for the other, a disqualification designed to obviate perjury, and (2) the privilege of one spouse not to testify against the other, a right designed to present domestic disunion and unhappiness (U.S. vs. Concepcion, 31 Phil. 182, 187; 2 Wigmore on Evidence 731).Wigmore notes that the privilege has no longer any good reason for retention. "In an age which has so far rationalized, depolarized, and depolarized the marital relation and the spirit of Femininity as to be willing to enact complete legal and political equality and independence of man and woman, this marital privilege is the merest anachronism, in legal theory, and an indefensible obstruction to truth, in practice". After noting that some States had abolished the privilege, the American Bar Association\'s Committee on the Improvement of the Law of Evidence in 1937-38 recommended its abolition (8 Wigmore on Evidence 232).']
print(sample_text[0])

Avelino Ordono was charged in the municipal court of San Gabriel, La Union with having raped his daughter, Leonora, on October 11, 1970. The verified complaint dated November 7, 1973 was signed by the twenty-four year old victim (Criminal Case No. 104).In support of that complaint, Catalina Balanon Ordono, the mother of Leonora, executed a sworn statement wherein she disclosed that on that same date, October 11th, Leonora had apprised her of the outrage but no denunciation was filed because Avelino Ordono threatened to kill Leonora and Catalina (his daughter and wife, respectively) if they reported the crime to the police.Catalina Ordono in her sworn statement further revealed that her husband had also raped their other daughter, Rosa, on March 25 and April 7, 1973. He was charged in court with that offense.Catalina Ordono said that the rape committed by Avelino Ordono against Leonora was mentioned during the investigation and trial of Avelino Ordono for the rape committed against Rosa

In [None]:
# Run the fine-tuned model on sample_text. predict() hands back two values,
# unpacked here: `prediction` (0/1 flags per label, as displayed in the next
# cell) and `prediction_outputs` (per-label scores, displayed after that).
prediction, prediction_outputs = xlnet.predict(sample_text)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Show the predicted flags for the first entry of sample_text.
prediction[0]

[1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1]

In [None]:
# Show the per-label scores for the first entry of sample_text.
prediction_outputs[0]

array([0.97314453, 0.78417969, 0.08831787, 0.07629395, 0.60400391,
       0.68554688, 0.0114212 , 0.89501953, 0.07781982, 0.38574219,
       0.04653931, 0.30175781, 0.00989532, 0.93701172])

In [None]:
def label_ranked(probs, labels=None):
    """Pair each label with its score and rank the pairs by descending score.

    Parameters
    ----------
    probs : iterable of float
        One score per label, given in the same order as the labels.
    labels : iterable of str, optional
        Label names to pair with ``probs``. When omitted, the notebook-level
        ``lvl2_labels`` list is used, so the original one-argument call
        ``label_ranked(probs)`` keeps working unchanged.

    Returns
    -------
    list of tuple
        ``(label, score)`` pairs sorted from highest to lowest score. Pairs
        with equal scores keep their original relative order.

    Raises
    ------
    ValueError
        If the number of scores does not match the number of labels.
    """
    if labels is None:
        labels = lvl2_labels

    # Materialize both sides so their lengths can be compared; a bare zip()
    # would silently drop the surplus items and hide a label/score mismatch.
    names = list(labels)
    scores = list(probs)
    if len(names) != len(scores):
        raise ValueError(
            "label_ranked expected %d scores (one per label) but got %d"
            % (len(names), len(scores))
        )

    return sorted(zip(names, scores), key=lambda pair: pair[1], reverse=True)

In [None]:
# Pair the first sample's scores with the label names and sort them from the
# highest score to the lowest.
prediction_labels_ranked = label_ranked(prediction_outputs[0])

In [None]:
# Show the ranked (label, score) pairs.
prediction_labels_ranked

[('chastity', 0.97314453125),
 ('quasi_offense', 0.93701171875),
 ('persons', 0.89501953125),
 ('civil_status', 0.7841796875),
 ('liberty', 0.685546875),
 ('honor', 0.60400390625),
 ('public_interest', 0.3857421875),
 ('public_officers', 0.3017578125),
 ('drugs', 0.08831787109375),
 ('property', 0.07781982421875),
 ('fundamental_laws', 0.0762939453125),
 ('public_morals', 0.046539306640625),
 ('national_security', 0.01142120361328125),
 ('public_order', 0.00989532470703125)]

In [None]:
import textwrap

# Sample paragraph used to try out textwrap; the hard line breaks inside the
# literal are re-flowed by the wrapper below.
value = """This function wraps the input paragraph such that each line
in the paragraph is at most width characters long. The wrap method
returns a list of output lines. The returned list
is empty if the wrapped
output has no content."""

# A wrapper that limits every output line to 50 characters.
wrapper = textwrap.TextWrapper(width=50)

# wrap() returns the re-flowed paragraph as a list of lines.
word_list = wrapper.wrap(value)

# Emit the wrapped lines one per row.
for element in word_list:
    print(element)

This function wraps the input paragraph such that
each line in the paragraph is at most width
characters long. The wrap method returns a list of
output lines. The returned list is empty if the
wrapped output has no content.
