In [1]:
import pandas as pd
import numpy as np
from src import Utils, LSTM
from skorch.dataset import ValidSplit
from torch.nn import BCEWithLogitsLoss, NLLLoss, BCELoss

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Directory of saved runs: the evaluation cell treats every immediate subfolder
# as one run containing `LSTM.pkl`, and `eval_metrics.csv` is written here.
# NOTE(review): the recorded output of the evaluation cell lists paths under
# `model_lstm/bcewithlogitsloss`, which does not match this value — the saved
# outputs look stale; restart the kernel and run all cells before trusting them.
MODEL = 'model_lstm/early_lstm_1-10'

# Size argument passed to Utils.get_train_test_split for the first split
# (recorded outputs: 25616 training rows, 2845 held-out rows).
TEST_SIZE = 0.1

In [3]:
# Load the labelled corpus. The recorded output shows `text` and `label`
# columns plus an `Unnamed: 0` column (presumably a saved index — confirm).
dataset = Utils.read_csv_file('datasets/datasetall.csv')
# Bare last expression: renders the frame (pandas truncates long frames).
dataset

CSV file read successfully!


Unnamed: 0,text,label
0,Binay: Patuloy ang kahirapan dahil sa maling p...,0
1,SA GOBYERNONG TAPAT WELCOME SA BAGUO ANG LAHAT...,0
2,wait so ur telling me Let Leni Lead mo pero NY...,1
3,[USERNAME]wish this is just a nightmare that ...,0
4,doc willie ong and isko sabunutan po,0
...,...,...
28456,"Bisaya, Probinsyano/a, mostly Bisaya = katulong",1
28457,Amnesia. In my whole life wala pa ako nakasala...,1
28458,Kontrabida na ilang beses na tinalo at obvious...,1
28459,Yung antagonist laging kailangang sobrang sama...,1


In [4]:
# Seed the random number generators before any splitting happens.
# NOTE(review): presumably this is what makes the splits below reproducible —
# confirm that Utils.get_train_test_split relies on the generators seeded here.
Utils.seed_random_number_generators()

Random number generators seeded.


In [5]:
# First split: training data vs. a held-out portion sized by TEST_SIZE.
# The held-out part (`X_test`, `y_test`) is split again later into validation
# and test sets, so these two names are rebound further down the notebook.
X_train, X_test, y_train, y_test = Utils.get_train_test_split(dataset, TEST_SIZE)

In [6]:
# Inspect the training texts produced by the first split.
X_train

0        pag hindi nanalo si Norberto Gonzales pwede ba...
1        Ngayon lang ako super proud sa PRESIDENTE na i...
2        JUST SAW SOMEONE CALL BBM BLENGBLONG HAHAHAHAH...
3        Rep. Binay on her leadership style: I am very ...
4        Liwanag o dilim? May oras pa. Kakampink Leni L...
                               ...                        
25611    "Kala ko wala andito pala si Marcos."*pertaini...
25612    cathy [USERNAME] Dec [USERNAME] parang tanga i...
25613                             Nognog+pandak= BINAY ftw
25614    BINAY:Did your enormous wealth all come from y...
25615                                Uunlad tayo kay Binay
Name: text, Length: 25616, dtype: object

In [7]:
# Inspect the training labels produced by the first split.
y_train

0        1
1        0
2        1
3        0
4        0
        ..
25611    0
25612    1
25613    1
25614    1
25615    0
Name: label, Length: 25616, dtype: int64

In [8]:
# Inspect the held-out texts from the first split. `X_test` is rebound later
# in the notebook, so this output shows the hold-out before it is divided.
X_test

0       PRESIDENTE DUTERTE I'm sure in last debateitao...
1       CHANGE IS BADLY NEEDED No To Mar Roxas2016 Dut...
2                                One Pink March Leni Kiko
3                               see youuu later Leni Kiko
4       [USERNAME] Nangyari na yan eh pero kahit anong...
                              ...                        
2840    kaya siguro umabot ng milyon yung boto kay MAR...
2841    Dedicating my 21km run for my chosen Presand V...
2842    Bakit si Mar? Because DuterteGrace Poe and VP ...
2843    patalo po ung patalastas ni Mar Roxas....malas...
2844    Kapihan with Sen. Bongbong Marcos startshe say...
Name: text, Length: 2845, dtype: object

In [9]:
# Inspect the held-out labels from the first split. `y_test` is rebound later
# in the notebook, so this output shows the hold-out before it is divided.
y_test

0       0
1       0
2       0
3       0
4       0
       ..
2840    1
2841    0
2842    1
2843    1
2844    0
Name: label, Length: 2845, dtype: int64

In [10]:
# Size argument for the second split: the hold-out is divided roughly in half
# (recorded outputs: 1423 validation rows, 1422 test rows).
VAL_SPLIT = 0.5

# Re-assemble the held-out texts and labels into one frame so the same helper
# used for the first split can be applied to it.
holdout_frame = pd.DataFrame({'text': X_test, 'label': y_test})

# NOTE(review): this rebinds `X_test` / `y_test`, so the cell is not idempotent —
# running it a second time without re-running the first split divides the
# already-halved test set again.
X_val, X_test, y_val, y_test = Utils.get_train_test_split(holdout_frame, VAL_SPLIT)

In [11]:
# Inspect the validation texts produced by the second split.
X_val

0       Un ihaw na balat ng boyba kasi 5 pesos, tas 2 ...
1       . [USERNAME] PAMLYANG MULNG MAGWAWASAK SA PLPN...
2       "Sama-sama Tayong Babangon Muli!""BBM-Sara par...
3       [USERNAME] [USERNAME]and[USERNAME] Kay Leni Ta...
4       So pure.. if wala si BBM my vote will go for N...
                              ...                        
1418    Dylecious28 [USERNAME] [USERNAME] At nakita ko...
1419    [USERNAME] When asked how to safeguard growing...
1420    A President is a job but it s the person that ...
1421    Santiago at Duterte tatakbo baka mahati yung b...
1422    ABL is Angat Buhay Lahat dahil sa Gobyernong T...
Name: text, Length: 1423, dtype: object

In [12]:
# Inspect the validation labels produced by the second split.
y_val

0       0
1       0
2       0
3       1
4       0
       ..
1418    0
1419    0
1420    0
1421    1
1422    0
Name: label, Length: 1423, dtype: int64

In [13]:
# Class balance of the validation labels, least frequent class first.
y_val.value_counts(ascending=True)

label
0    706
1    717
Name: count, dtype: int64

In [14]:
# Inspect the final test texts (the hold-out after the validation split).
X_test

0       lexi miner [USERNAME] Dec [USERNAME] Leody De ...
1                                   Sooo not Mar Roxas ??
2       [USERNAME]and[USERNAME] Si VP LENI ay NAGAWA a...
3       ??Only Binayonly Binay?? Tama na Binay! Kakasi...
4       An honor to be part of this to know and to be ...
                              ...                        
1417    Pulse Asia: Duterte pulls away in presidential...
1418    In reality, we should let them die, is that wh...
1419    PDI reporter Carvajalwhose reports were quoted...
1420                Nababadtrip talaga ako sa ad ni binay
1421                                        Dapat Si Leni
Name: text, Length: 1422, dtype: object

In [15]:
# Inspect the final test labels (the hold-out after the validation split).
y_test

0       0
1       0
2       0
3       1
4       0
       ..
1417    0
1418    0
1419    0
1420    1
1421    0
Name: label, Length: 1422, dtype: int64

In [16]:
# Class balance of the final test labels, least frequent class first.
y_test.value_counts(ascending=True)

label
0    705
1    717
Name: count, dtype: int64

In [17]:
import os
import joblib

# Evaluate every saved model under MODEL on the validation and test splits.
# Each immediate subfolder of MODEL is treated as one run holding `LSTM.pkl`.
#
# os.scandir yields entries in arbitrary, OS-dependent order, so the run
# folders are sorted (numeric names in numeric order, any other names
# alphabetically after them). This keeps the order of the collected metrics
# stable between executions and traceable back to `subfolders`.
with os.scandir(MODEL) as entries:
  subfolders = sorted(
    (entry.name for entry in entries if entry.is_dir()),
    key=lambda name: (0, int(name), name) if name.isdecimal() else (1, 0, name),
  )

# Per-model metrics: one entry per run folder, in the same order as `subfolders`.
# Validation-split metrics.
total_accuracy = []
total_recall = []
total_precision = []
total_f1 = []
# Test-split metrics.
total_test_accuracy = []
total_test_recall = []
total_test_precision = []
total_test_f1 = []


for run_name in subfolders:
  model_path = f'{MODEL}/{run_name}/LSTM.pkl'
  print(model_path)
  # NOTE(security): joblib.load unpickles the file and can execute arbitrary
  # code while doing so; only load model files that come from a trusted source.
  model = joblib.load(model_path)

  # Score the model on the validation split.
  accuracy, recall, precision, f1 = Utils.get_prediction_results(
    X_val,
    y_val,
    model,
  )

  # Score the same model on the test split.
  test_accuracy, test_recall, test_precision, test_f1 = Utils.get_prediction_results(
    X_test,
    y_test,
    model,
  )

  total_accuracy.append(accuracy)
  total_recall.append(recall)
  total_precision.append(precision)
  total_f1.append(f1)
  total_test_accuracy.append(test_accuracy)
  total_test_recall.append(test_recall)
  total_test_precision.append(test_precision)
  total_test_f1.append(test_f1)

model_lstm/bcewithlogitsloss/0/LSTM.pkl


Accuracy: 0.769501054111033
Recall: 0.7712691771269177
Precision: 0.7712691771269177
F1-score: 0.7712691771269177
Accuracy: 0.7876230661040787
Recall: 0.7587168758716876
Precision: 0.8083209509658247
F1-score: 0.7827338129496403
model_lstm/bcewithlogitsloss/1/LSTM.pkl
Accuracy: 0.7793394237526353
Recall: 0.8019525801952581
Precision: 0.7697456492637216
F1-score: 0.7855191256830601
Accuracy: 0.7841068917018285
Recall: 0.7907949790794979
Precision: 0.7831491712707183
F1-score: 0.7869535045107564
model_lstm/bcewithlogitsloss/2/LSTM.pkl
Accuracy: 0.7449051300070274
Recall: 0.7684797768479776
Precision: 0.7366310160427807
F1-score: 0.7522184300341296
Accuracy: 0.749648382559775
Recall: 0.7629009762900977
Precision: 0.7462482946793997
F1-score: 0.7544827586206897
model_lstm/bcewithlogitsloss/3/LSTM.pkl
Accuracy: 0.7624736472241743
Recall: 0.7601115760111576
Precision: 0.7665260196905767
F1-score: 0.7633053221288516
Accuracy: 0.7763713080168776
Recall: 0.7726638772663877
Precision: 0.78138222

In [18]:
# Map each CSV column name to its per-model metric list: validation-split
# metrics first, then the `test_`-prefixed test-split metrics.
metric_columns = {
  'accuracy': total_accuracy,
  'recall': total_recall,
  'precision': total_precision,
  'f1': total_f1,
  'test_accuracy': total_test_accuracy,
  'test_recall': total_test_recall,
  'test_precision': total_test_precision,
  'test_f1': total_test_f1,
}

# One row per evaluated model, columns in the order declared above.
metrics_data_frame = pd.DataFrame(metric_columns, columns=list(metric_columns))

# Persist the table next to the evaluated models (default integer index kept).
metrics_path = f'{MODEL}/eval_metrics.csv'
metrics_data_frame.to_csv(metrics_path)

In [19]:
# Completion marker: reaching this cell means every cell above ran without raising.
print("Done")

Done
