In [1]:
import pandas as pd
import numpy as np
from src import Utils, LSTM
from skorch.dataset import ValidSplit
from torch.nn import BCEWithLogitsLoss, NLLLoss, BCELoss

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Directory of the experiment under evaluation. Later cells scan it for subfolders,
# load '<subfolder>/LSTM.pkl' from each one, and write 'eval_metrics.csv' into it.
MODEL = 'model_lstm/test_point_2_hidden_300'

# Passed to Utils.get_train_test_split as the hold-out size; presumably the fraction of
# rows reserved for evaluation (TODO confirm against Utils).
TEST_SIZE = 0.2

In [3]:
# Load the labelled corpus. The preview below shows 'text' and 'label' columns plus an
# 'Unnamed: 0' column.
# NOTE(review): 'Unnamed: 0' looks like a row index that was saved into the CSV - confirm
# it is not used as a feature downstream.
dataset = Utils.read_csv_file('datasets/datasetall.csv')
# Bare last expression so the notebook renders the frame preview.
dataset

CSV file read successfully!


Unnamed: 0,text,label
0,Binay: Patuloy ang kahirapan dahil sa maling p...,0
1,SA GOBYERNONG TAPAT WELCOME SA BAGUO ANG LAHAT...,0
2,wait so ur telling me Let Leni Lead mo pero NY...,1
3,[USERNAME]wish this is just a nightmare that ...,0
4,doc willie ong and isko sabunutan po,0
...,...,...
28456,"Bisaya, Probinsyano/a, mostly Bisaya = katulong",1
28457,Amnesia. In my whole life wala pa ako nakasala...,1
28458,Kontrabida na ilang beses na tinalo at obvious...,1
28459,Yung antagonist laging kailangang sobrang sama...,1


In [4]:
# Seed the random number generators before any splitting happens.
# NOTE(review): which libraries get seeded is decided inside Utils - confirm it covers
# everything the split and the models rely on.
Utils.seed_random_number_generators()

Random number generators seeded.


In [5]:
# First split: training data vs. a hold-out set sized by TEST_SIZE. The previews below
# show 22769 training rows and 5692 hold-out rows. The hold-out set is later divided
# again into validation and test parts.
X_train, X_test, y_train, y_test = Utils.get_train_test_split(dataset, TEST_SIZE)

In [6]:
# Preview the training texts (rendered below as a 'text' Series).
X_train

0         [USERNAME] Palangga ka man sang mga taga Baco...
1                      Who dafuq is Jose Montemayor Jr.???
2        Di na nakakatuwa yung mukha ni Mar Roxas sa TV...
3                      national elections. | via[USERNAME]
4        Binay will be staring in a movie called "The D...
                               ...                        
22764    "Kala ko wala andito pala si Marcos."*pertaini...
22765    sie ~ [USERNAME]Marcos Magnanakaw Marcos Dikta...
22766                    If Mar is BatMarBinay is Bane-ay.
22767    to my moots im sorry in not sorry for flooding...
22768                                Uunlad tayo kay Binay
Name: text, Length: 22769, dtype: object

In [7]:
# Preview the training labels (rendered below as an int64 'label' Series with the same
# length as X_train).
y_train

0        0
1        0
2        1
3        0
4        1
        ..
22764    0
22765    1
22766    1
22767    1
22768    0
Name: label, Length: 22769, dtype: int64

In [8]:
# Preview the hold-out texts as returned by the first split, i.e. before they are
# divided into validation and test parts.
X_test

0                          Bakit trending ang Only Binay?
1       Mare @ Cebu [USERNAME][USERNAME] Marcos Never ...
2       Kahit anong gawin ko bakit di ko ma appreciate...
3       Oras na para tayo'y bumoto ng taong mag tataas...
4       VP[USERNAME]is currently in Zamboanga Sibugay ...
                              ...                        
5687      [USERNAME] Laban LeniAngat Buhay LahatLeni Kiko
5688    Nagconcede ka man Maimarwala ka prinnagdala ka...
5689    Did You Know that former Philippine secretary ...
5690           Bakit nakakairita commercial ni Mar Roxas?
5691    To Doc Willie Ong I'd like to believe you are ...
Name: text, Length: 5692, dtype: object

In [9]:
# Preview the hold-out labels as returned by the first split, i.e. before they are
# divided into validation and test parts.
y_test

0       0
1       1
2       1
3       0
4       0
       ..
5687    0
5688    1
5689    0
5690    1
5691    0
Name: label, Length: 5692, dtype: int64

In [10]:
# Size argument for the second split, which divides the hold-out set into a validation
# part and a final test part.
VAL_SPLIT = 0.5

# Re-assemble the hold-out texts and labels into the two-column frame that
# Utils.get_train_test_split takes as input.
holdout_frame = pd.DataFrame({'text': X_test, 'label': y_test})

# NOTE(review): X_test / y_test are rebound here from their own previous values, so this
# cell is not idempotent - running it a second time without re-running the first split
# would split the already reduced test set again.
X_val, X_test, y_val, y_test = Utils.get_train_test_split(holdout_frame, VAL_SPLIT)

In [11]:
# Preview the validation texts carved out of the hold-out set.
X_val

0                    let leni lead Jessica Soho nterviews
1                       Oy Let Leni Lead daw sabi ni tomi
2                                           Dapat Si Leni
3       [USERNAME]and[USERNAME] Beautiful girlWise too...
4       [USERNAME] TO DONATE GCash Primitiva C TRANSPA...
                              ...                        
2842                                        bobong marcos
2843                                    My kakampink rice
2844                    Poor Binay. Too desperate. Hekhek
2845               Yup correlated to IQ Marcos Magnanakaw
2846    Lesbros and sissies look o. Hahahaha Let Leni ...
Name: text, Length: 2847, dtype: object

In [12]:
# Preview the validation labels carved out of the hold-out set.
y_val

0       0
1       0
2       0
3       0
4       0
       ..
2842    1
2843    0
2844    1
2845    1
2846    0
Name: label, Length: 2847, dtype: int64

In [13]:
# Class-balance check for the validation labels: per-class row counts, smallest first.
y_val.value_counts(ascending=True)

label
0    1412
1    1435
Name: count, dtype: int64

In [14]:
# X_test was rebound by the validation split, so this preview shows the final test
# texts, not the full hold-out set displayed earlier.
X_test

0       Chika natin mga nagawa ni[USERNAME]ha pati sa ...
1                 glad that my man is kakampink Leni Kiko
2       Ayan daw mga nagawatalo pa DPWHPakivalidate mg...
3                Rizalito david is for morality ahahahaha
4       Its our choice and our right whom to vote for ...
                              ...                        
2840    Ayaw na nilang lagyan ng mukha ni Binay yung T...
2841    Im one of the Thank you VP[USERNAME]for the in...
2842    [USERNAME] lugaw ka putang ina mo pag nakaupo ...
2843    you know whats a bad joke? mar's shady mrt dea...
2844    Robredo leads Marcos snubs advertising on Face...
Name: text, Length: 2845, dtype: object

In [15]:
# y_test was rebound by the validation split, so this preview shows the final test
# labels, not the full hold-out labels displayed earlier.
y_test

0       0
1       0
2       0
3       0
4       0
       ..
2840    1
2841    0
2842    1
2843    1
2844    0
Name: label, Length: 2845, dtype: int64

In [16]:
# Class-balance check for the final test labels: per-class row counts, smallest first.
y_test.value_counts(ascending=True)

label
0    1411
1    1434
Name: count, dtype: int64

In [17]:
import os
import joblib


def _fold_sort_key(name):
  """Sort key for model folder names: numeric names by value ('2' before '10'), other names after them."""
  if name.isdecimal():
    return (0, int(name), name)
  return (1, 0, name)


# os.scandir yields directory entries in arbitrary order. Sorting the folder names makes
# row k of every metric list below refer to the same model on every run and machine.
# The `with` block closes the directory iterator as soon as the names are collected.
with os.scandir(MODEL) as entries:
  subfolders = sorted(
    (entry.name for entry in entries if entry.is_dir()),
    key=_fold_sort_key,
  )

# One entry per model folder, in `subfolders` order.
# Validation-split metrics:
total_accuracy = []
total_recall = []
total_precision = []
total_f1 = []
# Test-split metrics:
total_test_accuracy = []
total_test_recall = []
total_test_precision = []
total_test_f1 = []


for fold_name in subfolders:
  model_path = f'{MODEL}/{fold_name}/LSTM.pkl'
  print(model_path)
  # NOTE(review): joblib.load unpickles the file, which can execute arbitrary code.
  # Only point MODEL at folders whose contents you produced or trust.
  model = joblib.load(model_path)

  # Score the same model on the validation split ...
  accuracy, recall, precision, f1 = Utils.get_prediction_results(
    X_val,
    y_val,
    model,
  )

  # ... and on the test split. The Accuracy/Recall/Precision/F1 lines in the cell output
  # appear twice per model, presumably printed by the helper on each call.
  test_accuracy, test_recall, test_precision, test_f1 = Utils.get_prediction_results(
    X_test,
    y_test,
    model,
  )

  total_accuracy.append(accuracy)
  total_recall.append(recall)
  total_precision.append(precision)
  total_f1.append(f1)
  total_test_accuracy.append(test_accuracy)
  total_test_recall.append(test_recall)
  total_test_precision.append(test_precision)
  total_test_f1.append(test_f1)

model_lstm/test_point_2_hidden_300/0/LSTM.pkl


Accuracy: 0.7734457323498419
Recall: 0.9045296167247386
Precision: 0.7187153931339978
F1-score: 0.800987349583462
Accuracy: 0.7701230228471002
Recall: 0.9058577405857741
Precision: 0.7145214521452146
F1-score: 0.7988929889298892
model_lstm/test_point_2_hidden_300/1/LSTM.pkl
Accuracy: 0.7650158061116965
Recall: 0.7428571428571429
Precision: 0.780380673499268
F1-score: 0.7611567297393788
Accuracy: 0.7585237258347979
Recall: 0.7315202231520224
Precision: 0.7764618800888231
F1-score: 0.7533213644524238
model_lstm/test_point_2_hidden_300/2/LSTM.pkl
Accuracy: 0.7766069546891464
Recall: 0.735191637630662
Precision: 0.8047292143401983
F1-score: 0.7683903860160233
Accuracy: 0.7708260105448155
Recall: 0.7350069735006973
Precision: 0.7948717948717948
F1-score: 0.763768115942029
model_lstm/test_point_2_hidden_300/3/LSTM.pkl
Accuracy: 0.7664207938180541
Recall: 0.9198606271777003
Precision: 0.7058823529411765
F1-score: 0.7987897125567323
Accuracy: 0.772231985940246
Recall: 0.9358437935843794
Precis

In [18]:
# Map each exported column to the list collected in the evaluation loop. The mapping's
# order is also the column order of the table: validation metrics first, then the
# corresponding test metrics.
metric_columns = {
  'accuracy': total_accuracy,
  'recall': total_recall,
  'precision': total_precision,
  'f1': total_f1,
  'test_accuracy': total_test_accuracy,
  'test_recall': total_test_recall,
  'test_precision': total_test_precision,
  'test_f1': total_test_f1,
}

# One row per evaluated model, in the order the loop processed them.
metrics_data_frame = pd.DataFrame(metric_columns, columns=list(metric_columns))

# Store the table next to the model folders it summarises.
metrics_csv_path = f'{MODEL}/eval_metrics.csv'
metrics_data_frame.to_csv(metrics_csv_path)

In [19]:
# Completion marker: reaching this cell means every cell above ran without raising.
print("Done")

Done
