In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from typing import List, Tuple
import os
import joblib
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the pre-processed train/test frames that the later cells operate on.
# NOTE(review): read_pickle unpickles arbitrary objects — only point it at
# files produced by a trusted preprocessing step.
print("üìÇ Loading processed data...")
train_df, test_df = map(
    pd.read_pickle,
    ('../data/processed/train.pkl', '../data/processed/test.pkl'),
)

# Preprocessing variants; each name is used as a column suffix
# (tokens_<variant>, Bigrams_<variant>, Trigrams_<variant>) further down.
variants = ['without_lemma', 'with_lemma', 'with_lemma_pos', 'with_dep_tree', 'with_chunking']

üìÇ Loading processed data...


In [3]:
# Flatten one row's tokens (and optionally its n-grams) into a single string
def prepare_text(tokens: List, ngrams_b: List[Tuple], ngrams_t: List[Tuple], use_ngrams: bool = False) -> str:
    """
    Build the whitespace-separated document string fed to the TF-IDF step.

    Args:
        tokens: Sequence of tokens. The type of the FIRST element decides the
            format: ``str`` tokens are joined with spaces; ``tuple`` tokens
            have their members joined with ``_`` first. Any other element
            type contributes an empty token string.
        ngrams_b: Bigrams as tuples of strings (may be empty).
        ngrams_t: Trigrams as tuples of strings (may be empty).
        use_ngrams: When True, append the bigrams and trigrams (each gram
            joined with ``_``) after the token text.

    Returns:
        The assembled string. An empty ``tokens`` always yields ``''``, even
        when n-grams are supplied. With ``use_ngrams`` the three parts are
        joined by single spaces and only the outer whitespace is stripped, so
        an empty middle part leaves a double space.
    """
    def _underscore_join(grams) -> str:
        # Each gram/tuple becomes one "a_b_c" pseudo-word; empty input -> ''.
        return ' '.join('_'.join(parts) for parts in grams) if grams else ''

    if not tokens:
        return ''

    first = tokens[0]
    if isinstance(first, str):
        base = ' '.join(tokens)
    elif isinstance(first, tuple):
        base = _underscore_join(tokens)
    else:
        base = ''

    if not use_ngrams:
        return base

    return ' '.join((base, _underscore_join(ngrams_b), _underscore_join(ngrams_t))).strip()

In [4]:
# Metric containers filled by the training loop:
#   results_multi[variant]        -> metrics dict (multi-class run)
#   results_binary[dim][variant]  -> metrics dict (one binary task per dimension)
results_multi = dict()
results_binary = {'IE': {}, 'NS': {}, 'FT': {}, 'JP': {}}

In [5]:
# Train, evaluate and persist one binary classifier per dimension
# (IE, NS, FT, JP) for every preprocessing variant.
#
# Per variant: build the text column, fit TF-IDF on the training text only,
# then for each dimension oversample the training set with SMOTE, fit a
# logistic regression, score it on the untouched test set and save the
# (vectorizer, classifier) pair.

tqdm.pandas()  # registers DataFrame.progress_apply; once is enough

# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs('../models', exist_ok=True)

for var in variants:
    print(f"\nüîç Processing variant: {var}")

    # Prepare train and test texts
    print("üìù Preparing text features...")
    row_to_text = lambda row: prepare_text(
        row[f'tokens_{var}'], row[f'Bigrams_{var}'], row[f'Trigrams_{var}'], use_ngrams=True
    )
    train_df['text'] = train_df.progress_apply(row_to_text, axis=1)
    test_df['text'] = test_df.progress_apply(row_to_text, axis=1)

    X_train = train_df['text']
    X_test = test_df['text']

    # Multi-class (16 types) — currently disabled, kept for reference.
    # print("üß† Training multi-class model with balanced class weights...")
    # y_train_multi = train_df['type']
    # y_test_multi = test_df['type']

    # pipeline_multi = Pipeline([
    #     ('tfidf', TfidfVectorizer(max_features=5000, ngram_range=(1,3))),
    #     ('clf', LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, class_weight='balanced'))
    # ])
    # params = {'clf__C': [0.1, 1, 10], 'clf__solver': ['lbfgs', 'liblinear']}
    # grid = GridSearchCV(pipeline_multi, params, cv=5, scoring='f1_weighted')
    # grid.fit(X_train, y_train_multi)

    # pipeline_multi.fit(X_train, y_train_multi)

    # y_pred_multi = pipeline_multi.predict(X_test)

    # acc_multi = accuracy_score(y_test_multi, y_pred_multi)
    # f1_multi = f1_score(y_test_multi, y_pred_multi, average='weighted')

    # results_multi[var] = {'accuracy': acc_multi, 'f1': f1_multi}
    # print(f"Multi-class - Accuracy: {acc_multi:.4f}, F1: {f1_multi:.4f}")
    # print(classification_report(y_test_multi, y_pred_multi))

    # Save model
    # os.makedirs('models', exist_ok=True)
    # joblib.dump(pipeline_multi, f'models/multi_{var}.pkl')

    # The TF-IDF features depend only on the variant's text, not on the target
    # dimension, so vectorise once here instead of once per dimension.
    # The vocabulary is learned from the training text only.
    tfidf = TfidfVectorizer(max_features=5000, ngram_range=(1,3))
    X_train_tfidf = tfidf.fit_transform(X_train).toarray()
    X_test_tfidf = tfidf.transform(X_test).toarray()

    # Binary classifiers for each dimension
    for dim in ['IE', 'NS', 'FT', 'JP']:
        print(f"üß† Training binary model for {dim} with SMOTE...")
        y_train_bin = train_df[dim]
        y_test_bin = test_df[dim]

        # Oversample the minority class in the TRAINING data only; the test
        # features are scored as-is.
        smote = SMOTE(random_state=42)
        X_train_res, y_train_res = smote.fit_resample(X_train_tfidf, y_train_bin)

        clf = LogisticRegression(solver='lbfgs', max_iter=1000)
        clf.fit(X_train_res, y_train_res)
        y_pred_bin = clf.predict(X_test_tfidf)

        acc_bin = accuracy_score(y_test_bin, y_pred_bin)
        f1_bin = f1_score(y_test_bin, y_pred_bin, average='weighted')

        results_binary[dim][var] = {'accuracy': acc_bin, 'f1': f1_bin}
        print(f"{dim} - Accuracy: {acc_bin:.4f}, F1: {f1_bin:.4f}")
        print(classification_report(y_test_bin, y_pred_bin))

        # Persist the fitted vectorizer together with the classifier so the
        # pair can be reloaded and applied to new text consistently.
        joblib.dump((tfidf, clf), f'../models/binary_{dim}_{var}.pkl')


üîç Processing variant: without_lemma
üìù Preparing text features...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6940/6940 [00:03<00:00, 2006.97it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 2529.57it/s]


üß† Training binary model for IE with SMOTE...
IE - Accuracy: 0.7447, F1: 0.7522
              precision    recall  f1-score   support

           0       0.46      0.55      0.50       401
           1       0.85      0.80      0.83      1334

    accuracy                           0.74      1735
   macro avg       0.66      0.68      0.66      1735
weighted avg       0.76      0.74      0.75      1735

üß† Training binary model for NS with SMOTE...
NS - Accuracy: 0.8058, F1: 0.8169
              precision    recall  f1-score   support

           0       0.35      0.46      0.40       240
           1       0.91      0.86      0.88      1495

    accuracy                           0.81      1735
   macro avg       0.63      0.66      0.64      1735
weighted avg       0.83      0.81      0.82      1735

üß† Training binary model for FT with SMOTE...
FT - Accuracy: 0.7850, F1: 0.7853
              precision    recall  f1-score   support

           0       0.75      0.79      0.77  

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6940/6940 [00:04<00:00, 1710.19it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:01<00:00, 1527.06it/s]


üß† Training binary model for IE with SMOTE...
IE - Accuracy: 0.7372, F1: 0.7460
              precision    recall  f1-score   support

           0       0.44      0.55      0.49       401
           1       0.85      0.79      0.82      1334

    accuracy                           0.74      1735
   macro avg       0.65      0.67      0.66      1735
weighted avg       0.76      0.74      0.75      1735

üß† Training binary model for NS with SMOTE...
NS - Accuracy: 0.8086, F1: 0.8188
              precision    recall  f1-score   support

           0       0.35      0.46      0.40       240
           1       0.91      0.86      0.89      1495

    accuracy                           0.81      1735
   macro avg       0.63      0.66      0.64      1735
weighted avg       0.83      0.81      0.82      1735

üß† Training binary model for FT with SMOTE...
FT - Accuracy: 0.7867, F1: 0.7870
              precision    recall  f1-score   support

           0       0.75      0.79      0.77  

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6940/6940 [00:04<00:00, 1729.09it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 1756.89it/s]


üß† Training binary model for IE with SMOTE...
IE - Accuracy: 0.7354, F1: 0.7441
              precision    recall  f1-score   support

           0       0.44      0.54      0.48       401
           1       0.85      0.79      0.82      1334

    accuracy                           0.74      1735
   macro avg       0.65      0.67      0.65      1735
weighted avg       0.76      0.74      0.74      1735

üß† Training binary model for NS with SMOTE...
NS - Accuracy: 0.8058, F1: 0.8159
              precision    recall  f1-score   support

           0       0.34      0.45      0.39       240
           1       0.91      0.86      0.88      1495

    accuracy                           0.81      1735
   macro avg       0.63      0.65      0.64      1735
weighted avg       0.83      0.81      0.82      1735

üß† Training binary model for FT with SMOTE...
FT - Accuracy: 0.7781, F1: 0.7784
              precision    recall  f1-score   support

           0       0.74      0.79      0.77  

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6940/6940 [00:01<00:00, 6631.36it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 12123.86it/s]


üß† Training binary model for IE with SMOTE...
IE - Accuracy: 0.6697, F1: 0.6832
              precision    recall  f1-score   support

           0       0.33      0.43      0.38       401
           1       0.81      0.74      0.78      1334

    accuracy                           0.67      1735
   macro avg       0.57      0.59      0.58      1735
weighted avg       0.70      0.67      0.68      1735

üß† Training binary model for NS with SMOTE...
NS - Accuracy: 0.7228, F1: 0.7486
              precision    recall  f1-score   support

           0       0.19      0.31      0.24       240
           1       0.88      0.79      0.83      1495

    accuracy                           0.72      1735
   macro avg       0.53      0.55      0.53      1735
weighted avg       0.78      0.72      0.75      1735

üß† Training binary model for FT with SMOTE...
FT - Accuracy: 0.6484, F1: 0.6489
              precision    recall  f1-score   support

           0       0.61      0.65      0.63  

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6940/6940 [00:03<00:00, 2129.81it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 2602.26it/s]


üß† Training binary model for IE with SMOTE...
IE - Accuracy: 0.7268, F1: 0.7324
              precision    recall  f1-score   support

           0       0.42      0.47      0.44       401
           1       0.84      0.80      0.82      1334

    accuracy                           0.73      1735
   macro avg       0.63      0.64      0.63      1735
weighted avg       0.74      0.73      0.73      1735

üß† Training binary model for NS with SMOTE...
NS - Accuracy: 0.7758, F1: 0.7883
              precision    recall  f1-score   support

           0       0.26      0.35      0.30       240
           1       0.89      0.84      0.87      1495

    accuracy                           0.78      1735
   macro avg       0.58      0.60      0.58      1735
weighted avg       0.80      0.78      0.79      1735

üß† Training binary model for FT with SMOTE...
FT - Accuracy: 0.7516, F1: 0.7517
              precision    recall  f1-score   support

           0       0.73      0.74      0.73  

In [7]:

print("üìÇ Loading processed data...")
train_df = pd.read_pickle('../data/processed/train.pkl')
test_df = pd.read_pickle('../data/processed/test.pkl')

# Variants (same as before)
variants = [
    'without_lemma',
    'with_lemma',
    'with_lemma_pos',
    'with_dep_tree',
    'with_chunking'
]

# Results storage for balanced test eval
results_binary_balanced = {dim: {} for dim in ['IE', 'NS', 'FT', 'JP']}

# Loop over variants
for var in variants:
    print(f"\nüîç Processing variant: {var} for balanced test eval")
    
    # Prepare test texts (same as in training)
    print("üìù Preparing text features for test...")
    tqdm.pandas()
    test_df['text'] = test_df.progress_apply(
        lambda row: prepare_text(row[f'tokens_{var}'], row[f'Bigrams_{var}'], row[f'Trigrams_{var}'], use_ngrams=True), axis=1
    )
    X_test = test_df['text']
    
    # Binary dimensions
    for dim in ['IE', 'NS', 'FT', 'JP']:
        print(f"‚öñÔ∏è Loading model and evaluating {dim} on SMOTE-balanced test...")
        y_test_bin = test_df[dim]
        
        # Load the saved tfidf and clf
        model_path = f'../models/binary_{dim}_{var}.pkl'
        tfidf, clf = joblib.load(model_path)
        
        # Vectorize test (dense for SMOTE)
        X_test_tfidf = tfidf.transform(X_test).toarray()
        
        # Apply SMOTE to test data
        smote_test = SMOTE(random_state=42)
        X_test_res, y_test_res = smote_test.fit_resample(X_test_tfidf, y_test_bin)
        
        # Predict on balanced test
        y_pred_bin = clf.predict(X_test_res)
        
        acc_bin = accuracy_score(y_test_res, y_pred_bin)
        f1_bin = f1_score(y_test_res, y_pred_bin, average='weighted')
        
        results_binary_balanced[dim][var] = {'accuracy': acc_bin, 'f1': f1_bin}
        print(f"{dim} - Accuracy (on SMOTE-balanced test): {acc_bin:.4f}, F1: {f1_bin:.4f}")
        print(classification_report(y_test_res, y_pred_bin))

# Compare results for balanced test
for dim in ['IE', 'NS', 'FT', 'JP']:
    print(f"\nüìä Comparison of Binary {dim} Results on Balanced Test:")
    for var, res in results_binary_balanced[dim].items():
        print(f"{var}: Accuracy={res['accuracy']:.4f}, F1={res['f1']:.4f}")

print("‚úÖ Balanced test evaluation complete!")

üìÇ Loading processed data...

üîç Processing variant: without_lemma for balanced test eval
üìù Preparing text features for test...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 4944.89it/s]


‚öñÔ∏è Loading model and evaluating IE on SMOTE-balanced test...
IE - Accuracy (on SMOTE-balanced test): 0.7148, F1: 0.7125
              precision    recall  f1-score   support

           0       0.76      0.63      0.69      1334
           1       0.68      0.80      0.74      1334

    accuracy                           0.71      2668
   macro avg       0.72      0.71      0.71      2668
weighted avg       0.72      0.71      0.71      2668

‚öñÔ∏è Loading model and evaluating NS on SMOTE-balanced test...
NS - Accuracy (on SMOTE-balanced test): 0.6729, F1: 0.6609
              precision    recall  f1-score   support

           0       0.78      0.48      0.60      1495
           1       0.63      0.86      0.72      1495

    accuracy                           0.67      2990
   macro avg       0.70      0.67      0.66      2990
weighted avg       0.70      0.67      0.66      2990

‚öñÔ∏è Loading model and evaluating FT on SMOTE-balanced test...
FT - Accuracy (on SMOTE-balanced 

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 3638.53it/s]


‚öñÔ∏è Loading model and evaluating IE on SMOTE-balanced test...
IE - Accuracy (on SMOTE-balanced test): 0.7009, F1: 0.6983
              precision    recall  f1-score   support

           0       0.75      0.61      0.67      1334
           1       0.67      0.79      0.73      1334

    accuracy                           0.70      2668
   macro avg       0.71      0.70      0.70      2668
weighted avg       0.71      0.70      0.70      2668

‚öñÔ∏è Loading model and evaluating NS on SMOTE-balanced test...
NS - Accuracy (on SMOTE-balanced test): 0.6736, F1: 0.6612
              precision    recall  f1-score   support

           0       0.78      0.48      0.60      1495
           1       0.63      0.86      0.73      1495

    accuracy                           0.67      2990
   macro avg       0.70      0.67      0.66      2990
weighted avg       0.70      0.67      0.66      2990

‚öñÔ∏è Loading model and evaluating FT on SMOTE-balanced test...
FT - Accuracy (on SMOTE-balanced 

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 4155.14it/s]


‚öñÔ∏è Loading model and evaluating IE on SMOTE-balanced test...
IE - Accuracy (on SMOTE-balanced test): 0.6979, F1: 0.6950
              precision    recall  f1-score   support

           0       0.75      0.60      0.67      1334
           1       0.67      0.79      0.72      1334

    accuracy                           0.70      2668
   macro avg       0.71      0.70      0.70      2668
weighted avg       0.71      0.70      0.70      2668

‚öñÔ∏è Loading model and evaluating NS on SMOTE-balanced test...
NS - Accuracy (on SMOTE-balanced test): 0.6856, F1: 0.6753
              precision    recall  f1-score   support

           0       0.79      0.51      0.62      1495
           1       0.64      0.86      0.73      1495

    accuracy                           0.69      2990
   macro avg       0.71      0.69      0.68      2990
weighted avg       0.71      0.69      0.68      2990

‚öñÔ∏è Loading model and evaluating FT on SMOTE-balanced test...
FT - Accuracy (on SMOTE-balanced 

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 22380.53it/s]

‚öñÔ∏è Loading model and evaluating IE on SMOTE-balanced test...





IE - Accuracy (on SMOTE-balanced test): 0.6192, F1: 0.6134
              precision    recall  f1-score   support

           0       0.66      0.50      0.57      1334
           1       0.60      0.74      0.66      1334

    accuracy                           0.62      2668
   macro avg       0.63      0.62      0.61      2668
weighted avg       0.63      0.62      0.61      2668

‚öñÔ∏è Loading model and evaluating NS on SMOTE-balanced test...
NS - Accuracy (on SMOTE-balanced test): 0.5666, F1: 0.5441
              precision    recall  f1-score   support

           0       0.62      0.34      0.44      1495
           1       0.55      0.79      0.65      1495

    accuracy                           0.57      2990
   macro avg       0.58      0.57      0.54      2990
weighted avg       0.58      0.57      0.54      2990

‚öñÔ∏è Loading model and evaluating FT on SMOTE-balanced test...
FT - Accuracy (on SMOTE-balanced test): 0.6544, F1: 0.6544
              precision    recall  f1-s

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1735/1735 [00:00<00:00, 4054.07it/s]


‚öñÔ∏è Loading model and evaluating IE on SMOTE-balanced test...
IE - Accuracy (on SMOTE-balanced test): 0.6675, F1: 0.6613
              precision    recall  f1-score   support

           0       0.73      0.53      0.62      1334
           1       0.63      0.80      0.71      1334

    accuracy                           0.67      2668
   macro avg       0.68      0.67      0.66      2668
weighted avg       0.68      0.67      0.66      2668

‚öñÔ∏è Loading model and evaluating NS on SMOTE-balanced test...
NS - Accuracy (on SMOTE-balanced test): 0.6579, F1: 0.6456
              precision    recall  f1-score   support

           0       0.75      0.47      0.58      1495
           1       0.62      0.84      0.71      1495

    accuracy                           0.66      2990
   macro avg       0.68      0.66      0.65      2990
weighted avg       0.68      0.66      0.65      2990

‚öñÔ∏è Loading model and evaluating FT on SMOTE-balanced test...
FT - Accuracy (on SMOTE-balanced 