In [1]:
import pandas as pd

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
def map_labels(x):
    if x == 'important' or x == '\'important\'' or x == '1' or x == '\'1\'':
        return 1
    elif x == 'not important' or x == '\'not important\'' or x == '0' or x == '\'0\'':
        return 0
    return x

In [3]:
aya_df = pd.read_csv('../Datasets/System_User_Prompt_Full_Text.csv')
# columns_to_evaluate = ['predicted_k_0', 'predicted_k_1', 'predicted_k_5', 'predicted_k_20', 'predicted_k_50']
columns_to_evaluate = ['predicted_k_20']

In [4]:
aya_df.head(5)

Unnamed: 0,text,text_type,real_tag,category,predicted_k_0,predicted_k_1,predicted_k_2,predicted_k_3,predicted_k_4,predicted_k_5,predicted_k_10,predicted_k_20,predicted_k_50
0,اما چرا امیرحسین مقصودلو (تتلو) قصد بازگشت به ...,full_text,0,اجتماعی,,,,,,,,0,
1,به گزارش خبرگزاری خبرآنلاین و به نقل از مرکز ر...,full_text,0,اجتماعی,,,,,,,,1,
2,بهرام عین‌الهی عصر امروز (۱۸ شهریورماه) در قرا...,full_text,1,اجتماعی,,,,,,,,1,
3,به گزارش فرارو و به نقل از سایت رسمی باشگاه اس...,full_text,1,ورزشی,,,,,,,,0,
4,به گزارش همشهری آنلاین، شورای هماهنگی تبلیغات ...,full_text,0,سیاسی,,,,,,,,0,


In [5]:
aya_df['real_tag'] = aya_df['real_tag'].apply(map_labels)
for column in columns_to_evaluate:
    aya_df[column] = aya_df[column].apply(map_labels)

In [6]:
true_labels = aya_df['real_tag'].dropna()
true_labels_first_400 = aya_df['real_tag'].dropna().iloc[:401]

num_true1 = (true_labels_first_400 == 1).sum()
num_true0 = (true_labels_first_400 == 0).sum()

# num_true1 = (true_labels == 1).sum()
# num_true0 = (true_labels == 0).sum()

print(f"  Number of true '1' labels: {num_true1}")
print(f"  Number of true '0' labels: {num_true0}")

  Number of true '1' labels: 77
  Number of true '0' labels: 324


In [7]:
for column in columns_to_evaluate:

    pred_labels = aya_df[column].dropna()

    common_indices = true_labels.index.intersection(pred_labels.index)

    true_labels = true_labels.loc[common_indices]
    pred_labels = pred_labels.loc[common_indices]

    # for label in pred_labels:
    #     print(label)

    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels)
    recall = recall_score(true_labels, pred_labels)
    f1 = f1_score(true_labels, pred_labels)

    num_58 = (aya_df[column] == 1).sum()
    num_47 = (aya_df[column] == 0).sum()

    print(f"Metrics for column {column}:")
    print(f"  Accuracy: {accuracy:.2f}")
    print(f"  Precision: {precision:.2f}")
    print(f"  Recall: {recall:.2f}")
    print(f"  F1 Score: {f1:.2f}")
    print(f"  Number of '58' labels: {num_58}")
    print(f"  Number of '47' labels: {num_47}")
    print("\n")


Metrics for column predicted_k_20:
  Accuracy: 0.56
  Precision: 0.25
  Recall: 0.64
  F1 Score: 0.36
  Number of '58' labels: 196
  Number of '47' labels: 205




In [8]:
for column in columns_to_evaluate:

    pred_labels = aya_df[column].dropna()
    aya_df[column] = aya_df[column].apply(map_labels)

    common_indices = true_labels.index.intersection(pred_labels.index)
    true_labels_common = true_labels.loc[common_indices]
    pred_labels_common = pred_labels.loc[common_indices]

    print(f"Metrics for column {column}:")

    report = classification_report(true_labels_common, pred_labels_common, target_names=['0', '1'])
    print(report)

    num_1 = (aya_df[column] == 1).sum()
    num_0 = (aya_df[column] == 0).sum()

    print(f"  Number of '1' labels: {num_58}")
    print(f"  Number of '0' labels: {num_47}")
    print("\n")

Metrics for column predicted_k_20:
              precision    recall  f1-score   support

           0       0.86      0.55      0.67       324
           1       0.25      0.64      0.36        77

    accuracy                           0.56       401
   macro avg       0.56      0.59      0.51       401
weighted avg       0.75      0.56      0.61       401

  Number of '1' labels: 196
  Number of '0' labels: 205




### *kfold Measurment*

In [9]:
from sklearn.model_selection import KFold
import numpy as np

In [10]:
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

In [11]:
macro_avg_precision_scores = []
macro_avg_recall_scores = []
macro_avg_f1_scores = []
macro_avg_accuracy_scores = [] 

In [12]:
for fold, (train_index, test_index) in enumerate(kf.split(aya_df)):
    print(f"\nFold {fold + 1}/{n_splits}")

    train_df = aya_df.iloc[train_index]
    test_df = aya_df.iloc[test_index]

    train_df['real_tag'] = train_df['real_tag'].apply(map_labels)
    test_df['real_tag'] = test_df['real_tag'].apply(map_labels)

    for column in columns_to_evaluate:
        train_df[column] = train_df[column].apply(map_labels)
        test_df[column] = test_df[column].apply(map_labels)

        true_labels_test = test_df['real_tag'].dropna()
        pred_labels_test = test_df[column].dropna()

        common_indices_test = true_labels_test.index.intersection(pred_labels_test.index)
        
        true_labels_test = true_labels_test.loc[common_indices_test]
        pred_labels_test = pred_labels_test.loc[common_indices_test]

        accuracy = accuracy_score(true_labels_test, pred_labels_test)
        report_dict = classification_report(true_labels_test, pred_labels_test, target_names=['0', '1'], output_dict=True)

        macro_avg_precision = report_dict['macro avg']['precision']
        macro_avg_recall = report_dict['macro avg']['recall']
        macro_avg_f1 = report_dict['macro avg']['f1-score']

        macro_avg_accuracy_scores.append(accuracy)
        macro_avg_precision_scores.append(macro_avg_precision)
        macro_avg_recall_scores.append(macro_avg_recall)
        macro_avg_f1_scores.append(macro_avg_f1)

        print(f"\nMetrics for column {column} in fold {fold + 1}:")
        report = classification_report(true_labels_test, pred_labels_test, target_names=['0', '1'])
        print(report)


Fold 1/5

Metrics for column predicted_k_20 in fold 1:
              precision    recall  f1-score   support

           0       0.92      0.51      0.66        68
           1       0.21      0.75      0.33        12

    accuracy                           0.55        80
   macro avg       0.57      0.63      0.50        80
weighted avg       0.82      0.55      0.61        80


Fold 2/5

Metrics for column predicted_k_20 in fold 2:
              precision    recall  f1-score   support

           0       0.80      0.48      0.60        67
           1       0.20      0.53      0.30        17

    accuracy                           0.49        84
   macro avg       0.50      0.50      0.45        84
weighted avg       0.68      0.49      0.54        84


Fold 3/5

Metrics for column predicted_k_20 in fold 3:
              precision    recall  f1-score   support

           0       0.88      0.59      0.71        64
           1       0.33      0.72      0.46        18

    accuracy  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['real_tag'] = train_df['real_tag'].apply(map_labels)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['real_tag'] = test_df['real_tag'].apply(map_labels)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df[column] = train_df[column].apply(map_labels)
A value is trying to be set o

In [13]:
print("\n=== K-Fold Cross Validation Results (Macro Avg) ===")
print(f"Accuracy: Mean={np.mean(macro_avg_accuracy_scores):.4f}, Variance={np.var(macro_avg_accuracy_scores):.4f}")
print(f"Precision (Macro Avg): Mean={np.mean(macro_avg_precision_scores):.4f}, Variance={np.var(macro_avg_precision_scores):.4f}")
print(f"Recall (Macro Avg): Mean={np.mean(macro_avg_recall_scores):.4f}, Variance={np.var(macro_avg_recall_scores):.4f}")
print(f"F1 Score (Macro Avg): Mean={np.mean(macro_avg_f1_scores):.4f}, Variance={np.var(macro_avg_f1_scores):.4f}")


print(f"\nAccuracy: Range={np.max(macro_avg_accuracy_scores) - np.min(macro_avg_accuracy_scores):.4f}")
print(f"Precision (Macro Avg): Range={np.max(macro_avg_precision_scores) - np.min(macro_avg_precision_scores):.4f}")
print(f"Recall (Macro Avg): Range={np.max(macro_avg_recall_scores) - np.min(macro_avg_recall_scores):.4f}")
print(f"F1 Score (Macro Avg): Range={np.max(macro_avg_f1_scores) - np.min(macro_avg_f1_scores):.4f}")


=== K-Fold Cross Validation Results (Macro Avg) ===
Accuracy: Mean=0.5643, Variance=0.0020
Precision (Macro Avg): Mean=0.5585, Variance=0.0012
Recall (Macro Avg): Mean=0.5965, Variance=0.0029
F1 Score (Macro Avg): Mean=0.5143, Variance=0.0021

Accuracy: Range=0.1339
Precision (Macro Avg): Range=0.1063
Recall (Macro Avg): Range=0.1545
F1 Score (Macro Avg): Range=0.1366
