In [1]:
import os
import pandas as pd
from utils import prepare_labeled_sentences, prepare_labeled_sentences_spacy

Read Datasets

In [2]:
# BBC Dataset
bbc_df = pd.read_csv("data/bbc/bbc_dataset.csv")

#IMDB Dataset
imdb_df = pd.read_csv("data/imdb/imdb.csv")

In [3]:
# Preview to confirm structure
print("BBC Sample:")
display(bbc_df.head())

In [4]:
print("IMDB Sample:")
display(imdb_df.head())

Preprocess BBC Datasets

In [5]:
# Process the BBC dataset
bbc_labeled_data = prepare_labeled_sentences_spacy(bbc_df)

# Convert to DataFrame for modeling
bbc_processed_df = pd.DataFrame(
    [
        {
            "article_id": item["article_id"],
            "article_sentences": item["raw_sentence"],
            "preprocessed_sentence": item["preprocessed_sentence"],
            "label": item["label"],
        }
        for item in bbc_labeled_data
    ]
)

Preprocessing articles: 100%|██████████| 2225/2225 [04:53<00:00,  7.58it/s]


In [6]:
bbc_processed_df.shape

In [7]:
# Count how many sentences are labeled as summary sentences
summary_count = bbc_processed_df['label'].sum()
total_count = len(bbc_processed_df)
print(f"Summary sentences: {summary_count} out of {total_count} ({summary_count/total_count:.2%})")

# Show some examples of sentences included in summaries
print("\nExample summary sentences:")
display(bbc_processed_df[bbc_processed_df['label'] == 1].head(3))

In [8]:
bbc_processed_df.head(60)

Preprocessed IMDB Dataset

In [9]:
# Process the BBC dataset
imdb_labeled_df = prepare_labeled_sentences_spacy(imdb_df[:4000])

# Convert to DataFrame for modeling
imdb_processed_df = pd.DataFrame(
    [
        {
            "article_id": item["article_id"],
            "article_sentences": item["raw_sentence"],
            "preprocessed_sentence": item["preprocessed_sentence"],
            "label": item["label"],
        }
        for item in imdb_labeled_df
    ]
)

Preprocessing articles: 100%|██████████| 4000/4000 [03:29<00:00, 19.10it/s]


In [10]:
imdb_processed_df.shape

In [11]:
# Count how many sentences are labeled as summary sentences
summary_count = imdb_processed_df['label'].sum()
total_count = len(imdb_processed_df)
print(f"Summary sentences: {summary_count} out of {total_count} ({summary_count/total_count:.2%})")

# Show some examples of sentences included in summaries
print("\nExample summary sentences:")
display(imdb_processed_df[imdb_processed_df['label'] == 1].head(3))

In [12]:
print(imdb_processed_df["article_sentences"][2])

In [13]:
imdb_processed_df.head(60)

kNN

In [None]:
from ML_models.knn import KNNExtractiveSummarizer
from sklearn.model_selection import train_test_split

# Prepare data
X = bbc_processed_df["preprocessed_sentence"]
y = bbc_processed_df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initialize and tune model
knn_bbc = KNNExtractiveSummarizer()

# Fine-tune the model on the training set
knn_bbc.tune(X_train, y_train, n_iter=10, scoring="f1")  # You can increase n_iter

# Evaluate
print("BBC Dataset Evaluation (KNN):")
knn_bbc.evaluate(X_test, y_test)

# Generate summaries for a few articles
sample_article_ids = bbc_processed_df["article_id"].unique()[:5]

for article_id in sample_article_ids:
    article_df = bbc_processed_df[bbc_processed_df["article_id"] == article_id]
    reference_summary = " ".join(
        article_df[article_df["label"] == 1]["article_sentences"]
    )
    generated_summary = knn_bbc.summarize(
        article_df["article_sentences"].tolist(),
        article_df["preprocessed_sentence"].tolist(),
    )

    print(f"\nArticle ID: {article_id}")
    print("Reference Summary:", reference_summary[:200] + "...")
    print("Generated Summary:", generated_summary[:200] + "...")

    rouge_scores = knn_bbc.compute_rouge(generated_summary, reference_summary)
    if rouge_scores is not None:
        print("ROUGE Scores:", rouge_scores[0])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END clf__metric=cosine, clf__n_neighbors=3, clf__weights=distance, tfidf__max_features=3000, tfidf__min_df=1, tfidf__ngram_range=(1, 2); total time=   7.0s
[CV] END clf__metric=cosine, clf__n_neighbors=3, clf__weights=distance, tfidf__max_features=3000, tfidf__min_df=1, tfidf__ngram_range=(1, 2); total time=   6.8s
[CV] END clf__metric=cosine, clf__n_neighbors=3, clf__weights=distance, tfidf__max_features=3000, tfidf__min_df=1, tfidf__ngram_range=(1, 2); total time=   6.6s
[CV] END clf__metric=euclidean, clf__n_neighbors=7, clf__weights=distance, tfidf__max_features=5000, tfidf__min_df=1, tfidf__ngram_range=(1, 1); total time=   6.0s
[CV] END clf__metric=euclidean, clf__n_neighbors=7, clf__weights=distance, tfidf__max_features=5000, tfidf__min_df=1, tfidf__ngram_range=(1, 1); total time=   5.6s
[CV] END clf__metric=euclidean, clf__n_neighbors=7, clf__weights=distance, tfidf__max_features=5000, tfidf__min_df=1, tfidf__ngr

In [None]:
from ML_models.knn import KNNExtractiveSummarizer
from sklearn.model_selection import train_test_split

# Prepare data
X = imdb_processed_df["preprocessed_sentence"]
y = imdb_processed_df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initialize and tune model
knn_bbc = KNNExtractiveSummarizer()

# Fine-tune the model on the training set
knn_bbc.tune(X_train, y_train, n_iter=10, scoring="f1")  # You can increase n_iter

# Evaluate
print("BBC Dataset Evaluation (KNN):")
knn_bbc.evaluate(X_test, y_test)

# Generate summaries for a few articles
sample_article_ids = imdb_processed_df["article_id"].unique()[:5]

for article_id in sample_article_ids:
    article_df = imdb_processed_df[imdb_processed_df["article_id"] == article_id]
    reference_summary = " ".join(
        article_df[article_df["label"] == 1]["article_sentences"]
    )
    generated_summary = knn_bbc.summarize(
        article_df["article_sentences"].tolist(),
        article_df["preprocessed_sentence"].tolist(),
    )

    print(f"\nArticle ID: {article_id}")
    print("Reference Summary:", reference_summary[:200] + "...")
    print("Generated Summary:", generated_summary[:200] + "...")

    rouge_scores = knn_bbc.compute_rouge(generated_summary, reference_summary)
    if rouge_scores is not None:
        print("ROUGE Scores:", rouge_scores[0])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END clf__metric=cosine, clf__n_neighbors=3, clf__weights=distance, tfidf__max_features=3000, tfidf__min_df=1, tfidf__ngram_range=(1, 2); total time=   2.0s
[CV] END clf__metric=cosine, clf__n_neighbors=3, clf__weights=distance, tfidf__max_features=3000, tfidf__min_df=1, tfidf__ngram_range=(1, 2); total time=   2.0s
[CV] END clf__metric=cosine, clf__n_neighbors=3, clf__weights=distance, tfidf__max_features=3000, tfidf__min_df=1, tfidf__ngram_range=(1, 2); total time=   2.0s
[CV] END clf__metric=euclidean, clf__n_neighbors=7, clf__weights=distance, tfidf__max_features=5000, tfidf__min_df=1, tfidf__ngram_range=(1, 1); total time=   1.3s
[CV] END clf__metric=euclidean, clf__n_neighbors=7, clf__weights=distance, tfidf__max_features=5000, tfidf__min_df=1, tfidf__ngram_range=(1, 1); total time=   1.2s
[CV] END clf__metric=euclidean, clf__n_neighbors=7, clf__weights=distance, tfidf__max_features=5000, tfidf__min_df=1, tfidf__ngr

Logistic Regression

In [27]:
from ML_models.logistic_reg import LogisticRegressionSummarizer
from sklearn.model_selection import train_test_split

X = bbc_processed_df["preprocessed_sentence"]
y = bbc_processed_df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

lr_bbc = LogisticRegressionSummarizer()

# Fine-tune the model on the training set
lr_bbc.tune(
    X_train, y_train, n_iter=10, scoring="f1"
)  # You can increase n_iter if desired

print("BBC Dataset Evaluation:")
lr_bbc.evaluate(X_test, y_test)

sample_article_ids = bbc_processed_df["article_id"].unique()[:3]

for article_id in sample_article_ids:
    article_df = bbc_processed_df[bbc_processed_df["article_id"] == article_id]
    reference_summary = " ".join(
        article_df[article_df["label"] == 1]["article_sentences"]
    )
    generated_summary = lr_bbc.summarize(
        article_df["article_sentences"].tolist(),
        article_df["preprocessed_sentence"].tolist(),
    )

    print(f"\nArticle ID: {article_id}")
    print("Reference Summary:", reference_summary[:200] + "...")
    print("Generated Summary:", generated_summary[:200] + "...")

    rouge_scores = lr_bbc.compute_rouge(generated_summary, reference_summary)
    if rouge_scores is not None:
        print("ROUGE Scores:", rouge_scores[0])

Fitting 3 folds for each of 10 candidates, totalling 30 fits

✅ Best parameters found:
{'tfidf__ngram_range': (1, 2), 'tfidf__min_df': 2, 'tfidf__max_features': None, 'clf__penalty': 'l1', 'clf__max_iter': 500, 'clf__C': 1}
Best CV score: 0.5722561327954961
BBC Dataset Evaluation:

📊 Classification Report:
              precision    recall  f1-score   support

           0     0.7321    0.6225    0.6729      5062
           1     0.5260    0.6478    0.5806      3274

    accuracy                         0.6324      8336
   macro avg     0.6291    0.6352    0.6267      8336
weighted avg     0.6512    0.6324    0.6366      8336


Article ID: 0
Reference Summary: Ad sales boost Time Warner profit  Quarterly profits at US media giant TimeWarner jumped 76% to $1.13bn (Â£600m) for the three months to December, from $639m year-earlier. TimeWarner said fourth quart...
Generated Summary: TimeWarner said fourth quarter sales rose 2% to $11.1bn from $10.9bn. Time Warner said on Friday that it now

In [28]:
from ML_models.logistic_reg import LogisticRegressionSummarizer
from sklearn.model_selection import train_test_split

X = imdb_processed_df["preprocessed_sentence"]
y = imdb_processed_df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

lr_imdb = LogisticRegressionSummarizer()

# Fine-tune the model on the training set
lr_imdb.tune(X_train, y_train, n_iter=10, scoring="f1")

print("IMDB Dataset Evaluation:")
lr_imdb.evaluate(X_test, y_test)

sample_article_ids = imdb_processed_df["article_id"].unique()[:3]

for article_id in sample_article_ids:
    article_df = imdb_processed_df[imdb_processed_df["article_id"] == article_id]
    article_sents = article_df["article_sentences"].tolist()
    preprocessed_sents = article_df["preprocessed_sentence"].tolist()

    if not preprocessed_sents or not article_sents:
        print(f"\nArticle ID: {article_id}")
        print("Empty input. Skipping...")
        continue

    reference_summary = " ".join(
        article_df[article_df["label"] == 1]["article_sentences"]
    )
    generated_summary = lr_imdb.summarize(article_sents, preprocessed_sents)

    print(f"\nArticle ID: {article_id}")
    print("Reference Summary:", reference_summary[:200] + "...")
    print("Generated Summary:", generated_summary[:200] + "...")

    rouge_scores = lr_imdb.compute_rouge(generated_summary, reference_summary)
    if rouge_scores is not None:
        print("ROUGE Scores:", rouge_scores[0])

Fitting 3 folds for each of 10 candidates, totalling 30 fits

✅ Best parameters found:
{'tfidf__ngram_range': (1, 2), 'tfidf__min_df': 3, 'tfidf__max_features': 5000, 'clf__penalty': 'l1', 'clf__max_iter': 500, 'clf__C': 1}
Best CV score: 0.4002394101671622
IMDB Dataset Evaluation:

📊 Classification Report:
              precision    recall  f1-score   support

           0     0.8485    0.6541    0.7388      2021
           1     0.3324    0.5959    0.4267       584

    accuracy                         0.6411      2605
   macro avg     0.5905    0.6250    0.5827      2605
weighted avg     0.7328    0.6411    0.6688      2605


Article ID: 0
Reference Summary: ...
Generated Summary: One of the other reviewers has mentioned that after watching just Oz episode you ll be hooked They are right as this is exactly what happened with me The first thing that struck me about Oz was its br...
⚠️ ROUGE error: Reference is empty.

Article ID: 1
Reference Summary: A wonderful little production The

Decision Trees

In [18]:
from ML_models.decision_tree import DecisionTreeSummarizer

bbc_summarizer = DecisionTreeSummarizer("BBC", bbc_df)
bbc_summarizer.run()
bbc_summarizer.show_samples()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Fady\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!



=== Running on BBC Dataset ===
Train Accuracy: 0.8961
Test Accuracy: 0.8686

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.88      0.93     12226
           1       0.05      0.43      0.10       200

    accuracy                           0.87     12426
   macro avg       0.52      0.65      0.51     12426
weighted avg       0.97      0.87      0.92     12426


ROUGE Scores:
rouge-1: 0.3711
rouge-2: 0.2659
rouge-l: 0.3639

--- Article 1665 ---
Predicted: Despite being without flanker Keith Gleeson, coach Eddie O'Sullivan has not had to contend with the sort of casualty lists that have hit England and Scotland in particular prior to the tournament. "For Ireland to win it we need to stay relatively injury free, and fortunately we are one of the few teams that have done that so far," Wood added. But despite their traditional hospitality when the Irish are visiting, Wood believes Wales might end their four-match losing run ag

In [19]:
bbc_summarizer = DecisionTreeSummarizer("IMDB", imdb_df)
bbc_summarizer.run()
bbc_summarizer.show_samples()


=== Running on IMDB Dataset ===
Train Accuracy: 0.9771
Test Accuracy: 0.9684

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.97      0.98     11650
           1       0.05      0.34      0.08        50

    accuracy                           0.97     11700
   macro avg       0.52      0.66      0.53     11700
weighted avg       0.99      0.97      0.98     11700


ROUGE Scores:
rouge-1: 0.5617
rouge-2: 0.4881
rouge-l: 0.5612

--- Article 13209 ---
Predicted: Two old men sitting on park bench don really have problem with this scene Only problem is that it not scene it the entire movieYup movies don get anymore low concept than this They also don get anymore boring than this either but there worse to come because these two old men are chalk and cheese One is Nat Moyer who is Yiddish communist while the other is Midge Carter former golden gloves champion who also black Let me see now Jew and black man sitting on park bench get

In [20]:
from ML_models.random_forest import RandomForestSummarizer

bbc_summarizer = RandomForestSummarizer("BBC", bbc_df)
bbc_summarizer.run()
bbc_summarizer.show_samples()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Fady\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!



=== Running on BBC Dataset ===
Train Accuracy: 0.9407
Test Accuracy: 0.9315

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.94      0.96     12226
           1       0.07      0.27      0.11       200

    accuracy                           0.93     12426
   macro avg       0.53      0.60      0.54     12426
weighted avg       0.97      0.93      0.95     12426


ROUGE Scores:
rouge-1: 0.3028
rouge-2: 0.2067
rouge-l: 0.2960

--- Article 1665 ---
Predicted: "Things have been building up over the past few years and I think this is the year for Ireland," he told BBC Sport. A lot of things are in our favour with England and France at home." "For Ireland to win it we need to stay relatively injury free, and fortunately we are one of the few teams that have done that so far," Wood added. "It is going to be tough and we need to take all the luck and opportunities that come our way."
Reference: "So many of the major England players

In [21]:
from ML_models.random_forest import RandomForestSummarizer

bbc_summarizer = RandomForestSummarizer("IMDB", imdb_df)
bbc_summarizer.run()
bbc_summarizer.show_samples()


=== Running on IMDB Dataset ===
Train Accuracy: 0.9968
Test Accuracy: 0.9924

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11650
           1       0.15      0.16      0.15        50

    accuracy                           0.99     11700
   macro avg       0.57      0.58      0.57     11700
weighted avg       0.99      0.99      0.99     11700


ROUGE Scores:
rouge-1: 0.5617
rouge-2: 0.4881
rouge-l: 0.5612

--- Article 13209 ---
Predicted: Two old men sitting on park bench don really have problem with this scene Only problem is that it not scene it the entire movieYup movies don get anymore low concept than this They also don get anymore boring than this either but there worse to come because these two old men are chalk and cheese One is Nat Moyer who is Yiddish communist while the other is Midge Carter former golden gloves champion who also black Let me see now Jew and black man sitting on park bench get

CNN

In [22]:
from DL_models.cnn import CNNExtractiveSummarizer
from sklearn.model_selection import train_test_split

# Prepare data
X = bbc_processed_df["preprocessed_sentence"]
y = bbc_processed_df["label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
cnn_bbc = CNNExtractiveSummarizer()

# Train/tune the model
cnn_bbc.tune(X_train, y_train, X_val_raw=X_test, y_val=y_test, epochs=30)

# Evaluate
print("BBC Dataset Evaluation (CNN):")
cnn_bbc.evaluate(X_test, y_test)

# Generate summaries for a few articles
sample_article_ids = bbc_processed_df["article_id"].unique()[:5]

for article_id in sample_article_ids:
    article_df = bbc_processed_df[bbc_processed_df["article_id"] == article_id]
    reference_summary = " ".join(
        article_df[article_df["label"] == 1]["article_sentences"]
    )
    generated_summary = cnn_bbc.summarize(
        article_df["article_sentences"].tolist(),
        article_df["preprocessed_sentence"].tolist()
    )

    print(f"\nArticle ID: {article_id}")
    print("Reference Summary:", reference_summary[:200] + "...")
    print("Generated Summary:", generated_summary[:200] + "...")

    rouge_scores = cnn_bbc.compute_rouge(generated_summary, reference_summary)
    if rouge_scores is not None:
        print("ROUGE Scores:", rouge_scores[0])




Epoch 1/30
[1m1042/1042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.6376 - loss: 0.6351 - val_accuracy: 0.6492 - val_loss: 0.6152
Epoch 2/30
[1m1042/1042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.7515 - loss: 0.5093 - val_accuracy: 0.6594 - val_loss: 0.6280
Epoch 3/30
[1m1042/1042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.9023 - loss: 0.2620 - val_accuracy: 0.6666 - val_loss: 0.8225
Epoch 4/30
[1m1042/1042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9739 - loss: 0.0952 - val_accuracy: 0.6543 - val_loss: 1.1941
Epoch 5/30
[1m1042/1042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.9874 - loss: 0.0540 - val_accuracy: 0.6548 - val_loss: 1.3492
Epoch 6/30
[1m1042/1042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9917 - loss: 0.0391 - val_accuracy: 0.6657 - val_loss: 1.6369
Epoch 7/30
[1

In [23]:
from DL_models.cnn import CNNExtractiveSummarizer
from sklearn.model_selection import train_test_split

# Prepare data
X = imdb_processed_df["preprocessed_sentence"]
y = imdb_processed_df["label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
cnn_bbc = CNNExtractiveSummarizer()

# Train/tune the model
cnn_bbc.tune(X_train, y_train, X_val_raw=X_test, y_val=y_test, epochs=30)

# Evaluate
print("IMDB Dataset Evaluation (CNN):")
cnn_bbc.evaluate(X_test, y_test)

# Generate summaries for a few articles
sample_article_ids = imdb_processed_df["article_id"].unique()[:5]

for article_id in sample_article_ids:
    article_df = imdb_processed_df[imdb_processed_df["article_id"] == article_id]
    reference_summary = " ".join(
        article_df[article_df["label"] == 1]["article_sentences"]
    )
    generated_summary = cnn_bbc.summarize(
        article_df["article_sentences"].tolist(),
        article_df["preprocessed_sentence"].tolist()
    )

    print(f"\nArticle ID: {article_id}")
    print("Reference Summary:", reference_summary[:200] + "...")
    print("Generated Summary:", generated_summary[:200] + "...")

    rouge_scores = cnn_bbc.compute_rouge(generated_summary, reference_summary)
    if rouge_scores is not None:
        print("ROUGE Scores:", rouge_scores[0])


Epoch 1/30




[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.7641 - loss: 0.5480 - val_accuracy: 0.7762 - val_loss: 0.4878
Epoch 2/30
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.7927 - loss: 0.4241 - val_accuracy: 0.7351 - val_loss: 0.5162
Epoch 3/30
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9035 - loss: 0.2478 - val_accuracy: 0.7655 - val_loss: 0.7265
Epoch 4/30
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9773 - loss: 0.0796 - val_accuracy: 0.7386 - val_loss: 0.8631
Epoch 5/30
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9894 - loss: 0.0329 - val_accuracy: 0.7420 - val_loss: 1.0624
Epoch 6/30
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9946 - loss: 0.0178 - val_accuracy: 0.7225 - val_loss: 1.2387
Epoch 7/30
[1m326/326[0m [32m━━━━━━━

In [24]:
from DL_models.bilstm_attention import BiLSTMSummarizer

bilstmn = BiLSTMSummarizer("BBC", bbc_processed_df)
bilstmn.train()
bilstmn.evaluate()
bilstmn.show_samples(n=5)


=== Training on BBC ===
Epoch 1 Loss: 1.0832
Epoch 2 Loss: 1.0049
Epoch 3 Loss: 0.8956
Epoch 4 Loss: 0.7274
Epoch 5 Loss: 0.5557
Training completed in 303.66s
Best Threshold: 0.20, F1: 0.6059

ROUGE Scores:
rouge-1: 0.3623
rouge-2: 0.2557
rouge-l: 0.3085

--- Article ID: 1217 ---
Predicted Summary:
 also donation top conservative scottish business group focus scotland institute international research world large independent conference company lord drayson whose company powderject win pound contract provide smallpox vaccine government september terror attack give party day christmas
Reference Summary:
 party build poll war chests labour party receive donation final quarter new figure show significant donation come retire millionaire businessman philanthropist sir christopher ondaatje give party sum refrigerator magnate william haughey obe give also donation top conservative scottish business group focus scotland institute international research world large independent conference compan

In [25]:
bilstm = BiLSTMSummarizer("IMDB", imdb_processed_df)
bilstm.train()
bilstm.evaluate()
bilstm.show_samples(n=5)


=== Training on IMDB ===
Epoch 1 Loss: 0.9297
Epoch 2 Loss: 0.8832
Epoch 3 Loss: 0.8302
Epoch 4 Loss: 0.7354
Epoch 5 Loss: 0.5985
Training completed in 97.12s
Best Threshold: 0.35, F1: 0.4330

ROUGE Scores:
rouge-1: 0.4271
rouge-2: 0.3773
rouge-l: 0.4141

--- Article ID: 3460 ---
Predicted Summary:
 freddy annoyance see many time one nothing different lot time want take awful one liner get tv screen
Reference Summary:
 hit rock bottom right begin bad act jumbled sequence event mean sure freddy movie suppose dreamlike creepy one like train wreck poor sequence event awful plot setup feel like come terrible headache like get scar directing totally fail none suspense well craft horror previous sequel find even death scene mostly crass moronic death food especially except one cool scene craft like comic book battle movie get point storyline lame lame lame lame
--------------------------------------------------------------------------------

--- Article ID: 1213 ---
Predicted Summary:
 high

FeedForward Neural Network


In [18]:
# ===== Cell 1: Imports =====
from DL_models.FNN import (FeedForwardNet, extract_features, prepare_dataloaders,
                           compute_class_weight, train_model)
from sklearn.model_selection import train_test_split
import torch

In [19]:
# ===== Cell 2: Data Preparation =====
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

X_train_df, X_val_df, y_train, y_val = train_test_split(
    bbc_processed_df[['preprocessed_sentence']],
    bbc_processed_df['label'].values,
    test_size=0.2,
    random_state=42
)

X_train, X_val, vectorizer = extract_features(X_train_df, X_val_df)

train_loader, val_loader = prepare_dataloaders(X_train, y_train, X_val, y_val, device=device)

In [20]:
# ===== Cell 3: Model Initialization =====
input_size = X_train.shape[1]
model = FeedForwardNet(input_size)
pos_weight = compute_class_weight(y_train).to(device)
criterion = torch.nn.BCELoss(pos_weight)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# ===== Cell 4: Training =====
train_model(model, train_loader, val_loader, criterion, optimizer, device=device, epochs=15)

Epoch 1 average loss: 0.9809
              precision    recall  f1-score   support

 Not Summary       0.68      0.81      0.74      5062
     Summary       0.59      0.41      0.48      3274

    accuracy                           0.65      8336
   macro avg       0.63      0.61      0.61      8336
weighted avg       0.64      0.65      0.64      8336

Epoch 2 average loss: 0.8845
              precision    recall  f1-score   support

 Not Summary       0.69      0.78      0.73      5062
     Summary       0.58      0.46      0.51      3274

    accuracy                           0.66      8336
   macro avg       0.63      0.62      0.62      8336
weighted avg       0.65      0.66      0.65      8336

Epoch 3 average loss: 0.8282
              precision    recall  f1-score   support

 Not Summary       0.70      0.76      0.73      5062
     Summary       0.57      0.50      0.54      3274

    accuracy                           0.66      8336
   macro avg       0.64      0.63      0.

In [24]:
from rouge_score import rouge_scorer
import numpy as np

# ===== Cell 6: ROUGE Evaluation =====
def evaluate_rouge(df, model, vectorizer, top_k=3):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = []

    model.eval()
    with torch.no_grad():
        for article_id in df['article_id'].unique():
            article_df = df[df['article_id'] == article_id]
            X = vectorizer.transform(article_df['preprocessed_sentence']).toarray()
            preds = model(torch.tensor(X, dtype=torch.float32)).numpy()

            top_indices = preds.argsort()[-top_k:][::-1]
            predicted_summary = " ".join(article_df.iloc[top_indices]["article_sentences"])
            reference_summary = imdb_df.loc[article_id]["Summary"]

            score = scorer.score(reference_summary, predicted_summary)
            scores.append(score)

    return scores

rouge_scores = evaluate_rouge(bbc_processed_df, model, vectorizer)

avg_rouge1 = np.mean([s["rouge1"].fmeasure for s in rouge_scores])
avg_rouge2 = np.mean([s["rouge2"].fmeasure for s in rouge_scores])
avg_rougeL = np.mean([s["rougeL"].fmeasure for s in rouge_scores])

print(f"Average ROUGE-1: {avg_rouge1:.4f}")
print(f"Average ROUGE-2: {avg_rouge2:.4f}")
print(f"Average ROUGE-L: {avg_rougeL:.4f}")

Average ROUGE-1: 0.1370
Average ROUGE-2: 0.0061
Average ROUGE-L: 0.0919
