In [2]:
####  Abstract ROUGE Score
from rouge import Rouge
import pandas as pd
from tqdm import tqdm
import torch
import re

# Load the DataFrame from the Excel file
input_file = "Abstract_chunks_Summary_t5_small_score.xlsx"
df = pd.read_excel(input_file)

# Columns read by this cell: the reference text and the summary scored against it.
reference_column = 'Abstract'
summary_column = 'Abstract_chunks_Summary_t5_small'

# Fail before the loop starts, with a message naming the file and the columns.
missing_columns = [c for c in (reference_column, summary_column) if c not in df.columns]
if missing_columns:
    raise KeyError(f"{input_file} is missing required column(s): {missing_columns}")

# Create a Rouge object
rouge = Rouge()

# Per-row score lists. The names are labels only; the values stored are the
# ROUGE-1 F1 (overlap), ROUGE-2 F1 (coherence) and ROUGE-L F1 (informativeness).
overlap_scores = []
coherence_scores = []
informativeness_scores = []

# Set device to GPU if available.
# NOTE(review): `device` is not used by anything in this cell; it is kept only
# in case another cell reads it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pattern for runs of non-ASCII characters, compiled once outside the loop.
non_ascii = re.compile(r'[^\x00-\x7F]+')
# Characters that carry no scoreable content on their own.
ignorable = " \t\r\n."

# Iterate over the rows in the DataFrame with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Calculating Scores"):
    abstract = row[reference_column]
    generated_summary = row[summary_column]

    # Missing Excel cells arrive as NaN, and str(NaN) is the literal word
    # "nan"; map them to empty text so they are not scored as real content.
    abstract = "" if pd.isna(abstract) else str(abstract)
    generated_summary = "" if pd.isna(generated_summary) else str(generated_summary)

    # Strip non-ASCII characters from the reference abstract.
    # NOTE(review): the generated summary is scored as-is (not ASCII-stripped).
    abstract = non_ascii.sub('', abstract)
    original_text = abstract

    if not generated_summary.strip(ignorable) or not original_text.strip(ignorable):
        # rouge raises ValueError for an empty hypothesis or reference; a row
        # with nothing to compare gets zero scores instead of aborting the run.
        overlap_score = coherence_score = informativeness_score = 0.0
    else:
        # Calculate ROUGE scores (hypothesis first, reference second)
        scores = rouge.get_scores(generated_summary, original_text)

        # Extract the F1 value of each ROUGE variant
        overlap_score = scores[0]['rouge-1']['f']
        coherence_score = scores[0]['rouge-2']['f']
        informativeness_score = scores[0]['rouge-l']['f']

    # Append the scores to the respective lists
    overlap_scores.append(overlap_score)
    coherence_scores.append(coherence_score)
    informativeness_scores.append(informativeness_score)

# Add the scores to the DataFrame
df['Overlap_Score'] = overlap_scores
df['Coherence_Score'] = coherence_scores
df['Informativeness_Score'] = informativeness_scores

# Save the updated DataFrame to a new Excel file
output_file = "Summary_Scores_hupd_Rouge_score_t5_small_Chunks.xlsx"
df.to_excel(output_file, index=False)

# Print the average scores; an empty sheet reports nan instead of dividing by zero.
for label, values in (
    ("Overlap", overlap_scores),
    ("Coherence", coherence_scores),
    ("Informativeness", informativeness_scores),
):
    average = sum(values) / len(values) if values else float("nan")
    print(f"Average {label} Score:", average)

Calculating Scores: 100%|██████████| 50/50 [00:00<00:00, 186.05it/s]

Average Overlap Score: 0.33585818115447497
Average Coherence Score: 0.22075952470367072
Average Informativeness Score: 0.33167132807211813





In [3]:
#### Claims ROUGE Score
from rouge import Rouge
import pandas as pd
from tqdm import tqdm
import torch
import re

# Load the DataFrame from the Excel file
input_file = "Claims_chunks_Summary_t5_small_score.xlsx"
df = pd.read_excel(input_file)

# Columns read by this cell: the reference text and the summary scored against it.
reference_column = 'Claims'
summary_column = 'Claims_chunks_Summary_t5_small'

# Fail before the loop starts, with a message naming the file and the columns.
missing_columns = [c for c in (reference_column, summary_column) if c not in df.columns]
if missing_columns:
    raise KeyError(f"{input_file} is missing required column(s): {missing_columns}")

# Create a Rouge object
rouge = Rouge()

# Per-row score lists. The names are labels only; the values stored are the
# ROUGE-1 F1 (overlap), ROUGE-2 F1 (coherence) and ROUGE-L F1 (informativeness).
overlap_scores = []
coherence_scores = []
informativeness_scores = []

# Set device to GPU if available.
# NOTE(review): `device` is not used by anything in this cell; it is kept only
# in case another cell reads it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pattern for runs of non-ASCII characters, compiled once outside the loop.
non_ascii = re.compile(r'[^\x00-\x7F]+')
# Characters that carry no scoreable content on their own.
ignorable = " \t\r\n."

# Iterate over the rows in the DataFrame with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Calculating Scores"):
    claim = row[reference_column]
    generated_summary = row[summary_column]

    # Missing Excel cells arrive as NaN, and str(NaN) is the literal word
    # "nan"; map them to empty text so they are not scored as real content.
    claim = "" if pd.isna(claim) else str(claim)
    generated_summary = "" if pd.isna(generated_summary) else str(generated_summary)

    # Strip non-ASCII characters from the reference claims.
    # NOTE(review): the generated summary is scored as-is (not ASCII-stripped).
    claims = non_ascii.sub('', claim)
    original_text = claims

    if not generated_summary.strip(ignorable) or not original_text.strip(ignorable):
        # rouge raises ValueError for an empty hypothesis or reference; a row
        # with nothing to compare gets zero scores instead of aborting the run.
        overlap_score = coherence_score = informativeness_score = 0.0
    else:
        # Calculate ROUGE scores (hypothesis first, reference second)
        scores = rouge.get_scores(generated_summary, original_text)

        # Extract the F1 value of each ROUGE variant
        overlap_score = scores[0]['rouge-1']['f']
        coherence_score = scores[0]['rouge-2']['f']
        informativeness_score = scores[0]['rouge-l']['f']

    # Append the scores to the respective lists
    overlap_scores.append(overlap_score)
    coherence_scores.append(coherence_score)
    informativeness_scores.append(informativeness_score)

# Add the scores to the DataFrame
df['Overlap_Score'] = overlap_scores
df['Coherence_Score'] = coherence_scores
df['Informativeness_Score'] = informativeness_scores

# Save the updated DataFrame to a new Excel file
output_file = "Summary_Scores_hupd_Rouge_score_t5_small_Claims_chunks.xlsx"
df.to_excel(output_file, index=False)

# Print the average scores; an empty sheet reports nan instead of dividing by zero.
for label, values in (
    ("Overlap", overlap_scores),
    ("Coherence", coherence_scores),
    ("Informativeness", informativeness_scores),
):
    average = sum(values) / len(values) if values else float("nan")
    print(f"Average {label} Score:", average)

Calculating Scores: 100%|██████████| 50/50 [00:02<00:00, 17.05it/s]

Average Overlap Score: 0.2863240577983916
Average Coherence Score: 0.16516687088844434
Average Informativeness Score: 0.280663342483949





In [4]:
#### Combined summary ROUGE Score
from rouge import Rouge
import pandas as pd
from tqdm import tqdm
import torch
import re

# Load the DataFrame from the Excel file
input_file = "Combined_Google_patent_chunks_Summary_t5_small_score.xlsx"
df = pd.read_excel(input_file)

# Columns read by this cell. The reference here is built from the two chunk
# summaries (abstract + claims), not from the source abstract/claims text.
abstract_summary_column = 'Abstract_chunks_Summary_t5_small'
claims_summary_column = 'Claims_chunks_Summary_t5_small'
summary_column = 'Combined_Summary_chunks_t5_small'

# Fail before the loop starts, with a message naming the file and the columns.
missing_columns = [
    c for c in (abstract_summary_column, claims_summary_column, summary_column)
    if c not in df.columns
]
if missing_columns:
    raise KeyError(f"{input_file} is missing required column(s): {missing_columns}")

# Create a Rouge object
rouge = Rouge()

# Per-row score lists. The names are labels only; the values stored are the
# ROUGE-1 F1 (overlap), ROUGE-2 F1 (coherence) and ROUGE-L F1 (informativeness).
overlap_scores = []
coherence_scores = []
informativeness_scores = []

# Set device to GPU if available.
# NOTE(review): `device` is not used by anything in this cell; it is kept only
# in case another cell reads it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pattern for runs of non-ASCII characters, compiled once outside the loop.
non_ascii = re.compile(r'[^\x00-\x7F]+')
# Characters that carry no scoreable content on their own.
ignorable = " \t\r\n."

# Iterate over the rows in the DataFrame with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Calculating Scores"):
    abstract = row[abstract_summary_column]
    claim = row[claims_summary_column]
    generated_summary = row[summary_column]

    # Missing Excel cells arrive as NaN, and str(NaN) is the literal word
    # "nan"; map them to empty text so they are not scored as real content.
    abstract = "" if pd.isna(abstract) else str(abstract)
    claim = "" if pd.isna(claim) else str(claim)
    generated_summary = "" if pd.isna(generated_summary) else str(generated_summary)

    # Strip non-ASCII characters from both parts of the reference.
    # NOTE(review): the combined summary is scored as-is (not ASCII-stripped).
    abstract = non_ascii.sub('', abstract)
    claims = non_ascii.sub('', claim)

    # Reference text = abstract summary followed by claims summary
    original_text = abstract+' '+claims

    if not generated_summary.strip(ignorable) or not original_text.strip(ignorable):
        # rouge raises ValueError for an empty hypothesis or reference; a row
        # with nothing to compare gets zero scores instead of aborting the run.
        overlap_score = coherence_score = informativeness_score = 0.0
    else:
        # Calculate ROUGE scores (hypothesis first, reference second)
        scores = rouge.get_scores(generated_summary, original_text)

        # Extract the F1 value of each ROUGE variant
        overlap_score = scores[0]['rouge-1']['f']
        coherence_score = scores[0]['rouge-2']['f']
        informativeness_score = scores[0]['rouge-l']['f']

    # Append the scores to the respective lists
    overlap_scores.append(overlap_score)
    coherence_scores.append(coherence_score)
    informativeness_scores.append(informativeness_score)

# Add the scores to the DataFrame
df['Overlap_Score'] = overlap_scores
df['Coherence_Score'] = coherence_scores
df['Informativeness_Score'] = informativeness_scores

# Save the updated DataFrame to a new Excel file
output_file = "Summary_Scores_hupd_Rouge_score_t5_small_Combined_Summary_Chunks.xlsx"
df.to_excel(output_file, index=False)

# Print the average scores; an empty sheet reports nan instead of dividing by zero.
for label, values in (
    ("Overlap", overlap_scores),
    ("Coherence", coherence_scores),
    ("Informativeness", informativeness_scores),
):
    average = sum(values) / len(values) if values else float("nan")
    print(f"Average {label} Score:", average)

Calculating Scores: 100%|██████████| 50/50 [00:00<00:00, 1287.92it/s]

Average Overlap Score: 0.4518406890598394
Average Coherence Score: 0.3500811829049737
Average Informativeness Score: 0.4446407007092676





In [1]:
!pip install rouge

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
