In [9]:
#### Abstract ROUGE Score
from rouge import Rouge
import pandas as pd
from tqdm import tqdm
import torch
import re

# Load the DataFrame from the Excel file
input_file = "abstract_summary_t5-small_Score.xlsx"
df = pd.read_excel(input_file)

# Create a Rouge object
rouge = Rouge()

# Labels of the rows that were scored, and the ROUGE F1 values for each of
# them (all four lists stay aligned). The notebook's labels map to:
#   overlap         -> ROUGE-1 F1
#   coherence       -> ROUGE-2 F1
#   informativeness -> ROUGE-L F1
indices = []
overlap_scores = []
coherence_scores = []
informativeness_scores = []

# Iterate over the rows in the DataFrame with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Calculating Scores"):
    abstract = row['Abstract']
    summary = row['Abstract_Summary']

    # str() turns a missing cell (NaN) into the literal text "nan", which
    # would then be scored like real content; skip such rows instead.
    if pd.isna(abstract) or pd.isna(summary):
        print(f"Skipping row {index} because the abstract or its summary is missing")
        continue

    # Reference: the abstract with non-ASCII characters removed.
    # Hypothesis: the generated summary, scored as-is.
    original_text = re.sub(r'[^\x00-\x7F]+', '', str(abstract))
    generated_summary = str(summary)

    # get_scores raises ValueError when a text has no scorable content
    # (e.g. "Hypothesis is empty."); report the row and keep going so a
    # single bad row does not abort the whole run.
    try:
        scores = rouge.get_scores(generated_summary, original_text)
    except ValueError as e:
        print(f"Error in row {index}: {e}")
        continue

    # Keep only the F1 value of each metric
    row_scores = scores[0]
    indices.append(index)
    overlap_scores.append(row_scores['rouge-1']['f'])
    coherence_scores.append(row_scores['rouge-2']['f'])
    informativeness_scores.append(row_scores['rouge-l']['f'])

# Add the scores to the DataFrame; assignment aligns on the row labels, so
# rows that were skipped are left as NaN.
df["Overlap_Score['rouge-1']"] = pd.Series(overlap_scores, index=indices, dtype="float64")
df["Coherence_Score['rouge-2']"] = pd.Series(coherence_scores, index=indices, dtype="float64")
df["Informativeness_Score['rouge-l']"] = pd.Series(informativeness_scores, index=indices, dtype="float64")

# Save the updated DataFrame to a new Excel file
output_file = "Summary_Scores_hupd_Rouge_score_t5_small_total.xlsx"
df.to_excel(output_file, index=False)

# Print the average scores over the rows that were actually scored
if indices:
    print("Average Overlap Score:", sum(overlap_scores) / len(overlap_scores))
    print("Average Coherence Score:", sum(coherence_scores) / len(coherence_scores))
    print("Average Informativeness Score:", sum(informativeness_scores) / len(informativeness_scores))
else:
    print("No rows could be scored; averages are undefined")

Calculating Scores: 100%|██████████| 1630/1630 [00:09<00:00, 180.14it/s]


Average Overlap Score: 0.7022384785848844
Average Coherence Score: 0.6146253621342672
Average Informativeness Score: 0.6997961547500037


In [None]:
!pip install Rouge

In [11]:
#### Claims ROUGE Score
from rouge import Rouge
import pandas as pd
from tqdm import tqdm
import torch
import re

# Load the DataFrame from the Excel file
input_file = "claim_summary_t5-small_score.xlsx"
df = pd.read_excel(input_file)

# Create a Rouge object
rouge = Rouge()

# Labels of the rows that were scored, and the ROUGE F1 values for each of
# them (all four lists stay aligned). The notebook's labels map to:
#   overlap         -> ROUGE-1 F1
#   coherence       -> ROUGE-2 F1
#   informativeness -> ROUGE-L F1
indices = []
overlap_scores = []
coherence_scores = []
informativeness_scores = []

# Iterate over the rows in the DataFrame with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Calculating Scores"):
    claim = row['Claims']
    summary = row['claim_Summary']

    # str() turns a missing cell (NaN) into the literal text "nan", which
    # would then be scored like real content; skip such rows instead.
    if pd.isna(claim) or pd.isna(summary):
        print(f"Skipping row {index} because the claims or their summary are missing")
        continue

    # Reference: the claims text with non-ASCII characters removed.
    # Hypothesis: the generated summary, scored as-is.
    original_text = re.sub(r'[^\x00-\x7F]+', '', str(claim))
    generated_summary = str(summary)

    # get_scores raises ValueError when a text has no scorable content
    # (e.g. "Hypothesis is empty."); report the row and keep going so a
    # single bad row does not abort the whole run.
    try:
        scores = rouge.get_scores(generated_summary, original_text)
    except ValueError as e:
        print(f"Error in row {index}: {e}")
        continue

    # Keep only the F1 value of each metric
    row_scores = scores[0]
    indices.append(index)
    overlap_scores.append(row_scores['rouge-1']['f'])
    coherence_scores.append(row_scores['rouge-2']['f'])
    informativeness_scores.append(row_scores['rouge-l']['f'])

# Add the scores to the DataFrame; assignment aligns on the row labels, so
# rows that were skipped are left as NaN.
df["Overlap_Score['rouge-1']"] = pd.Series(overlap_scores, index=indices, dtype="float64")
df["Coherence_Score['rouge-2']"] = pd.Series(coherence_scores, index=indices, dtype="float64")
df["Informativeness_Score['rouge-l']"] = pd.Series(informativeness_scores, index=indices, dtype="float64")

# Save the updated DataFrame to a new Excel file
output_file = "Summary_Scores_hupd_Rouge_score_t5_small_Claims_total.xlsx"
df.to_excel(output_file, index=False)

# Print the average scores over the rows that were actually scored
if indices:
    print("Average Overlap Score:", sum(overlap_scores) / len(overlap_scores))
    print("Average Coherence Score:", sum(coherence_scores) / len(coherence_scores))
    print("Average Informativeness Score:", sum(informativeness_scores) / len(informativeness_scores))
else:
    print("No rows could be scored; averages are undefined")

Calculating Scores: 100%|██████████| 1630/1630 [01:37<00:00, 16.69it/s]


Average Overlap Score: 0.31694796876483294
Average Coherence Score: 0.19459843339511917
Average Informativeness Score: 0.315735633815753


In [12]:
#### Combined summary ROUGE Score
from rouge import Rouge
import pandas as pd
from tqdm import tqdm
import torch
import re

# Load the DataFrame from the Excel file
input_file = "Combined_Google_patent_Summary_t5_small.xlsx"
df = pd.read_excel(input_file)

# Create a Rouge object
rouge = Rouge()

# Labels of the rows that were scored, and the ROUGE F1 values for each of
# them (all four lists stay aligned). The notebook's labels map to:
#   overlap         -> ROUGE-1 F1
#   coherence       -> ROUGE-2 F1
#   informativeness -> ROUGE-L F1
indices = []
overlap_scores = []
coherence_scores = []
informativeness_scores = []

# Iterate over the rows in the DataFrame with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Calculating Scores"):
    abstract = row['Abstract_Summary']
    claim = row['claim_Summary']
    combined = row['Combined_Summary']

    # Missing cells are read as NaN and str(NaN) is the literal text "nan";
    # treat them as empty text so they neither add a bogus "nan" token to the
    # reference nor slip past the empty-summary check below.
    abstract = '' if pd.isna(abstract) else re.sub(r'[^\x00-\x7F]+', '', str(abstract))
    claims = '' if pd.isna(claim) else re.sub(r'[^\x00-\x7F]+', '', str(claim))

    # Reference: the abstract summary followed by the claim summary, both
    # with non-ASCII characters removed.
    # Hypothesis: the combined summary, scored as-is.
    original_text = abstract + ' ' + claims
    generated_summary = '' if pd.isna(combined) else str(combined)

    # Skip the iteration if generated_summary is empty
    if not generated_summary.strip():
        print(f"Skipping row {index} because generated_summary is empty")
        continue

    # get_scores raises ValueError when a text has no scorable content
    # (e.g. "Hypothesis is empty."); report the row and keep going.
    try:
        scores = rouge.get_scores(generated_summary, original_text)
    except ValueError as e:
        print(f"Error in row {index}: {e}")
        continue

    # Keep only the F1 value of each metric
    row_scores = scores[0]
    indices.append(index)
    overlap_scores.append(row_scores['rouge-1']['f'])
    coherence_scores.append(row_scores['rouge-2']['f'])
    informativeness_scores.append(row_scores['rouge-l']['f'])


# Add the scores to the DataFrame; assignment aligns on the row labels, so
# rows that were skipped are left as NaN.
df["Overlap_Score['rouge-1']"] = pd.Series(overlap_scores, index=indices, dtype="float64")
df["Coherence_Score['rouge-2']"] = pd.Series(coherence_scores, index=indices, dtype="float64")
df["Informativeness_Score['rouge-l']"] = pd.Series(informativeness_scores, index=indices, dtype="float64")


# Save the updated DataFrame to a new Excel file
output_file = "Summary_Scores_hupd_Rouge_score_t5_small_Combined_Summary_total.xlsx"
df.to_excel(output_file, index=False)

# Print the average scores over the rows that were actually scored
if indices:
    print("Average Overlap Score:", sum(overlap_scores) / len(overlap_scores))
    print("Average Coherence Score:", sum(coherence_scores) / len(coherence_scores))
    print("Average Informativeness Score:", sum(informativeness_scores) / len(informativeness_scores))
else:
    print("No rows could be scored; averages are undefined")


Calculating Scores:  20%|█▉        | 323/1630 [00:01<00:07, 165.49it/s]

Error in row 304: Hypothesis is empty.


Calculating Scores: 100%|██████████| 1630/1630 [00:12<00:00, 126.92it/s]


Average Overlap Score: 0.7367705380119189
Average Coherence Score: 0.6575756066337907
Average Informativeness Score: 0.736232712910879


In [3]:
!pip install rouge

