In [1]:
import pandas as pd
from transformers import BartTokenizer, BartForConditionalGeneration

def load_csv_file(filename):
    """Read a CSV file into a DataFrame.

    Args:
        filename: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` when ``pd.read_csv`` raises
        ``FileNotFoundError`` (a message naming the file is printed first).
        Any other exception from ``pd.read_csv`` propagates to the caller.
    """
    try:
        return pd.read_csv(filename)
    except FileNotFoundError:
        print(f"File '{filename}' not found.")
    return None

def check_adverse_events(df):
    """Select the rows whose 'Section_id' equals 'Adverse Events'.

    Args:
        df: DataFrame to filter. It is not modified.

    Returns:
        A new DataFrame holding the matching rows (possibly empty), or
        ``None`` when ``df`` has no 'Section_id' column, in which case a
        message is printed.
    """
    if 'Section_id' not in df.columns:
        print("Column 'Section_id' not found in the CSV file.")
        return None

    # Return an independent copy so callers can add columns to the result
    # without pandas raising SettingWithCopyWarning on a filtered slice of `df`.
    return df[df['Section_id'] == 'Adverse Events'].copy()

# Tokenizer/model pairs already loaded by summarize_with_seq2seq, keyed by
# checkpoint name, so repeated calls reuse them instead of reloading.
_SEQ2SEQ_CACHE = {}


def summarize_with_seq2seq(text):
    """Summarize ``text`` with the facebook/bart-large-cnn checkpoint.

    Args:
        text: Text to summarize. The tokenizer truncates it to 1024 tokens.

    Returns:
        The decoded summary (beam search with 4 beams, ``min_length=10``,
        ``max_length=50``), or an empty string when ``text`` is not a string
        or is blank, e.g. the NaN pandas yields for an empty CSV cell.
    """
    # Nothing to summarize: skip the model rather than let it generate text
    # from an empty prompt or let the tokenizer reject a non-string value.
    if not isinstance(text, str) or not text.strip():
        return ""

    model_name = "facebook/bart-large-cnn"
    if model_name not in _SEQ2SEQ_CACHE:
        # Loading the checkpoint is expensive, so do it once per process
        # instead of once per call (this function is called once per row).
        _SEQ2SEQ_CACHE[model_name] = (
            BartTokenizer.from_pretrained(model_name),
            BartForConditionalGeneration.from_pretrained(model_name),
        )
    tokenizer, model = _SEQ2SEQ_CACHE[model_name]

    # Generate the summary
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_length=50,
        min_length=10,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

if __name__ == "__main__":
    csv_filename = "d:/Thesis/Processed Data/1.Summary/GPT-test-summary.csv"
    output_filename = "d:/Thesis/Processed Data/1.Summary/seq2seq-test-summary-with-summaries.csv"

    # Step 1: Load the CSV file
    df = load_csv_file(csv_filename)

    if df is None:
        print("CSV file couldn't be loaded. Please check the file path.")
    else:
        # Step 2: Keep only the rows whose 'Section_id' marks adverse events
        adverse_events_df = check_adverse_events(df)

        # check_adverse_events returns None (after printing why) when the
        # 'Section_id' column is missing, so test for None before using .empty.
        if adverse_events_df is not None and not adverse_events_df.empty:
            # Step 3: Summarize the 'summary' column of every selected row.
            # A row without a 'summary' value falls back to the empty string.
            summaries = [
                summarize_with_seq2seq(row.get('summary', ''))
                for _, row in adverse_events_df.iterrows()
            ]

            # assign() builds a new DataFrame instead of writing into a
            # filtered slice of `df`, which avoids SettingWithCopyWarning.
            adverse_events_df = adverse_events_df.assign(summarized_summary=summaries)

            # Step 4: Save the results to a new CSV file
            adverse_events_df.to_csv(output_filename, index=False)
            print(f"Summarized results saved to '{output_filename}'.")
        elif adverse_events_df is not None:
            print("No rows with adverse events found in the 'Section_id' column.")


KeyboardInterrupt: 

In [2]:
import pandas as pd
import torch
from transformers import BartTokenizer, BartForConditionalGeneration

# Tokenizer, model and device already set up by generate_summary_bart, keyed
# by checkpoint name, so the checkpoint is loaded once rather than per call.
_BART_RESOURCES = {}


def generate_summary_bart(text):
    """Summarize ``text`` with the facebook/bart-large-cnn checkpoint.

    The model runs on CUDA when ``torch.cuda.is_available()`` is true and on
    the CPU otherwise.

    Args:
        text: Text to summarize. The tokenizer truncates it to 1024 tokens.

    Returns:
        The decoded summary (beam search with 4 beams, ``min_length=10``,
        ``max_length=100``), or an empty string when ``text`` is not a string
        or is blank, e.g. the NaN pandas yields for an empty CSV cell.
    """
    # Nothing to summarize: skip the model rather than let it generate text
    # from an empty prompt or let the tokenizer reject a non-string value.
    if not isinstance(text, str) or not text.strip():
        return ""

    BART_model_name = "facebook/bart-large-cnn"
    if BART_model_name not in _BART_RESOURCES:
        # Loading the checkpoint and moving it to the device is expensive, so
        # do it on the first call only; this function is applied per cell.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tokenizer = BartTokenizer.from_pretrained(BART_model_name)
        model = BartForConditionalGeneration.from_pretrained(BART_model_name)
        model.to(device)
        _BART_RESOURCES[BART_model_name] = (tokenizer, model, device)
    BART_tokenizer, BART_model, device = _BART_RESOURCES[BART_model_name]

    inputs = BART_tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
    # Input tensors must live on the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    summary_ids = BART_model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=100,
        min_length=10,
        num_beams=4,
        early_stopping=True,
    )
    return BART_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

def summarize_csv(input_csv, output_csv):
    """Summarize the two trial columns of a CSV file and save the result.

    Reads ``input_csv``, runs ``generate_summary_bart`` on every non-blank
    cell of the "Primary Trial" and "Secondary Trial" columns, stores the
    results in 'primary_summary' and 'secondary_summary', joins the two with
    a single space into 'summarised', and writes the frame to ``output_csv``
    without the index.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path of the CSV file to write.

    Raises:
        KeyError: If either trial column is missing from the input file.
    """
    # Load the CSV file into a pandas DataFrame
    df = pd.read_csv(input_csv)

    # Columns for which summaries are generated
    primary_column = "Primary Trial"
    secondary_column = "Secondary Trial"

    # Fail before the slow summarization starts, not after the first column
    # has already been processed.
    missing = [name for name in (primary_column, secondary_column) if name not in df.columns]
    if missing:
        raise KeyError(f"Missing column(s) in '{input_csv}': {missing}")

    def _summarize_cell(value):
        # Empty CSV cells are read as NaN (a float); give them an empty
        # summary instead of handing a non-string to the summarizer.
        if not isinstance(value, str) or not value.strip():
            return ""
        return generate_summary_bart(value)

    df['primary_summary'] = df[primary_column].apply(_summarize_cell)
    df['secondary_summary'] = df[secondary_column].apply(_summarize_cell)

    # Merge the primary and secondary summaries into a new "summarised" column
    df['summarised'] = df['primary_summary'] + ' ' + df['secondary_summary']

    # Each call handles exactly one input file.
    print("Processed:", 1, "files")

    # Save the DataFrame with summaries to a new CSV file
    df.to_csv(output_csv, index=False)




# Define the paths in this cell so it runs on a fresh kernel; the values are
# the ones the following cell of this notebook uses.
input_csv = "c:/Users/akhil/Downloads/your_file_concatenated.csv"  # Update with your file path
output_csv = "d:/Thesis/Processed Data/test_data/bart_final_summary.csv"  # Update with your file path

# Generate summaries for the input CSV and save to the output CSV
summarize_csv(input_csv, output_csv)


Processed: 1 files


In [3]:
import pandas as pd
import torch
from transformers import BartTokenizer, BartForConditionalGeneration

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the checkpoint once at module level so that every call to
# generate_summary_bart reuses the same tokenizer and model.
BART_model_name = "facebook/bart-large-cnn"
BART_tokenizer = BartTokenizer.from_pretrained(BART_model_name)
BART_model = BartForConditionalGeneration.from_pretrained(BART_model_name)
BART_model = BART_model.to(device)
def generate_summary_bart(text):
    """Summarize ``text`` with the module-level BART model.

    Relies on the ``BART_tokenizer``, ``BART_model`` and ``device`` globals
    defined in this cell.

    Args:
        text: Text to summarize. The tokenizer truncates it to 1024 tokens.

    Returns:
        The decoded summary (beam search with 4 beams, ``min_length=10``,
        ``max_length=200``), or an empty string when ``text`` is not a string
        or is blank, e.g. the NaN pandas yields for an empty CSV cell.
    """
    # Nothing to summarize: skip the model rather than let it generate text
    # from an empty prompt or let the tokenizer reject a non-string value.
    if not isinstance(text, str) or not text.strip():
        return ""

    inputs = BART_tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
    # Input tensors must live on the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    summary_ids = BART_model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=200,
        min_length=10,
        num_beams=4,
        early_stopping=True,
    )
    return BART_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

def summarize_csv(input_csv, output_csv):
    """Summarize the two trial columns of a CSV file and save the result.

    Reads ``input_csv``, runs ``generate_summary_bart`` on every non-blank
    cell of the "Primary Trial" and "Secondary Trial" columns, stores the
    results in 'primary_summary' and 'secondary_summary', joins the two with
    a single space into 'summarised', and writes the frame to ``output_csv``
    without the index.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path of the CSV file to write.

    Raises:
        KeyError: If either trial column is missing from the input file.
    """
    # Load the CSV file into a pandas DataFrame
    df = pd.read_csv(input_csv)

    # Columns for which summaries are generated
    primary_column = "Primary Trial"
    secondary_column = "Secondary Trial"

    # Fail before the slow summarization starts, not after the first column
    # has already been processed.
    missing = [name for name in (primary_column, secondary_column) if name not in df.columns]
    if missing:
        raise KeyError(f"Missing column(s) in '{input_csv}': {missing}")

    def _summarize_cell(value):
        # Empty CSV cells are read as NaN (a float); give them an empty
        # summary instead of handing a non-string to the summarizer.
        if not isinstance(value, str) or not value.strip():
            return ""
        return generate_summary_bart(value)

    df['primary_summary'] = df[primary_column].apply(_summarize_cell)
    df['secondary_summary'] = df[secondary_column].apply(_summarize_cell)

    # Merge the primary and secondary summaries into a new "summarised" column
    df['summarised'] = df['primary_summary'] + ' ' + df['secondary_summary']

    # Each call handles exactly one input file.
    print("Processed:", 1, "files")

    # Save the DataFrame with summaries to a new CSV file
    df.to_csv(output_csv, index=False)

# Paths for this run; update both to match your machine.
# Alternative input file: "d:/Thesis/Processed Data/test_data/11_data.csv"
input_csv = "c:/Users/akhil/Downloads/your_file_concatenated.csv"
output_csv = "d:/Thesis/Processed Data/test_data/bart_final_summary.csv"

# Summarize the input file and write the result to the output path.
summarize_csv(input_csv=input_csv, output_csv=output_csv)


Processed: 1 files


In [6]:
# Paths for this run; update both to match your machine.
# Alternative input file: "c:/Users/akhil/Downloads/your_file_concatenated.csv"
# The output path is the same one used with that alternative input earlier in
# this notebook, so this run overwrites that file.
input_csv = "d:/Thesis/Processed Data/test_data/11_data.csv"
output_csv = "d:/Thesis/Processed Data/test_data/bart_final_summary.csv"

# Summarize the input file and write the result to the output path.
summarize_csv(input_csv=input_csv, output_csv=output_csv)

Processed: 1 files
