### Baseline models to consider:
 - Zero and Few-shot prompting of LLAMA-3
 - Fine-tuned LLAMA-3 on generating press releases

In [1]:
%load_ext autoreload

In [2]:
import os
import sys
import numpy as np

In [3]:
os.environ['TRANSFORMERS_CACHE'] = '/mnt/swordfish-pool2/milad/hf-cache'
os.environ['HF_DATASETS_CACHE'] = '/mnt/swordfish-pool2/milad/hf-cache'
os.environ['OPENAI_API_KEY'] = 'zTbZNk16Ik1pZnqLn38ZT3BlbkFJImq3pd7widkr7RzsC771'
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
sys.path.append('./src-py')

In [4]:
import tiktoken
import datasets
import json
import os
import pandas as pd
import torch
from tabulate import tabulate

from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, pipeline
from peft import LoraConfig, get_peft_model



In [5]:
%autoreload
import utils
import prompts
from llm_based_evaluation import *

In [6]:
ds_path = '/mnt/swordfish-pool2/milad/communicating-science-to-the-public/new-eval_experiment_100/'
output_path = '/mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases/'

### Zero and few shot journalistic summarization:

In [7]:
model, tokenizer = utils.load_model_with_adapter('meta-llama/Meta-Llama-3-8B-Instruct', '', device_map="cuda:0")
gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, batch_size=12)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


In [8]:
sample_test = datasets.load_from_disk(ds_path + '/processed_test_ds_sample/')

In [9]:
sample_test

Dataset({
    features: ['paper_id', 'paper_title', 'paper_text', 'prompt', 'completion', 'pr-article', 'topic', '__index_level_0__'],
    num_rows: 100
})

In [19]:
prompt = prompts.baseline_pr_generation

In [20]:
prompt

{'strategy_name': 'baseline_pr_generation',
 'instruction': '\n        Please write a press release article to communicate the science presented in the following scientific paper. Your output should be:\n        "Press Release Article": "The press release article about the paper"\n    ',
 'inputs': {'Scientific paper': ''}}

In [24]:
responses = utils.generate_press_release(sample_test.select(range(2)), gen_pipeline, prompts.baseline_pr_generation['instruction'], 
                       paper_text_clm='paper_text', conv_clm=None, max_input_tokens=1500)

In [25]:
responses

['The structural ensemble at 20 µM Glu concentration was characterized by the absence of structures with all four LBDs bound to Glu. Although the Glu-bound LBDs had a clamshell closure angle α larger than 15°, some LBDs remained in the open state (Extended Data Fig. 4c, d ). This observation suggests that channel opening requires agonist binding to at least two LBDs, whereas binding to all four LBDs does not guarantee maximal channel conductance.  Molecular dynamics simulations and machine-learning analysis Molecular dynamics simulations were performed using the GROMACS software package 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 1

In [None]:
baseline_conv_dataset.save_to_disk(output_path + '/'  + 'baseline_gen_ds')

In [16]:
baseline_conv_dataset[1]

'ROR2 knockdown and ROR1 overexpression showed distinct effects on proliferation, migration and invasion in MFE-296. ( A ) ROR2 mRNA expression level was reduced significantly without changing ROR1 following single ROR2 siRNA transfection. ROR1 mRNA expression level was elevated significantly with no changes in ROR2 mRNA level following single ROR1 plasmid transfection. Cotransfecting ROR1 plasmid and ROR2 siRNA did not significantly change ROR1 or ROR2 mRNA level. ( B ) Representative western blot membranes showed effective delivery of ROR2 siRNA and/or ROR1 plasmid in MFE-296. ( C ) ROR1 overexpression and ROR2 knockdown showed distinct effects on cell proliferation. ( D ) ROR1 overexpression and/or ROR2 knockdown did not significantly change cell adhesion. ( E ) ROR1 overexpression and ROR2 knockdown showed distinct effects on cell migration. ( F ) ROR1 overexpression showed a higher average invaded cell number compared to control. For all panels n = 3, error bars represent standard

In [21]:
prompt = prompts.baseline_pr_generation_cot
prompt['inputs']['Scientific paper'] = 'paper_text'
baseline_pr_generation_res = datadreamer_generation.generate_pr_articles(llama3, sample_test, ds_path, 
                                                                         tokenizer, prompt, max_input_tokens=1200, hub_name=None)
baseline_pr_generation_res.save_to_disk(ds_path + '/'  + prompt['strategy_name'] + '_final_ds')

[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Initialized. 🚀 Dreaming to folder: /mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases/baseline_pr_generation_cot
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds' finished and is saved to disk. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map)' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map)' will run lazily. 🥱
  datasource = datasource.map(lambda row: {'inputs_truncated': truncate_text(encoding, row['inputs'], max_input_tokens)})
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map) (map)' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map) (map)' will run lazily. 🥱
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' is running. ⏳



        Please write a press release article to communicate the science presented in the following paper.
        Before generating the press release, think step by step about the social impact of the research paper, the innovative aspects of the paper and how it is different from other research on the same topic, and how to communicate the problem, the approach and the results of the paper in a simple and accessible lanuage. Finally output the press release in the following format:
        "Press Release Article": "The press release article about the paper"
    


[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map)' finished running lazily. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map) (map)' finished running lazily. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 10 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 20 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 30 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 40 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 50 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 60 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 70 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 80 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' progress: 90 row(s) 🔄
[ [35m🤖 Data[33mDr[31mea[35mmer[

Failed to find PR Article in the output
Failed to find PR Article in the output


[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'zipped(original_ds (map) (map), pr-writings (select_columns))' finished running lazily. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'zipped(original_ds (map) (map), pr-writings (select_columns)) (map)' finished running lazily. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Done. ✨ Results in folder: /mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases/baseline_pr_generation_cot


Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
# prompt = prompts.baseline_pr_generation_w_conv
# prompt['inputs']['Scientific paper'] = 'paper_text'
# baseline_pr_generation_res = datadreamer_generation.generate_pr_articles(llama3, sample_test, ds_path + '/generated-prs/'  + prompt['strategy_name'], 
#                                                                          tokenizer, prompt, max_input_tokens=1200, hub_name=None)
# baseline_pr_generation_res.save_to_disk(ds_path + '/generated-prs/'  + prompt['strategy_name'] + '_final_ds')

In [25]:
sample_test = sample_test.map(lambda row: {'generated-conversation-as-str': '\n'.join(['{}: {}'.format('Journalist', x['content']) if x['role'] == 'assistant' else '{}: {}'.format('Researcher', x['content']) for x in row['generated_conversation'][1:]])})

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
prompt = prompts.pr_generation_by_conv_summarization
prompt['inputs']['Conversation'] = 'generated-conversation-as-str'
pr_generation_w_conv_res = datadreamer_generation.generate_pr_articles(llama3, sample_test, ds_path, tokenizer, prompt, hub_name=None)
pr_generation_w_conv_res.save_to_disk(ds_path + prompt['strategy_name'] + '_final_ds')

[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Initialized. 🚀 Dreaming to folder: /mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases/pr_generation_by_conv_summarization
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds' finished and is saved to disk. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map)' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map)' will run lazily. 🥱
  datasource = datasource.map(lambda row: {'inputs_truncated': truncate_text(encoding, row['inputs'], max_input_tokens)})
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map) (map)' is running. ⏳
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map) (map)' will run lazily. 🥱
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'pr-writings' is running. ⏳



        Please write a press release article to communicate the science presented in the following paper. The press-release should summarize the main points in the given conversation. The output should have the following format:
        "Press Release Article": "The press release article about the paper"        
    


[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map)' finished running lazily. 🎉
[ [35m🤖 Data[33mDr[31mea[35mmer[0m 💤 ] Step 'original_ds (map) (map)' finished running lazily. 🎉


In [28]:
#clm='parsed-pr-article'
clm='gen-pr'
row=0
print(baseline_pr_generation_res[clm][row])
print('++++++++++++++')
print(baseline_pr_generation_cot_res[clm][row])
print('++++++++++++++')
print(baseline_pr_generation_w_conv_res[clm][row])
print('++++++++++++++')
print(pr_generation_w_conv_res[clm][row])

**Press Release Article:**

**Unlocking the Secrets of Superconductors: Scientists Discover Hidden Patterns in Magnetic Materials**

A team of scientists has made a groundbreaking discovery in the field of superconductors, uncovering hidden patterns in magnetic materials that could revolutionize our understanding of these complex systems.

Researchers have long been fascinated by the phenomenon of superconductivity, where certain materials can conduct electricity with zero resistance at extremely low temperatures. However, the exact mechanisms behind this phenomenon have remained elusive, until now.

In a new study, scientists have discovered that impurities in magnetic materials can create "spin droplets" - tiny regions of magnetic order that persist even when the material is cooled to near absolute zero. These droplets, which are invisible to the naked eye, can have a profound impact on the material's electronic properties, leading to the emergence of unconventional superconducting s

NameError: name 'baseline_pr_generation_cot_res' is not defined

In [26]:
# Use the two trained llms
# ds_path = '/mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases/llama3-trained-on-llama3-for-1-epochs/'

# q=BitsAndBytesConfig(load_in_8bit=True)
# llama3 = HFTransformers("meta-llama/Meta-Llama-3-8B-Instruct", device='auto', adapter_name='miladalsh/llama3-trained-on-llama3-for-1-epochs', 
#                         quantization_config=q, dtype=torch.bfloat16)
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# tokenizer.pad_token_id = tokenizer.eos_token_id

# prompt = prompts.baseline_pr_generation_w_conv
# prompt['inputs']['Scientific paper'] = 'sc-intro'
# pr_generation_w_conv_res = baselines.generate_pr_articles(llama3, sample_test, ds_path, tokenizer, prompt, hub_name=None)
# pr_generation_w_conv_res.save_to_disk(ds_path + prompt['strategy_name'] + 'final_ds')

### Evaluation

In [29]:
ds_path = '/mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases/'

In [31]:
ls /mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases/ 

[0m[01;34mbaseline_pr_generation[0m/
[01;34mbaseline_pr_generation_cot[0m/
[01;34mbaseline_pr_generation_cot_final_ds[0m/
[01;34mbaseline_pr_generation_final_ds[0m/
[01;34mpr_generation_by_conv_summarization[0m/
[01;34mpr_generation_by_conv_summarization_final_ds[0m/


In [33]:
baseline_pr_generation_res = datasets.load_from_disk(ds_path + '/baseline_pr_generation_final_ds')
baseline_pr_generation_cot_res = datasets.load_from_disk(ds_path + '/baseline_pr_generation_cot_final_ds')
baseline_pr_generation_conv_res = datasets.load_from_disk(ds_path +  '/pr_generation_by_conv_summarization_final_ds')

#### Basic Eval:

In [34]:
eval_results = {}

eval_results['baseline_pr_generation'] = utils.evaluate_text_similarity(baseline_pr_generation_res['parsed-pr-article'],  baseline_pr_generation_res['pr-article'])
eval_results['baseline_pr_generation_cot'] = utils.evaluate_text_similarity(baseline_pr_generation_cot_res['parsed-pr-article'],  baseline_pr_generation_cot_res['pr-article'])
eval_results['pr_generation_w_conv'] = utils.evaluate_text_similarity(baseline_pr_generation_conv_res['parsed-pr-article'],  baseline_pr_generation_conv_res['pr-article'])

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

In [60]:
#baseline_pr_generation_cot_res['gen-pr'][0]

In [24]:
#baseline_pr_generation_cot_res['parsed-pr-article'][0]

In [35]:
print(tabulate(
    [[name] + list(eval_res.values())[:3] for name, eval_res in eval_results.items()],
    headers=['Model', 'Rouge-1', 'Rouge-L', 'BERT-f1']
))

Model                         Rouge-1    Rouge-L    BERT-f1
--------------------------  ---------  ---------  ---------
baseline_pr_generation          0.43       0.178      0.838
baseline_pr_generation_cot      0.432      0.177      0.837
pr_generation_w_conv            0.413      0.175      0.834


In [13]:
print(tabulate(
    [[name] + list(eval_res.values())[:3] for name, eval_res in eval_results.items()],
    headers=['Model', 'Rouge-1', 'Rouge-L', 'BERT-f1']
))

Model                            Rouge-1    Rouge-L    BERT-f1
-----------------------------  ---------  ---------  ---------
baseline_pr_generation             0.434      0.181      0.837
baseline_pr_generation_cot         0.435      0.178      0.835
baseline_pr_generation_w_conv      0.38       0.167      0.834
pr_generation_w_conv               0.405      0.172      0.829


#### LLM-based evalaution:

In [13]:
grdound_truth_ds = baseline_pr_generation_res.map(lambda row: {'parsed-pr-article': row['pr-summary-and-article']})

In [66]:
press_release_datasets = {
    'baseline_pr_generation' : [ datasets.load_from_disk(ds_path + '/baseline-generation/' + 'baseline_pr_generation_final_ds'), ds_path + '/baseline-generation/' + 'baseline_pr_generation_final_ds'],
    'baseline_pr_generation_cot': [datasets.load_from_disk(ds_path + '/baseline-generation/' + 'baseline_pr_generation_cot_final_ds'),ds_path + '/baseline-generation/' + 'baseline_pr_generation_cot_final_ds'],
    'baseline_pr_generation_w_conv': [datasets.load_from_disk(ds_path + '/baseline-generation/' + 'baseline_pr_generation_w_conv_final_ds'), ds_path + '/baseline-generation/' + 'baseline_pr_generation_w_conv_final_ds'],
    'pr_generation_w_conv': [datasets.load_from_disk(ds_path + '/baseline-generation/' + 'pr_generation_by_conv_summarization_final_ds'), ds_path + '/baseline-generation/' + 'pr_generation_by_conv_summarization_final_ds'],
    'gt_press_release' : [grdound_truth_ds, ds_path + '/gt-press-release'],
}

In [None]:
prompts_to_eval = [prompts.pr_clarity_eval_prompt, prompts.pr_scientific_context_eval_prompt, prompts.pr_societal_context_eval_prompt]

llm_eval_results = llm_based_evaluation(prompts_to_eval, press_release_datasets)

Loading /mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases//baseline-generation/baseline_pr_generation_final_ds from already saved file
Loading /mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases//baseline-generation/baseline_pr_generation_cot_final_ds from already saved file
Loading /mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases//baseline-generation/baseline_pr_generation_w_conv_final_ds from already saved file
Loading /mnt/swordfish-pool2/milad/communicating-science-to-the-public/generated-press-releases//baseline-generation/pr_generation_by_conv_summarization_final_ds from already saved file


  0%|                                                                                        | 0/494 [00:00<?, ?it/s]

In [68]:
print(tabulate(
        [[name] + get_llm_avg_scores(res) for name, res in llm_eval_results.items()],
        headers=['Prompt', 'Scientific Context', 'Social Context', 'Accessibility', 'Relevance', 'Avg']
    ))

Prompt                           Scientific Context    Social Context    Accessibility    Relevance
-----------------------------  --------------------  ----------------  ---------------  -----------
baseline_pr_generation                         1.74              1.95             3.6          2.43
baseline_pr_generation_cot                     1.75              1.99             3.8          2.51
baseline_pr_generation_w_conv                  1.7               2.04             3.86         2.53
pr_generation_w_conv                           1.64              2.06             3.38         2.36
gt_press_release                               2.48              2.22             4.39         3.03
