# Reproducing the codebase through the Python API

This notebook runs the trained BART model through the Python API rather than through the CLI.

In [None]:
import os
import sys
import argparse
import logging
import re
import typing as ty

from tqdm import tqdm
from warnings import warn
from torch.multiprocessing import Pool, set_start_method
set_start_method('spawn', force=True)
from functools import partial
import more_itertools as mit

import torch
import fairseq
from fairseq.models.bart import BARTHubInterface
from fairseq.models.bart import BARTModel

import nvgpu

from pathlib import Path

In [None]:
import logzero

from datetime import datetime
# Timestamp of this notebook run; used to give every run its own log file.
_datetime_exec = datetime.now()

# The log file lives under the relative directory "logs/". Create it up front:
# opening the log file fails if the parent directory does not exist yet
# (e.g. on a fresh checkout).
Path("logs").mkdir(parents=True, exist_ok=True)
logzero.logfile(f"logs/{_datetime_exec.isoformat()}.log")

# Shared logger used by the rest of the notebook.
logger = logzero.logger

In [None]:
def load_model(task: Path, model_path: Path) -> BARTHubInterface:
    """Load a BART checkpoint via ``BARTModel.from_pretrained``.

    Args:
        task: a path to the directory of the model. It is forwarded to
            fairseq as ``data_name_or_path``.
        model_path: a path to 'model.pt' file. Its directory part and file
            name are passed separately to fairseq.

    Returns:
        The object returned by ``BARTModel.from_pretrained``.

    Raises:
        FileNotFoundError: if ``task`` or ``model_path`` does not exist.
    """
    # Explicit raises instead of ``assert``: assertions are stripped when
    # Python runs with ``-O``, which would silently skip the validation.
    if not task.exists():
        raise FileNotFoundError(f"Model directory not found: {task}")
    if not model_path.exists():
        raise FileNotFoundError(f"Checkpoint file not found: {model_path}")

    logger.info(f"Loading model {model_path}")
    # fairseq takes the checkpoint directory and the checkpoint file name
    # as two separate arguments.
    model_dirname, model_fname = os.path.split(model_path.as_posix())
    bart = BARTModel.from_pretrained(
        model_dirname,
        checkpoint_file=model_fname,
        data_name_or_path=task.as_posix()
    )
    logger.info("Loading done.")
    return bart


In [None]:
# path to input
PATH_TEXT_FILE_INPUT = Path("/workdir/kmitsuzawa/Project/neurips-2025/ConstraintsFact-Dreyer-2023/abstractive-factual-tradeoff/tests/testresources/xsum/test_source.txt")
assert PATH_TEXT_FILE_INPUT.exists()

# Read one source document per line (trailing newlines are kept).
# The ``with`` block closes the file handle deterministically instead of
# leaving it open until garbage collection.
with PATH_TEXT_FILE_INPUT.open() as _f_input:
    seq_text_input = _f_input.readlines()
# end with
assert len(seq_text_input) > 0

In [None]:
# Model directory to load. Despite the name, PATH_MODEL_FILE is a directory;
# the checkpoint itself is the 'model.pt' file inside it.
# with xsum model
PATH_MODEL_FILE = Path('/workdir/kmitsuzawa/Project/neurips-2025/ConstraintsFact-Dreyer-2023/abstractive-factual-tradeoff/tests/testresources/models/bart.large.xsum')
# with cnn model
# PATH_MODEL_FILE = Path('/workdir/kmitsuzawa/Project/neurips-2025/ConstraintsFact-Dreyer-2023/abstractive-factual-tradeoff/tests/testresources/models/bart.large.cnn')

# The same directory serves as the task/data directory and as the location
# of the checkpoint file.
bart_model = load_model(
    task=PATH_MODEL_FILE,
    model_path=PATH_MODEL_FILE.joinpath('model.pt'),
)

In [None]:
# Notebook display: show the concrete class of the object returned by load_model.
type(bart_model)

In [None]:
# Write the string representation of the loaded model to the log.
logger.info(str(bart_model))

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device_obj = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Move the model onto the selected device.
bart_model = bart_model.to(device_obj)

In [None]:
def bart_sample(bart: "BARTHubInterface",
                batch: ty.List[str],
                extractive_penalty_fct: str,
                beam: int = 4,
                lenpen: float = 2.0,  # length penalty
                min_len: int = 55,
                max_len_a: int = 0,
                max_len_b: int = 140,
                no_repeat_ngram_size: int = 3):
    """Generate summaries for ``batch`` with ``bart.sample`` under ``torch.no_grad()``.

    This definition must stay active: the constraint loop later in this
    notebook calls ``bart_sample(...)`` and fails with ``NameError`` otherwise.

    Args:
        bart: object exposing ``sample(batch, **kwargs)``.
        batch: source documents to summarise.
        extractive_penalty_fct: penalty specification forwarded unchanged to
            ``bart.sample`` (this notebook uses e.g. ``'log_exp(2,2.402244)'``,
            ``'none()'``, ``'linear()'``). NOTE(review): presumably only the
            project's modified fairseq accepts this keyword — confirm.
        beam: forwarded to ``bart.sample`` as ``beam``.
        lenpen: forwarded to ``bart.sample`` as ``lenpen``.
        min_len: forwarded to ``bart.sample`` as ``min_len``.
        max_len_a: forwarded to ``bart.sample`` as ``max_len_a``.
        max_len_b: forwarded to ``bart.sample`` as ``max_len_b``.
        no_repeat_ngram_size: forwarded to ``bart.sample`` as
            ``no_repeat_ngram_size``.

    Returns:
        Whatever ``bart.sample`` returns; the caller in this notebook indexes
        it with ``[0]`` for a single-document batch.
    """
    # Inference only: no gradients are needed.
    with torch.no_grad():
        return bart.sample(batch, beam=beam, lenpen=lenpen,
                           min_len=min_len, max_len_a=max_len_a, max_len_b=max_len_b,
                           no_repeat_ngram_size=no_repeat_ngram_size,
                           extractive_penalty_fct=extractive_penalty_fct)
# end def


# res = bart_sample(
#     bart=bart_model,
#     batch=seq_text_input,
#     extractive_penalty_fct='log_exp(2,2.402244)'
# )

---

In [None]:
# case Xsum constraints dataset
import json

PATH_CONSTRAINS_XSUM = Path("/workdir/kmitsuzawa/Project/neurips-2025/ConstraintsFact-Dreyer-2023/abstractive-factual-tradeoff/tests/testresources/datasets/constraints_fact_v1.0/xsum/collect.json")
assert PATH_CONSTRAINS_XSUM.exists()

# The file is parsed line by line, one JSON record per line.
# Iterate the handle directly (no intermediate list from readlines()) and
# skip blank lines, which would otherwise make json.loads raise.
with PATH_CONSTRAINS_XSUM.open() as f:
    seq_dataset = [json.loads(_line) for _line in f if _line.strip()]
# end with

logger.info(f'{len(seq_dataset)} records')

# double check: all xsum
for _record in seq_dataset:
    assert _record['dataset_name'] == 'xsum'
# end for

In [None]:
# # case CNN constraints dataset
# import json

# PATH_CONSTRAINS_CNN = Path("/workdir/kmitsuzawa/Project/neurips-2025/ConstraintsFact-Dreyer-2023/abstractive-factual-tradeoff/tests/testresources/datasets/constraints_fact_v1.0/cnn_dailymail/collect.json")
# assert PATH_CONSTRAINS_CNN.exists()

# with PATH_CONSTRAINS_CNN.open() as f:
#     seq_dataset = [json.loads(_line) for _line in f.readlines()]
# # end with

# logger.info(f'{len(seq_dataset)} records')

# # double check: all cnn_dailymail
# for _record in seq_dataset:
#     assert _record['dataset_name'] == 'cnn_dailymail'
# # end for

In [None]:
def get_source_and_summary(record_obj: ty.Dict) -> ty.Tuple[str, str]:
    """Return the (source document, summary) pair of one dataset record.

    The source is read from the 'document_full' field and the summary from
    the 'summary_raw' field; a missing field raises ``KeyError``.
    """
    # Alternative source field kept for reference:
    # return record_obj['document_original'], record_obj['summary_raw']
    source_text = record_obj['document_full']
    summary_text = record_obj['summary_raw']
    return source_text, summary_text
# end def

import pprint

# Positions in seq_dataset of the records to summarise.
target_document_index = [1, 10, 100, 200]

# Maps a record's 'abstractiveness_constraint' value to the
# extractive_penalty_fct string handed to bart_sample.
dict_commnad2ep = {
    'lambda4': 'log_exp(2,4.804488)',
    'lambda2': 'log_exp(2,2.402244)',
    'lambda1': 'log_exp(2,1.201122)',
    'none': 'none()',
    'linear': 'linear()',
    '1/lambda2': 'log_exp(2,0.416277447)',  # log_exp(2, 1 / (1.20112 * 2))
    '1/lambda1': 'log_exp(2,0.832556281)',  # log_exp(2, 1 / 1.20112)
}

# One result dict per processed record.
seq_stack = []

for _idx in target_document_index:
    _record = seq_dataset[_idx]

    _document_id: str = _record['document_id']
    command_abstractiveness_constraint: str = _record['abstractiveness_constraint']

    # Source text and reference summary of this record.
    _document_original, _summary_raw = get_source_and_summary(_record)
    # An unknown constraint name raises KeyError here.
    extractive_penalty_fct = dict_commnad2ep[command_abstractiveness_constraint]

    # Single-document batch; the generated summary is element 0 of the result.
    seq_summary = bart_sample(
        bart=bart_model,
        batch=[_document_original],
        extractive_penalty_fct=extractive_penalty_fct,
    )

    _res_obj = {
        'document_id': _document_id,
        'document_original': _document_original,
        'summary_raw': _summary_raw,
        'summary_gen': seq_summary[0],
        'extractive_penalty_fct': extractive_penalty_fct,
        'command_abstractiveness_constraint': command_abstractiveness_constraint,
    }
    seq_stack.append(_res_obj)
# end for

In [None]:
# Notebook display: show the result dicts collected in seq_stack.
seq_stack

In [None]:
# seq_text_input = [
#     # "Russian's intervention in Syria is \"hugely significant\" says the UK's former senior military adviser in the Middle East Lt Gen Sir Simon Mayall. 2 October 2015 Last updated at 18:13 BST Speaking in an interview with BBC Newsnight's diplomatic and defence editor Mark Urban, he said UK policy in Syria had been hampered by \"wishful thinking\" - and officials underestimated the staying power of President Bashar al-Assad. More on this story from Mark Urban on BBC Newsnight at 22:30 BST BBC Two on Friday 2 October, and afterwards on iPlayer",
#     # "On the first day in his new job, Choe Peng Sum was given a fairly simple brief: \"Just go make us a lot of money.\" Fast forward about 20 years, and it's fair to say he has done just that. The business he runs, Frasers Hospitality, is one of the world's biggest providers of high-end serviced apartments. Its 148 properties span about 80 capital cities, as well as financial hubs across Europe, Asia, the Middle East and Africa. But it almost didn't get off the ground. When Mr Choe was appointed to launch and lead the company, Asia was booming; the tiger economies of Hong Kong, South Korea, Taiwan and Singapore were expanding rapidly. But as Frasers prepared to open its first two properties in Singapore, the Asian financial crisis hit. It was 1997. Currencies went into freefall. Suddenly, people were losing their jobs and stopped travelling. Mr Choe recalls asking staff if they really wanted to continue working with the firm, because when the properties opened they might not get paid. \"It was really that serious,\" he says. \"I remember tearing up because they said 'let's open it, let's open it whether you can pay us or not'.\" Survival, Mr Choe admits, came through a bit of luck, and the misfortune of others. He had convinced the board at parent firm, property group Frasers Centrepoint, to open serviced apartments rather than hotels - partly because getting planning permission in Singapore was easier. But he also sensed it was a big, untapped market. And at the time of the crisis, it proved to be exactly what customers wanted. \"As we were going through this difficult patch, there were protests and riots in Jakarta,\" he says. \"A lot of companies like Microsoft called up looking for rooms for their staff because they were moving out of Jakarta.\" Frasers' 412 apartments were quickly in demand. Occupancy soon hit 70%, and then 90%. 
Explaining the popularity of serviced apartments, Mr Choe says that if people are staying somewhere for just a few days, they happily stay in hotels, but if they are going to be somewhere for one month to eight months, the walls of hotel rooms \"close in on you\". But now, Mr Choe, 57, faces new challenges - the travel tastes of millennials and the disruptive nature of Airbnb. \"The way to tackle Airbnb is not to ignore it. I will never underestimate Airbnb,\" he says. There's been no significant impact on Frasers yet. Big corporations still prefer to put employees in big service apartments, he says, because they can guarantee a level of safety and security. But that is likely to change, Mr Choe admits. \"I have two daughters who to my chagrin use Airbnb,\" he says. \"We took a family trip to Florence and I stayed in this wonderful boutique hotel, but paid a bundle for it. \"When my daughter joined us, she said, 'I'm just staying next door and paying about 80 euros'. We paid about 330 euros. \"I asked why they stayed at Airbnb. They say 'it's like a surprise, it's part of the adventure'.\" And so now, Mr Choe wants to bring some of that vibrancy to Frasers. While neutral colours, beige curtains and dark wooden chairs dominate its more traditional apartments, many customers want something different, and this is shaping Fraser's strategy. In 2015 it bought Malmaison Hotel du Vin, a UK hotel group that specialises in developing heritage properties into upscale boutique hotels. That has taken them beyond financial centres, including to Shakespeare's hometown of Stratford-upon-Avon. Or, an intrepid traveller with $500 (Â£325) to spend could have a night in a converted medieval prison in Oxford. And Frasers has launched the Capri sub-brand - whose website promises \"inspiring art and inspirational tech\". 
On a day-to-day basis Mr Choe says he still draws on his experience as a young man, who - having been given a scholarship by the Shangri-La hotel group to study at Cornell University in the US - came back to Asia to learn about the hospitality industry. \"They put me in every department conceivable. I remember one of the toughest jobs I had was in the butchery. I had to carve an entire cow. For one month, I could not eat meat. \"I'm thankful for those experiences. When you step into a hotel, you immediately pick up what works and what doesn't work. \"When I see the check-in staff walking more than three steps, I know the counter is set up wrong. \"It's like a cockpit. Can you imagine if the pilot had to turn around when he flies?\" More The Boss features, which every week profile a different business leader from around the world: The 'diva of divorce' for the world's super rich The snacks boss with an appetite for success Taking his own path: The world's leading maze designer Mr Choe adds that loyalty is very important to him, and he remains tremendously grateful to staff who have stayed with him. \"I will always respect and remember those who gave up their jobs to join me,\" he says. This loyalty is something that Mr Choe has earned, according to Donald MacLaurin, associate professor at Singapore Institute of Technology, and specialist in the hospitality sector. Mr MacLaurin points out that Mr Choe introduced a five-day working week, in a part of the world where six days is common, thereby showing \"a focus on quality of life issues for employees\". The associate professor adds says the early success of the business was remarkable given the timing of its launch. Fast forward to today and the company is now on track to operate 30,000 serviced apartments globally by 2019. That success, say Mr Choe's admirers, should make him something of a visionary. Follow The Boss series editor Will Smale on Twitter."
#     "Fast forward about 20 years, and it's fair to say he has done just that. The business he runs, Frasers Hospitality, is one of the world's biggest providers of high-end serviced apartments. Its 148 properties span about 80 capital cities, as well as financial hubs across Europe, Asia, the Middle East and Africa. But it almost didn't get off the ground. When Mr Choe was appointed to launch and lead the company, Asia was booming; the tiger economies of Hong Kong, South Korea, Taiwan and Singapore were expanding rapidly. But as Frasers prepared to open its first two properties in Singapore, the Asian financial crisis hit. It was 1997. Currencies went into freefall. Suddenly, people were losing their jobs and stopped travelling. Mr Choe recalls asking staff if they really wanted to continue working with the firm, because when the properties opened they might not get paid. \"It was really that serious,\" he says. \"I remember tearing up because they said 'let's open it, let's open it whether you can pay us or not'.\" Survival, Mr Choe admits, came through a bit of luck, and the misfortune of others. He had convinced the board at parent firm, property group Frasers Centrepoint, to open serviced apartments rather than hotels - partly because getting planning permission in Singapore was easier. But he also sensed it was a big, untapped market. And at the time of the crisis, it proved to be exactly what customers wanted. \"As we were going through this difficult patch, there were protests and riots in Jakarta,\" he says. \"A lot of companies like Microsoft called up looking for rooms for their staff because they were moving out of Jakarta.\" Frasers' 412 apartments were quickly in demand. Occupancy soon hit 70%, and then 90%. 
Explaining the popularity of serviced apartments, Mr Choe says that if people are staying somewhere for just a few days, they happily stay in hotels, but if they are going to be somewhere for one month to eight months, the walls of hotel rooms \"close in on you\". But now, Mr Choe, 57, faces new challenges - the travel tastes of millennials and the disruptive nature of Airbnb. \"The way to tackle Airbnb is not to ignore it. I will never underestimate Airbnb,\" he says. There's been no significant impact on Frasers yet. Big corporations still prefer to put employees in big service apartments, he says, because they can guarantee a level of safety and security. But that is likely to change, Mr Choe admits. \"I have two daughters who to my chagrin use Airbnb,\" he says. \"We took a family trip to Florence and I stayed in this wonderful boutique hotel, but paid a bundle for it. \"When my daughter joined us, she said, 'I'm just staying next door and paying about 80 euros'. We paid about 330 euros. \"I asked why they stayed at Airbnb. They say 'it's like a surprise, it's part of the adventure'.\" And so now, Mr Choe wants to bring some of that vibrancy to Frasers. While neutral colours, beige curtains and dark wooden chairs dominate its more traditional apartments, many customers want something different, and this is shaping Fraser's strategy. In 2015 it bought Malmaison Hotel du Vin, a UK hotel group that specialises in developing heritage properties into upscale boutique hotels. That has taken them beyond financial centres, including to Shakespeare's hometown of Stratford-upon-Avon. Or, an intrepid traveller with $500 (Â£325) to spend could have a night in a converted medieval prison in Oxford. And Frasers has launched the Capri sub-brand - whose website promises \"inspiring art and inspirational tech\". 
On a day-to-day basis Mr Choe says he still draws on his experience as a young man, who - having been given a scholarship by the Shangri-La hotel group to study at Cornell University in the US - came back to Asia to learn about the hospitality industry. \"They put me in every department conceivable. I remember one of the toughest jobs I had was in the butchery. I had to carve an entire cow. For one month, I could not eat meat. \"I'm thankful for those experiences. When you step into a hotel, you immediately pick up what works and what doesn't work. \"When I see the check-in staff walking more than three steps, I know the counter is set up wrong. \"It's like a cockpit. Can you imagine if the pilot had to turn around when he flies?\" More The Boss features, which every week profile a different business leader from around the world: The 'diva of divorce' for the world's super rich The snacks boss with an appetite for success Taking his own path: The world's leading maze designer Mr Choe adds that loyalty is very important to him, and he remains tremendously grateful to staff who have stayed with him. \"I will always respect and remember those who gave up their jobs to join me,\" he says. This loyalty is something that Mr Choe has earned, according to Donald MacLaurin, associate professor at Singapore Institute of Technology, and specialist in the hospitality sector. Mr MacLaurin points out that Mr Choe introduced a five-day working week, in a part of the world where six days is common, thereby showing \"a focus on quality of life issues for employees\". The associate professor adds says the early success of the business was remarkable given the timing of its launch. Fast forward to today and the company is now on track to operate 30,000 serviced apartments globally by 2019. That success, say Mr Choe's admirers, should make him something of a visionary. Follow The Boss series editor Will Smale on Twitter."
# ]

# seq_text_summary = [
#     # "Former Foreign Secretary Philip Hammond has said the UK's policy in Syria has been hampered by \"wishful thinking\" - and officials underestimated the staying power of President Bashar al-Assad.",
#     "\"When I was appointed as chief executive of my first company, I didn't think I would be able to survive,\" says Lee Choe."
# ]

# # lambda4 = 'log_exp(2,4.80448)' ??


# seq_summary = bart_sample(
#     bart=bart_model,
#     batch=seq_text_input,
#     extractive_penalty_fct='log_exp(2,4.80448)'
# )

# import pprint

# pprint.pprint(f'Genarated -> {seq_summary[0]}')
# pprint.pprint(f"Dataset -> {seq_text_summary}")

In [None]:
# # tested options for 
# options_tests = ["none()", "linear()", "log_exp(2,1)", "log_exp(2,-1)", "log_exp(2.5, 1.1)", "log_exp2(3, 5)"]
# # failed options
# ["maxlen(2)"]


# for _option in options_tests:
#     seq_summary = bart_sample(
#         bart=bart_model,
#         batch=seq_text_input,
#         extractive_penalty_fct=_option
#     )
#     pprint.pprint(f"{_option}, {seq_summary}")

