<a href="https://colab.research.google.com/github/Ramla24/Text-Summarization-and-Analysis-System./blob/main/Fine_tuning_T5_for_Abstractive_Summarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from datasets import Dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
from rouge_score import rouge_scorer




In [None]:
# Location of the summarization CSV, given relative to the notebook's working directory.
file_path = "../Dataset/summarydataset.csv"

In [None]:
# Load the CSV into a DataFrame; the tokenization step later reads its
# "content" and "human_summary" columns.
data = pd.read_csv(file_path)

In [None]:
# Convert DataFrame to Hugging Face Dataset
dataset = Dataset.from_pandas(data)

# Split the dataset: 20% of the rows go to the "test" split, the rest to "train".
# NOTE(review): no seed is passed, so the split presumably differs between
# runs — confirm whether reproducible metrics are needed.
dataset = dataset.train_test_split(test_size=0.2)

# Model and tokenizer names
model_name = "google/flan-t5-base"  # You can choose other models like "t5-small", "facebook/bart-large-cnn" etc.
# Tokenizer and model are both loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Tokenization function
def preprocess_function(examples):
    """Tokenize a batch of articles together with their reference summaries.

    Args:
        examples: Batch mapping with a "content" column (source texts) and a
            "human_summary" column (target summaries).

    Returns:
        The tokenizer output for the source texts, with an extra "labels"
        entry holding the token ids of the summaries.
    """
    def _encode(texts, limit):
        # Every sequence is truncated and padded to exactly `limit` tokens.
        return tokenizer(texts, padding="max_length", truncation=True, max_length=limit)

    # Sources use a 512-token window, summaries a 128-token window.
    model_inputs = _encode(examples["content"], 512)
    # NOTE(review): the label ids keep the tokenizer's padding ids; whether
    # those positions are excluded from the loss is not visible here — confirm.
    model_inputs["labels"] = _encode(examples["human_summary"], 128)["input_ids"]
    return model_inputs

# Apply tokenization
# batched=True makes map() call preprocess_function on batches of rows
# instead of on one example at a time.
tokenized_dataset = dataset.map(preprocess_function, batched=True)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [None]:
# Collator that assembles tokenized examples into batches for the seq2seq model.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# Metric
# The scorer instance gets its own name: rebinding `rouge_scorer` would shadow
# the imported module and make this cell fail with AttributeError when re-run.
rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)


def compute_metrics(pred):
    """Compute the mean ROUGE-1 and ROUGE-L F-measures of an evaluation pass.

    Args:
        pred: Prediction object handed over by the trainer. Its
            ``predictions`` and ``label_ids`` attributes hold token ids.

    Returns:
        dict with the keys "rouge1" and "rougeL" (average F-measure over all
        examples). Both values are 0.0 when there is nothing to score.
    """
    # Local import: the notebook has no top-level numpy import.
    import numpy as np

    pad_id = tokenizer.pad_token_id

    generated = pred.predictions
    # Some models return a tuple whose first element is the token ids.
    if isinstance(generated, tuple):
        generated = generated[0]

    def _decode(token_ids):
        ids = np.asarray(token_ids)
        # -100 is the ignore-index sentinel used for padded positions; it is
        # not a valid vocabulary id, so swap it for the pad id before decoding.
        ids = np.where(ids != -100, ids, pad_id)
        return tokenizer.batch_decode(ids, skip_special_tokens=True)

    # Decode IDs to text
    pred_str = _decode(generated)
    labels_str = _decode(pred.label_ids)

    # Calculate ROUGE scores (reference first, hypothesis second)
    scores = [rouge.score(ref, hyp) for ref, hyp in zip(labels_str, pred_str)]
    if not scores:
        # Avoid a ZeroDivisionError on an empty evaluation set.
        return {"rouge1": 0.0, "rougeL": 0.0}

    # Compute average ROUGE scores
    total = len(scores)
    avg_rouge1 = sum(s['rouge1'].fmeasure for s in scores) / total
    avg_rougeL = sum(s['rougeL'].fmeasure for s in scores) / total

    return {"rouge1": avg_rouge1, "rougeL": avg_rougeL}

In [None]:
# Hyperparameters and bookkeeping options handed to the Seq2SeqTrainer.
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",  # directory that receives the training outputs
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    predict_with_generate=True,  # evaluation works on generated sequences, which compute_metrics decodes
    fp16=False,
    num_train_epochs=3,
    # Evaluate and save on the same per-epoch schedule.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    report_to="none"
)



In [None]:
# Create Trainer instance
# Wires together the model, the training arguments, the tokenized "train" and
# "test" splits, the batch collator and the metric function.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)


In [None]:
# Train the model
# The call's return value is the last expression of the cell, so the notebook
# displays it as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss,Rouge1,Rougel
1,No log,1.173839,0.161162,0.14515
2,1.471000,1.147807,0.166956,0.150975
3,1.317000,1.137456,0.165056,0.14861


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


TrainOutput(global_step=1200, training_loss=1.3590263366699218, metrics={'train_runtime': 40157.4509, 'train_samples_per_second': 0.06, 'train_steps_per_second': 0.03, 'total_flos': 1643417645875200.0, 'train_loss': 1.3590263366699218, 'epoch': 3.0})

In [None]:
# Write the model and the tokenizer into the same directory so both can be
# reloaded from that single path.
model.save_pretrained("./my_trained_model")
tokenizer.save_pretrained("./my_trained_model")

In [None]:
# Reload model and tokenizer from the saved directory. This rebinds the global
# `model` and `tokenizer` names that generate_summary reads.
model = AutoModelForSeq2SeqLM.from_pretrained("./my_trained_model")
tokenizer = AutoTokenizer.from_pretrained("./my_trained_model")

In [None]:
# Define a function to generate summary
def generate_summary(input_text, max_input_length=512, max_summary_length=512, num_beams=4):
    """Summarize ``input_text`` with the global ``model`` and ``tokenizer``.

    Args:
        input_text: Text to summarize. Input beyond ``max_input_length``
            tokens is truncated.
        max_input_length: Maximum number of input tokens given to the model.
        max_summary_length: Value passed to ``generate`` as ``max_length``.
        num_beams: Beam width used for beam search.

    Returns:
        The decoded summary of the first sequence, special tokens removed.
    """
    # Tokenize input text. padding=True pads only up to the longest sequence,
    # so a single text is not padded at all instead of being blown up to the
    # full window with pad tokens.
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_input_length,
    )
    # Keep the tensors on the same device as the model weights.
    inputs = inputs.to(model.device)

    # Generate summary using the trained model. The attention mask is passed
    # explicitly so any padded positions are ignored by the encoder.
    summary_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        num_beams=num_beams,
        max_length=max_summary_length,
        early_stopping=True,
    )

    # Decode and return the generated summary
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example usage:
input_text = """The body of the Iraqi prisoner was found naked and badly bruised in 2003, outside a detention center in southern Iraq run by United States Marines. The    man had been beaten, deprived of sleep, forced to stand for long periods and interrogated by Marines about his alleged role in a fatal ambush of American forces. James N. Mattis,   Donald J. Trump’s nominee for secretary of defense, was then a major general and the commander of the Marine division in Iraq responsible for the center. He quickly convened an inquiry into the death, which led to   and banned the harsh techniques used at the prison. “General Mattis was all up in arms over this,” Ralph Dengler, then a lieutenant colonel, testified at a military hearing in January 2004. He added that the commander, who arrived hours after the discovery on a planned visit with his British counterpart, had immediately described the death as “the worst thing that happened” under his watch in the Iraq war. “I was surprised that he would have felt that strongly about it, considering many of the other deaths, including American deaths,” Colonel Dengler said. Colleagues say the general’s handling of the episode reflects his firmly held views against torture and prisoner mistreatment, which are shared by many military leaders and could put them at odds with the new commander in chief. Mr. Trump, in a New York Times interview in late November, said he had been surprised to learn that the man he was considering to lead the country’s 2. 2 million service members did not believe in torture. During the presidential campaign, Mr. Trump advocated bringing back the   practice of waterboarding, adding that “only a stupid person would say it doesn’t work. ” General Mattis does not support abusive techniques, let alone waterboarding. “Give me a pack of cigarettes and a couple of beers,” he told Mr. Trump during their meeting in November, according to the   “and I do better with that than I do with torture. 
” The general’s beliefs were shaped by his military training, but also by his experiences in dealing with issues related to torture or mistreatment, according to an examination of his nearly   career and interviews with former colleagues and friends. General Mattis led the United States Central Command from August 2010 to March 2013. Lt. Col. T. G. Taylor, who was General Mattis’s spokesman during much of that time, said in an interview that the commander had spoken of America as needing to hold the moral high ground. Failing to uphold important principles “makes it easier for a soldier or a Marine to ask, ‘What am I fighting for? ’” Colonel Taylor said. “That is something that General Mattis is keenly aware of,” he added. As a young officer, General Mattis would have been instructed in the laws of war, including the Geneva Conventions, which, among other protections, require humane treatment of prisoners of war. A Marine who enlisted in 1969 while attending college  —   part of an R. O. T. C. program during the height of the Vietnam War  —   he did not deploy to Vietnam. But, several friends say, he believed that American prisoners of war were more likely to be tortured by the Vietcong if the United States tortured enemy captives. In 2006, General Mattis supported Gen. David H. Petraeus of the Army and other military leaders in the development of a new counterinsurgency field manual that highlighted limits on interrogation tactics. “Torture and cruel, inhuman, and degrading treatment is never a morally permissible option, even if lives depend on gaining information,” the manual said. “Lose moral legitimacy, lose the war. ” General Mattis later agreed in a Senate Armed Services Committee hearing that it was inappropriate for the military to support the use of abusive techniques on detainees  —   including waterboarding, forced nudity and sensory deprivation, tactics inflicted on prisoners during interrogations in secret C. I. A. 
prisons in the years after the 2001 terrorist attacks. Those techniques had been modeled on a military training program operated by the Joint Personnel Recovery Agency, which later fell under the general’s command. That program, Survival, Evasion, Resistance and Escape, also known as SERE, exposed United States soldiers and other service members to techniques that an enemy violating the laws of war might use so that Americans could be better prepared to resist them. The experience typically led trainees to conclude that tortured prisoners of war will say whatever they need to in order to stay alive, whether true or not. A recent investigation by The New York Times found that many terrorism suspects subjected to harsh tactics in C. I. A. jails or American military prisons had lasting mental health problems that were similar to those experienced by some American former P. O. W.s who suffered horrific abuses in Vietnam or Korea. As a commander in Afghanistan and later in Iraq, and as an overall leader of the American war effort, General Mattis often grappled with the consequences of the C. I. A. ’s treatment of prisoners and the harsh conditions at the military detention center at Guantánamo Bay, Cuba. They became a powerful recruiting tool for jihadis who threatened American forces in the field. Testifying at a Senate hearing in 2015 after his retirement, General Mattis opposed the release, before the end of hostilities, of prisoners who had fought against the United States. However, he added, they should be treated humanely, in accordance with international and domestic law. “I would go by the Geneva Conventions, and maintain them, with Red Cross oversight, until the war is over,” he told lawmakers. Torture is not effective in eliciting intelligence, the general felt. 
“For his whole career, he’s believed that it just doesn’t pay dividends,” said a retired senior United States military officer who is close to General Mattis but spoke on the condition of anonymity for fear of alienating Mr. Trump. The “pack of cigarettes” reference, according to people who served with the general, reflects two tenets that are drummed into future military leaders: that information gleaned from torture is unreliable, and that   can go a long way. “I’m not in his head, but what General Mattis was saying is that offering a modicum of friendship and humanity to someone in a desperate situation is more successful than physical torture,” Colonel Taylor said. Hope Hicks, a spokeswoman for Mr. Trump’s transition team, declined requests for more detail on the  ’s discussions with General Mattis or on the source of General Mattis’s beliefs and any further thoughts Mr. Trump has had about the use of torture. General Mattis declined to be interviewed. Many American military leaders maintain that the Bush administration’s departure from established practices in allowing harsh coercive techniques  —   government lawyers had said they did not meet the legal definition of torture, and had declared that detainees were not protected by the Geneva Conventions  —   tainted the United States’ reputation while not yielding results. “Ineffective, war crime, against our values, moral high ground, et cetera,” Donald J. Guter, a retired rear admiral and the dean of the South Texas College of Law, said in explaining objections to brutal treatment. “A very practical reason is that it opens the door for our own troops to be tortured, and we have no basis to object. If we torture, we’ve lost who we are. ” The Department of Defense has clear policies on the humane treatment of detainees. For example, the Army field manual’s section on intelligence collection states: “Use of torture is not only illegal but also it is a poor technique that yields unreliable results. 
” The manual adds, “Cruel, inhuman and degrading treatment is prohibited. ” As a commander in Iraq, General Mattis ordered several investigations into detainee abuse. One of the most significant concerned the 2003 death at a makeshift detention and interrogation center in an abandoned Iraqi Army barracks in southern Iraq, christened Camp Whitehorse. Prisoners were held by United States Marine reservists in extreme heat in a dirty, stone building that had been looted of all amenities by Iraqis during the   invasion. The prisoner who died, Nagem Sadoon Hatab, had been arrested days earlier on suspicion of involvement in a deadly ambush of a United States Army convoy that led to the capture of Jessica Lynch, a    private who was later rescued. Mr. Hatab was alleged to have killed American service members and sold an   rifle taken from one of them. Mr. Hatab failed to stand up while being subjected to     treatment  —   forced standing for 50 minutes of each hour, for up to 10 hours. The objective was to make the detainees tired, stressed and submissive for interrogators, witnesses later said in military court hearings. Like other prisoners at the center, Mr. Hatab was hooded and his hands were restrained behind his back with plastic ties. He was beaten severely by Marine guards after refusing to comply with the forced standing and  . He also underwent questioning by a special Marine interrogation squad. A Marine guard later testified that he had often heard yelling, screaming and banging from the interrogation room, a converted bathroom. After midnight on June 6, 2003, Mr. Hatab was found dead in a courtyard where he had been left lying for hours, his body covered in his own feces. An armed forces medical examiner categorized the death as a homicide. Mr. Hatab had six broken ribs and had suffocated from a broken bone in his throat after being dragged outside by his neck, the examiner concluded. 
Eight Camp Whitehorse personnel were charged with crimes, including negligent homicide. But the investigator appointed by General Mattis, Col. William B. Gallo, later cited problems with the autopsy and could not determine which of the attacks on Mr. Hatab, if any, might have been lethal. In Colonel Gallo’s opinion, the   treatment did not amount to torture, but the command had failed to provide adequate predeployment training in handling prisoners and in the law of war. Evidence, including certain photographs of the prisoner and a summary of an interrogation he underwent, had been lost or destroyed, complicating the investigation and prosecution. Two Marines, a major and a sergeant, were eventually convicted of lesser charges. Mr. Hatab’s death clearly had an effect on General Mattis. According to reports at the time, he ordered a review of the procedures for handling prisoners, which resulted in a ban of   tactics, including the forced standing. Marine Corps personnel running detention camps were given more training, and a manual was compiled to explain each step of the process. Still, there were several other instances of prisoner abuse involving the division. Later, aiming to earn the trust of the Iraqi people after the first phase of combat had ended, General Mattis added an addendum to the motto of his Marines: “No better friend, no worse enemy. ” The addition was, “First, do no harm. ”"""
# Run generate_summary on the example article and print the result.
summary = generate_summary(input_text)
print("Generated Summary: ", summary)


Generated Summary:  James N. Mattis, Donald J. Trump’s nominee for secretary of defense, was then a major general and the commander of the Marine division in Iraq responsible for the center. He quickly convened an inquiry into the death, which led to and banned the harsh techniques used at the prison. “General Mattis was all up in arms over this,” Ralph Dengler, then a lieutenant colonel, testified at a military hearing in January 2004. Colleagues say the general’s handling of the episode reflects his firmly held views against torture and prisoner mistreatment, which are shared


In [None]:
input_text

'The body of the Iraqi prisoner was found naked and badly bruised in 2003, outside a detention center in southern Iraq run by United States Marines. The    man had been beaten, deprived of sleep, forced to stand for long periods and interrogated by Marines about his alleged role in a fatal ambush of American forces. James N. Mattis,   Donald J. Trump’s nominee for secretary of defense, was then a major general and the commander of the Marine division in Iraq responsible for the center. He quickly convened an inquiry into the death, which led to   and banned the harsh techniques used at the prison. “General Mattis was all up in arms over this,” Ralph Dengler, then a lieutenant colonel, testified at a military hearing in January 2004. He added that the commander, who arrived hours after the discovery on a planned visit with his British counterpart, had immediately described the death as “the worst thing that happened” under his watch in the Iraq war. “I was surprised that he would have f

In [None]:
text ="""The body of the Iraqi prisoner was found naked and badly bruised in 2003, outside a detention center in southern Iraq run by United States Marines. The man had been beaten, deprived of sleep, forced to stand for long periods and interrogated by Marines about his alleged role in a fatal ambush of American forces. This sparked debates on the humanity of the United States. General Mattis said in a Senate Armed Services Committee hearing that it was inappropriate for the military to support the use of abusive techniques on detainees — including waterboarding, forced nudity and sensory deprivation, tactics inflicted on prisoners during interrogations in secret C. I. A. prisons in the years after the 2001 terrorist attacks."""

In [None]:
text

'The body of the Iraqi prisoner was found naked and badly bruised in 2003, outside a detention center in southern Iraq run by United States Marines. The man had been beaten, deprived of sleep, forced to stand for long periods and interrogated by Marines about his alleged role in a fatal ambush of American forces. This sparked debates on the humanity of the United States. General Mattis said in a Senate Armed Services Committee hearing that it was inappropriate for the military to support the use of abusive techniques on detainees — including waterboarding, forced nudity and sensory deprivation, tactics inflicted on prisoners during interrogations in secret C. I. A. prisons in the years after the 2001 terrorist attacks.'

In [None]:
from transformers import pipeline

# Load the summarization pipeline with the pretrained FLAN-T5 base checkpoint.
# This is the checkpoint from the hub, not the model saved in ./my_trained_model.
summarizer = pipeline("summarization", model="google/flan-t5-base", tokenizer="google/flan-t5-base")

# Your input document (can be a long article or news)
# `input_text` is expected to be defined by an earlier cell.

# Generate summary
# truncation=True cuts the input to the model's maximum sequence length; the
# tokenizer otherwise warns about possible indexing errors on long articles.
# min_length must stay below max_length: with both at 512 the end-of-sequence
# token is suppressed until the cap, so every output is exactly 512 tokens and
# ends mid-sentence.
summary = summarizer(input_text, max_length=512, min_length=30, truncation=True, do_sample=False)

# Output the summary
print("Generated Summary:", summary[0]['summary_text'])


Token indices sequence length is longer than the specified maximum sequence length for this model (2525 > 512). Running this sequence through the model will result in indexing errors


Generated Summary: James N. Mattis, Donald J. Trump’s nominee for secretary of defense, was a major general and the commander of the Marine division in Iraq responsible for the center. He quickly convened an inquiry into the death, which led to and banned the harsh techniques used at the prison. The general’s beliefs were shaped by his military training, but also by his experiences in dealing with issues related to torture or mistreatment, according to an examination of his nearly career and interviews with former colleagues and friends. As a young officer, General Mattis would have been instructed in the laws of war, including the Geneva Conventions, which, among other protections, require humane treatment of prisoners of war. A Marine who enlisted in 1969 while attending college — part of an R. O. T. C. program during the height of the Vietnam War — he did not deploy to Vietnam. But several friends say he believed that American prisoners were more likely to be tortured by the Vietcon

In [None]:
# Prints a fixed marker string; no other visible cell depends on it.
print("change")

change
