In [None]:
!pip install transformers==4.30.0
!pip install datasets
!pip install evaluate
!pip install rouge_score
!pip install accelerate==0.21.0



In [None]:
from huggingface_hub import notebook_login

# Authenticate against the Hugging Face Hub from inside the notebook (renders a
# login widget). The training arguments further down set push_to_hub=True, which
# presumably requires a token with write access — confirm before running.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from datasets import load_dataset

# Only the published "test" split of CiteSum is downloaded; it is then re-split
# 65/35 into this notebook's own train/test sets.
dataset = load_dataset("yuningm/citesum", split="test")
# A fixed seed keeps the split (and therefore every metric computed on it)
# identical across kernel restarts; without it each run sees different rows.
dataset = dataset.train_test_split(test_size=0.35, seed=0)
dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


DatasetDict({
    train: Dataset({
        features: ['src', 'tgt', 'paper_id', 'title', 'discipline'],
        num_rows: 3198
    })
    test: Dataset({
        features: ['src', 'tgt', 'paper_id', 'title', 'discipline'],
        num_rows: 1723
    })
})

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration
# Hub checkpoint used as the starting point for fine-tuning.
model_name = "facebook/bart-large-cnn"
# Tokenizer and model are loaded from the same checkpoint name so they match.
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)



In [None]:
def preprocess_data(examples):
    """Tokenize a batch of examples for seq2seq fine-tuning.

    Args:
        examples: A batch (dict of lists) with a 'src' column holding the source
            texts and a 'tgt' column holding the reference summaries.

    Returns:
        The tokenized sources (truncated to 1024 tokens) with an added
        "labels" entry containing the token ids of the targets (truncated
        to 218 tokens).

    Notes:
        No padding is applied here on purpose. Padding labels to a fixed length
        fills them with the real pad-token id, which the loss does NOT ignore, so
        the model would be trained (and its validation loss measured) mostly on
        padding. Leaving sequences unpadded lets DataCollatorForSeq2Seq pad each
        batch dynamically and fill label padding with -100, which the loss skips.
    """
    model_inputs = tokenizer(examples['src'], max_length=1024, truncation=True)

    # `text_target=` replaces the deprecated `tokenizer.as_target_tokenizer()`
    # context manager for tokenizing decoder targets.
    labels = tokenizer(text_target=examples['tgt'], max_length=218, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [None]:
# Apply preprocess_data to every split of the DatasetDict; batched=True hands the
# function a batch of examples per call instead of a single example.
tokenized_datasets = dataset.map(preprocess_data, batched=True)

Map:   0%|          | 0/3198 [00:00<?, ? examples/s]



Map:   0%|          | 0/1723 [00:00<?, ? examples/s]

In [None]:
from transformers import DataCollatorForSeq2Seq

# `model` must be the model *object*, not the checkpoint name: the collator checks
# the object for `prepare_decoder_input_ids_from_labels` to build decoder inputs
# from the labels. A plain string never has that method, so the feature was
# silently disabled.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
import evaluate

# ROUGE metric object; used by compute_metrics to score generated summaries.
rouge = evaluate.load("rouge")

In [None]:
import numpy as np


def compute_metrics(eval_pred):
    """Compute ROUGE scores and mean generated length for an evaluation pass.

    Args:
        eval_pred: A (predictions, labels) pair of token-id arrays. Predictions
            may also arrive as a tuple whose first element holds the generated
            ids; labels may contain -100 where positions are ignored.

    Returns:
        Dict mapping each ROUGE metric name and "gen_len" (mean number of
        non-pad tokens per prediction) to a value rounded to 4 decimals.
    """
    predictions, labels = eval_pred
    # Some models return extra outputs alongside the generated ids; keep the ids.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    pad_id = tokenizer.pad_token_id
    # -100 is a filler/ignore marker, not a vocabulary id, so it cannot be
    # decoded. Swap it for the pad token in both arrays before decoding.
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    # Computed on the sanitised predictions so -100 filler is not counted as tokens.
    prediction_lens = [np.count_nonzero(pred != pad_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

In [None]:
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

# Fixed-seed subsamples of the tokenized splits: 1400 rows to train on and
# 700 rows to evaluate on.
vsmall_train_dataset = (
    tokenized_datasets["train"]
    .shuffle(seed=0)
    .select(range(1400))
)
small_eval_dataset = (
    tokenized_datasets["test"]
    .shuffle(seed=0)
    .select(range(700))
)

# Training configuration; the output directory name doubles as the Hub repo
# name in the recorded run.
training_args = Seq2SeqTrainingArguments(
    output_dir="finetuned_on_citesum_bart_text_summarisation",
    num_train_epochs=3,
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    evaluation_strategy="epoch",
    predict_with_generate=True,  # evaluate on generated text so ROUGE can be computed
    save_total_limit=6,
    push_to_hub=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=vsmall_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/Madan490/finetuned_on_citesum_bart_text_summarisation into local empty directory.


In [None]:
# Run fine-tuning with the datasets and arguments the trainer was built with.
# In the recorded run this took roughly 4100 s for 3 epochs (1050 steps).
trainer.train()



Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,No log,0.344933,0.3492,0.1554,0.2685,0.268,68.7357
2,0.535900,0.345796,0.3516,0.1531,0.2647,0.2646,67.0714


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,No log,0.344933,0.3492,0.1554,0.2685,0.268,68.7357
2,0.535900,0.345796,0.3516,0.1531,0.2647,0.2646,67.0714
3,0.221900,0.35971,0.3541,0.1548,0.2659,0.2657,67.0143


TrainOutput(global_step=1050, training_loss=0.3702215648832775, metrics={'train_runtime': 4124.1395, 'train_samples_per_second': 1.018, 'train_steps_per_second': 0.255, 'total_flos': 9101839328870400.0, 'train_loss': 0.3702215648832775, 'epoch': 3.0})

In [None]:
# Save the trainer's current model to ./my_model. The recorded output shows this
# call also uploading files to the Hub repo (push_to_hub=True in training_args).
trainer.save_model("./my_model")

Upload file pytorch_model.bin:   0%|          | 1.00/1.51G [00:00<?, ?B/s]

Upload file runs/Jun17_09-51-25_c698e9ee1a1e/events.out.tfevents.1718617895.c698e9ee1a1e.1467.0:   0%|        …

To https://huggingface.co/Madan490/finetuned_on_citesum_bart_text_summarisation
   80296ae..f2f70f2  main -> main

   80296ae..f2f70f2  main -> main

To https://huggingface.co/Madan490/finetuned_on_citesum_bart_text_summarisation
   f2f70f2..0f46036  main -> main

   f2f70f2..0f46036  main -> main



In [None]:
# Evaluate the current model on the trainer's eval dataset; the returned dict
# holds the loss plus the values produced by compute_metrics, prefixed "eval_".
trainer.evaluate()

{'eval_loss': 0.359709769487381,
 'eval_rouge1': 0.3541,
 'eval_rouge2': 0.1548,
 'eval_rougeL': 0.2659,
 'eval_rougeLsum': 0.2657,
 'eval_gen_len': 67.0143,
 'eval_runtime': 551.5051,
 'eval_samples_per_second': 1.269,
 'eval_steps_per_second': 0.317,
 'epoch': 3.0}

In [None]:
# Reload the *final* fine-tuned model written by trainer.save_model("./my_model").
# The previous hardcoded path pointed at checkpoint-1000 under Colab's /content,
# which (a) only exists on that machine layout and (b) predates the end of
# training (the run finished at step 1050).
model_path = "./my_model"
model = BartForConditionalGeneration.from_pretrained(model_path)
tokenizer = BartTokenizer.from_pretrained(model_path)

def summarize_text(input_text, max_length=1024, min_length=218, num_beams=4):
    """Generate a summary of `input_text` with the fine-tuned BART model.

    Args:
        input_text: Text to summarize. Inputs longer than 1024 tokens are
            truncated before being fed to the model.
        max_length: Maximum length (in tokens) of the generated summary.
        min_length: Minimum length (in tokens) of the generated summary.
        num_beams: Beam width used for beam search.

    Returns:
        The decoded summary string with special tokens removed, or "" when
        `input_text` is empty or whitespace-only.
    """
    # NOTE(review): min_length=218 forces summaries far longer than the ~67 tokens
    # the model produced during evaluation; the default is kept for backward
    # compatibility, but callers will likely want a much smaller value.

    # Nothing to summarize: avoid making the model hallucinate min_length tokens.
    if not input_text or not input_text.strip():
        return ""

    # No "summarize: " task prefix: that is a T5 convention. This model was
    # fine-tuned on raw source text, so inference inputs must look the same.
    inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True)
    # Keep the inputs on the same device as the model (CPU or GPU).
    inputs = inputs.to(model.device)

    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        length_penalty=2.0,
        early_stopping=True,
    )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


# Demo article to summarize. The pasted chatbot preamble ("Sure! Here is a
# 1000-word article ..." and the "---" separator) was removed: it is not part of
# the article, and the model was folding it into the generated summary.
input_text = """
### Neural Networks: The Building Blocks of Modern AI

#### Introduction

Neural networks, inspired by the human brain's architecture, have revolutionized the field of artificial intelligence (AI) and machine learning (ML). These complex systems of interconnected nodes, or neurons, mimic the way our brain processes information, enabling machines to learn from data, recognize patterns, and make decisions. From powering search engines and recommendation systems to driving advances in medical diagnosis and autonomous vehicles, neural networks are at the core of many modern AI applications.

#### History and Evolution

The concept of neural networks dates back to the 1940s when Warren McCulloch and Walter Pitts introduced the idea of artificial neurons. However, it wasn't until the 1980s, with the development of backpropagation algorithms, that neural networks gained significant traction. This algorithm, introduced by Geoffrey Hinton and others, allowed networks to learn from errors by adjusting weights iteratively, leading to better accuracy.

The late 2000s and early 2010s marked a pivotal period with the rise of deep learning—a subset of machine learning involving neural networks with many layers (hence the term "deep"). Advances in computational power, availability of large datasets, and innovations like convolutional neural networks (CNNs) and recurrent neural networks (RNNs) propelled deep learning to the forefront of AI research.

#### Structure of Neural Networks

A neural network consists of an input layer, one or more hidden layers, and an output layer. Each layer is composed of nodes, or artificial neurons, which are connected by edges, or weights.

1. **Input Layer**: This layer receives the initial data, which could be anything from images to text or numerical values.
2. **Hidden Layers**: These layers perform complex transformations and feature extraction on the input data. The more hidden layers a network has, the deeper it is.
3. **Output Layer**: This layer produces the final result, such as a classification label, a regression value, or a generated output.

Each neuron in a layer takes the weighted sum of inputs from the previous layer, applies an activation function, and passes the result to the next layer. Common activation functions include the sigmoid, tanh, and ReLU (Rectified Linear Unit), each introducing non-linearity to help the network learn complex patterns.

#### Types of Neural Networks

1. **Feedforward Neural Networks (FNNs)**: The simplest type of neural network where information moves in one direction—from the input layer to the output layer. These are commonly used for tasks like image classification and regression.

2. **Convolutional Neural Networks (CNNs)**: Specifically designed for processing grid-like data such as images. CNNs use convolutional layers to automatically and adaptively learn spatial hierarchies of features from input images. They are highly effective for image recognition, object detection, and computer vision tasks.

3. **Recurrent Neural Networks (RNNs)**: Ideal for sequential data, such as time series or natural language. RNNs have connections that loop back on themselves, enabling them to maintain memory of previous inputs. Long Short-Term Memory (LSTM) and Gated Recurrent Unit (GRU) are advanced RNN architectures that address issues like vanishing gradients, making them suitable for tasks like language modeling and speech recognition.

4. **Generative Adversarial Networks (GANs)**: Consist of two networks—a generator and a discriminator—that compete against each other. The generator creates fake data, while the discriminator evaluates its authenticity. This adversarial process leads to highly realistic data generation, used in applications like image synthesis and super-resolution.

5. **Autoencoders**: Unsupervised learning models designed to learn efficient codings of input data. They consist of an encoder to compress the data and a decoder to reconstruct it. Variants like Variational Autoencoders (VAEs) are used for generative modeling.

#### Training Neural Networks

Training a neural network involves optimizing the weights to minimize the difference between the predicted output and the actual target. This is typically done using a process called backpropagation, combined with an optimization algorithm like stochastic gradient descent (SGD). The steps are as follows:

1. **Forward Pass**: Compute the output by passing the input through the network.
2. **Loss Calculation**: Measure the error using a loss function (e.g., mean squared error for regression, cross-entropy for classification).
3. **Backward Pass**: Calculate the gradient of the loss with respect to each weight using backpropagation.
4. **Weight Update**: Adjust the weights using an optimization algorithm to reduce the error.

#### Challenges and Advances

While neural networks have achieved remarkable success, they also present challenges:

1. **Data Requirements**: Deep networks require large amounts of labeled data for training, which can be expensive and time-consuming to collect.
2. **Computational Resources**: Training deep networks demands significant computational power, often requiring specialized hardware like GPUs and TPUs.
3. **Overfitting**: Deep networks can easily overfit to training data, necessitating techniques like dropout, regularization, and data augmentation to improve generalization.
4. **Interpretability**: Neural networks are often seen as "black boxes," making it difficult to understand how they make decisions. This lack of transparency can be problematic in critical applications like healthcare and finance.

Recent advances address some of these challenges. Transfer learning allows pre-trained models to be fine-tuned on specific tasks with less data. Techniques like neural architecture search (NAS) automate the design of efficient networks. Explainable AI (XAI) aims to make neural networks more interpretable and trustworthy.

#### Applications

Neural networks are at the heart of many transformative technologies:

- **Computer Vision**: Used in facial recognition, object detection, and medical image analysis.
- **Natural Language Processing (NLP)**: Powers applications like machine translation, sentiment analysis, and chatbots.
- **Speech Recognition**: Enables voice assistants, transcription services, and real-time translation.
- **Healthcare**: Assists in disease diagnosis, drug discovery, and personalized treatment plans.
- **Autonomous Vehicles**: Facilitates perception, decision-making, and control systems for self-driving cars.
- **Finance**: Enhances fraud detection, algorithmic trading, and risk management.

#### Conclusion

Neural networks have profoundly impacted the field of artificial intelligence, enabling machines to perform tasks that once seemed impossible. As research continues to advance, neural networks will undoubtedly play a crucial role in shaping the future of technology, driving innovation across various industries and improving our daily lives. Understanding their principles, capabilities, and limitations is essential for harnessing their full potential and addressing the challenges they present.
"""

# Generate and print the summary
summary = summarize_text(input_text)
print("Summary:", summary)

Summary: The concept of neural networks dates back to the 1940s when Warren McCulloch and Walter Pitts introduced the idea of artificial neurons, but it wasn't until the 1980s, with the development of backpropagation algorithms, that neural networks gained significant traction in the field of machine learning REF, where they were used to learn from errors by adjusting weights iteratively, leading to better accuracy in image recognition and object classification tasks, and for speech recognition in the 2010s, when Geoffrey Hinton and others introduced the concept of recurrent neural networks (RNNs), which loop back on themselves, enabling them to maintain memory of previous inputs, and are used in tasks like image classification and object recognition. In the same year, a 1000-word article on neural networks was published in REF on the topic of "Neural Networks: The Building Blocks of Modern AI," where the author provided an overview of the evolution of the field and some of the current