In [1]:
! pip install transformers




In [2]:
! pip show transformers


Name: transformers
Version: 4.41.1
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /usr/local/lib/python3.10/dist-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: bert-extractive-summarizer


In [3]:
from transformers import BartForConditionalGeneration, BartTokenizer


In [7]:
# Load the pre-trained BART model and tokenizer
model_name = 'facebook/bart-large-cnn'
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)


In [8]:
# Define the text you want to summarize
text = '''
       Scientists say they have discovered a new species of orangutans on Indonesia’s island of Sumatra.
The population differs in several ways from the two existing orangutan species found in Sumatra and the neighboring island of Borneo.
The orangutans were found inside North Sumatra’s Batang Toru forest, the science publication Current Biology reported.
Researchers named the new species the Tapanuli orangutan. They say the animals are considered a new species because of genetic, skeletal and tooth differences.
Michael Kruetzen is a geneticist with the University of Zurich who has studied the orangutans for several years. He said he was excited to be part of the unusual discovery of a new great ape in the present day. He noted that most great apes are currently considered endangered or severely endangered.
Gorillas, chimpanzees and bonobos also belong to the great ape species.
Orangutan – which means person of the forest in the Indonesian and Malay languages - is the world’s biggest tree-living mammal. The orange-haired animals can move easily among the trees because their arms are longer than their legs. They live more lonely lives than other great apes, spending a lot of time sleeping and eating fruit in the forest.
The new study said fewer than 800 of the newly-described orangutans exist. Their low numbers make the group the most endangered of all the great ape species.
They live within an area covering about 1,000 square kilometers. The population is considered highly vulnerable. That is because the environment which they depend on is greatly threatened by development.
Researchers say if steps are not taken quickly to reduce the current and future threats, the new species could become extinct “within our lifetime.”
Research into the new species began in 2013, when an orangutan protection group in Sumatra found an injured orangutan in an area far away from the other species. The adult male orangutan had been beaten by local villagers and died of his injuries. The complete skull was examined by researchers.
Among the physical differences of the new species are a notably smaller head and frizzier hair. The Tapanuli orangutans also have a different diet and are found only in higher forest areas.
There is no unified international system for recognizing new species. But to be considered, discovery claims at least require publication in a major scientific publication.
Russell Mittermeier is head of the primate specialist group at the International Union for the Conservation of Nature. He called the finding a “remarkable discovery.” He said it puts responsibility on the Indonesian government to help the species survive.
Matthew Nowak is one of the writers of the study. He told the Associated Press that there are three groups of the Tapanuli orangutans that are separated by non-protected land.He said forest land needs to connect the separated groups.
In addition, the writers of the study are recommending that plans for a hydropower center in the area be stopped by the government.
It also recommended that remaining forest in the Sumatran area where the orangutans live be protected.
I’m Bryan Lynn.

        '''

In [9]:
# Tokenize the input text and prepare it for the model
inputs = tokenizer(text, max_length=2048, return_tensors='pt', truncation=True)
inputs

{'input_ids': tensor([[    0, 50118,  1437,  1437,  1437,  1437,  1437,  1437, 14008,   224,
            51,    33,  2967,    10,    92,  4707,     9,    50,  1097,  1182,
          1253,    15,  5722,    17,    27,    29,  2946,     9,  9430, 15560,
             4, 50118,   133,  1956, 31381,    11,   484,  1319,    31,     5,
            80,  2210,    50,  1097, 22165,  4707,   303,    11,  9430, 15560,
             8,     5, 10935,  2946,     9,   163, 12266,   139,     4, 50118,
           133,    50,  1097,  1182,  1253,    58,   303,  1025,   369,  9430,
         15560,    17,    27,    29,  8324,  1097,  6623,   257,  6693,     6,
             5,  2866,  5362,  9149, 28543,   431,     4, 50118, 40450,  1440,
             5,    92,  4707,     5,   255, 20948, 18425,    50,  1097, 22165,
             4,   252,   224,     5,  3122,    32,  1687,    10,    92,  4707,
           142,     9,  9186,     6, 39351,     8, 13495,  5550,     4, 50118,
         14009, 19262,   594,  7889,  

In [10]:
# Generate the summary
summary_ids = model.generate(inputs['input_ids'], max_length=300, min_length=80, length_penalty=2.0, num_beams=4, early_stopping=True)
summary_ids

tensor([[    2,     0, 40450,  1440,     5,    92,  4707,     5,   255, 20948,
         18425,    50,  1097, 22165,     4,   252,   224,     5,  3122,    32,
          1687,    10,    92,  4707,   142,     9,  9186,     6, 39351,     8,
         13495,  5550,     4, 13464,   254,    87,  5735,     9,     5,  3862,
            12, 30343,    50,  1097,  1182,  1253,  5152,     4,  2667,   614,
          1530,   146,     5,   333,     5,   144, 14739,     9,    70,     5,
           372, 40653,  4707,     4,    20,  1956,    16,  1687,  2200,  4478,
           142,     5,  1737,    61,    51,  6723,    15,    16,  8908,  3711,
            30,   709,     4,     2]])

In [11]:
# Decode the generated summary
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
summary

'Researchers named the new species the Tapanuli orangutan. They say the animals are considered a new species because of genetic, skeletal and tooth differences. Fewer than 800 of the newly-described orangutans exist. Their low numbers make the group the most endangered of all the great ape species. The population is considered highly vulnerable because the environment which they depend on is greatly threatened by development.'

In [12]:
# All together

In [13]:
from transformers import BartForConditionalGeneration, BartTokenizer

# Load the pre-trained BART model and tokenizer
model_name = 'facebook/bart-large-cnn'
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Define the text you want to summarize
text = '''
       Scientists say they have discovered a new species of orangutans on Indonesia’s island of Sumatra.
The population differs in several ways from the two existing orangutan species found in Sumatra and the neighboring island of Borneo.
The orangutans were found inside North Sumatra’s Batang Toru forest, the science publication Current Biology reported.
Researchers named the new species the Tapanuli orangutan. They say the animals are considered a new species because of genetic, skeletal and tooth differences.
Michael Kruetzen is a geneticist with the University of Zurich who has studied the orangutans for several years. He said he was excited to be part of the unusual discovery of a new great ape in the present day. He noted that most great apes are currently considered endangered or severely endangered.
Gorillas, chimpanzees and bonobos also belong to the great ape species.
Orangutan – which means person of the forest in the Indonesian and Malay languages - is the world’s biggest tree-living mammal. The orange-haired animals can move easily among the trees because their arms are longer than their legs. They live more lonely lives than other great apes, spending a lot of time sleeping and eating fruit in the forest.
The new study said fewer than 800 of the newly-described orangutans exist. Their low numbers make the group the most endangered of all the great ape species.
They live within an area covering about 1,000 square kilometers. The population is considered highly vulnerable. That is because the environment which they depend on is greatly threatened by development.
Researchers say if steps are not taken quickly to reduce the current and future threats, the new species could become extinct “within our lifetime.”
Research into the new species began in 2013, when an orangutan protection group in Sumatra found an injured orangutan in an area far away from the other species. The adult male orangutan had been beaten by local villagers and died of his injuries. The complete skull was examined by researchers.
Among the physical differences of the new species are a notably smaller head and frizzier hair. The Tapanuli orangutans also have a different diet and are found only in higher forest areas.
There is no unified international system for recognizing new species. But to be considered, discovery claims at least require publication in a major scientific publication.
Russell Mittermeier is head of the primate specialist group at the International Union for the Conservation of Nature. He called the finding a “remarkable discovery.” He said it puts responsibility on the Indonesian government to help the species survive.
Matthew Nowak is one of the writers of the study. He told the Associated Press that there are three groups of the Tapanuli orangutans that are separated by non-protected land.He said forest land needs to connect the separated groups.
In addition, the writers of the study are recommending that plans for a hydropower center in the area be stopped by the government.
It also recommended that remaining forest in the Sumatran area where the orangutans live be protected.
I’m Bryan Lynn.

        '''

# Tokenize the input text and prepare it for the model
inputs = tokenizer(text, max_length=2048, return_tensors='pt', truncation=True)

# Generate the summary
summary_ids = model.generate(inputs['input_ids'], max_length=500, min_length=1000, length_penalty=2.0, num_beams=4, early_stopping=True)

# Decode the generated summary
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Print the summary
# print(summary)




In [14]:
summary

'Researchers named the new species the Tapanuli orangutan. They say the animals are considered a new species because of genetic, skeletal and tooth differences. Fewer than 800 of the newly-described orangutans exist. Their low numbers make the group the most endangered of all the great ape species. They live within an area covering about 1,000 square kilometers. The population is considered highly vulnerable. That is because the environment which they depend on is greatly threatened by development. Researchers say if steps are not taken quickly to reduce the current and future threats, the new. species could become extinct “within our lifetime” The orange-haired animals can move easily among the trees because their arms are longer than their legs. They are the world’s biggest tree-living mammal. The species is also home to the gorillas, chimpanzees, bonobos and other great apes. The study recommends that remaining forest in the Sumatran area where they live be protected. It also recomm