In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

pipe = pipeline("summarization", model=model, tokenizer=tokenizer)

In [2]:
original_text="""Once upon a time in a small village nestled between green hills, there lived a young boy named Arjun. He was curious, brave, and loved exploring the forests near his home. One day, while wandering deeper than ever before, he stumbled upon an injured baby deer trapped under a fallen tree. Despite being scared, Arjun carefully lifted the branches and freed the animal.
Grateful, the deer followed him back to the village, and soon, the villagers noticed that it had extraordinary abilities—it could find hidden water sources, alert people to danger, and even heal minor wounds with its presence. Word of the magical deer spread, attracting travelers and merchants from faraway lands.
However, not everyone had good intentions. A greedy landowner wanted to capture the deer and exploit its powers for profit. Arjun, understanding the danger, devised a plan. He led the deer to the hills and created a secret sanctuary where only those with pure hearts could find it.
Years passed, and Arjun grew up to become a wise protector of the forest. The deer lived safely, helping those who respected nature. The village prospered, and people learned an important lesson: true courage and kindness can protect what is precious, and sometimes, the smallest acts of bravery can change the fate of many."""

In [None]:
gen_kwargs = {
    "max_length": 300,
    "min_length": 100,
    "num_beams": 4,
    "length_penalty": 2.0,
    "early_stopping": True,
}


In [70]:
text = "summarize: " + original_text
pipe(text, **gen_kwargs)

Your max_length is set to 300, but your input_length is only 268. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=134)


[{'summary_text': 'A young boy named Arjun found an injured baby deer trapped under a fallen tree. The deer had extraordinary abilities, including the ability to find hidden water sources and alert people to danger. A greedy landowner wanted to capture the deer and exploit its powers for profit. Arjun, understanding the danger, devised a plan. He led the deer to the hills and created a secret sanctuary where only those with pure hearts could find it.Years passed, and Arjun grew up to become a wise protector of the forest. The village prospered, and people learned an important lesson.'}]

In [73]:
model.save_pretrained("facebbok_model")
tokenizer.save_pretrained("facebook_tokenizer")




('facebook_tokenizer\\tokenizer_config.json',
 'facebook_tokenizer\\special_tokens_map.json',
 'facebook_tokenizer\\vocab.json',
 'facebook_tokenizer\\merges.txt',
 'facebook_tokenizer\\added_tokens.json',
 'facebook_tokenizer\\tokenizer.json')

In [3]:
import os

In [6]:
%pwd

'd:\\text_Summerizer\\text_summarizer'

In [5]:
os.chdir("../")

In [7]:
path="artifacts/model_trainer/facebook_model"

In [8]:
imp_model=AutoModelForSeq2SeqLM.from_pretrained(path)
imp_token=AutoTokenizer.from_pretrained(path)



In [9]:
text = "summarize: " + original_text

In [12]:
pipe = pipeline("summarization", model=imp_model, tokenizer=imp_token)
output=pipe(text, **gen_kwargs)


Device set to use cuda:0
Your max_length is set to 300, but your input_length is only 268. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=134)


In [13]:
trans_text=output[0]["summary_text"]
trans_text

'A young boy named Arjun found an injured baby deer trapped under a fallen tree. The deer had extraordinary abilities, including the ability to find hidden water sources and alert people to danger. A greedy landowner wanted to capture the deer and exploit its powers for profit. Arjun, understanding the danger, devised a plan. He led the deer to the hills and created a secret sanctuary where only those with pure hearts could find it.Years passed, and Arjun grew up to become a wise protector of the forest. The village prospered, and people learned an important lesson.'

In [14]:
import torch
from transformers import AutoModelForCausalLM, pipeline

In [18]:
trans_path="Helsinki-NLP/opus-mt-en-hi"

In [20]:
trans_tokenizer=AutoTokenizer.from_pretrained(trans_path)
trans_model=AutoModelForSeq2SeqLM.from_pretrained(trans_path)



Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [21]:
trans_=pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")

Device set to use cuda:0


In [24]:
trans_(trans_text)[0]['translation_text']

'एक नौजवान ने एक घायल बच्चे को एक जंगली पेड़ के नीचे फंसते हुए पाया. हिरणों के पास असाधारण क्षमता थी, जिसमें गुप्त जल स्रोत और खतरे के लिए सतर्क लोगों को खोजने की क्षमता भी शामिल थी. एक लालची भूमि मालिक चाहता था कि हरिण को पकड़ ले और अपने बलों को लाभ पहुँचाने के लिए उसकी शक्\u200dति का लाभ उठाना चाहता था. एक आदमी, समझ - प्राप्त करना चाहता था कि एक योजना, एक योजना की ओर ले जाता है. वहाँ एक रहस्य और वहाँ उन लोगों को पाया जाता है जो एक पाकस्थान के साथ रहते थे, और एक बुद्धिमान गाँव में रहते थे. यह एक बहुत ही बुद्धिमान गाँव के लिए एक बहुत बड़ा सबक है.'

In [25]:
trans_model.save_pretrained("translation_model")



In [27]:
trans_tokenizer.save_pretrained("translation_tokenizer")

('translation_tokenizer\\tokenizer_config.json',
 'translation_tokenizer\\special_tokens_map.json',
 'translation_tokenizer\\vocab.json',
 'translation_tokenizer\\source.spm',
 'translation_tokenizer\\target.spm',
 'translation_tokenizer\\added_tokens.json')

In [15]:
tokenizer=AutoTokenizer.from_pretrained("artifacts/model_translation/translation_tokenizer")
model=AutoModelForSeq2SeqLM.from_pretrained("artifacts/model_translation/translation_model")



In [16]:
trans_pipe=pipeline("translation",model=model,tokenizer=tokenizer)

Device set to use cuda:0


In [17]:
output=trans_pipe(trans_text)[0]['translation_text']

In [18]:
output

'एक नौजवान ने एक घायल बच्चे को एक जंगली पेड़ के नीचे फंसते हुए पाया. हिरणों के पास असाधारण क्षमता थी, जिसमें गुप्त जल स्रोत और खतरे के लिए सतर्क लोगों को खोजने की क्षमता भी शामिल थी. एक लालची भूमि मालिक चाहता था कि हरिण को पकड़ ले और अपने बलों को लाभ पहुँचाने के लिए उसकी शक्\u200dति का लाभ उठाना चाहता था. एक आदमी, समझ - प्राप्त करना चाहता था कि एक योजना, एक योजना की ओर ले जाता है. वहाँ एक रहस्य और वहाँ उन लोगों को पाया जाता है जो एक पाकस्थान के साथ रहते थे, और एक बुद्धिमान गाँव में रहते थे. यह एक बहुत ही बुद्धिमान गाँव के लिए एक बहुत बड़ा सबक है.'