In [2]:
text = """
Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- wise fully connected feed-forward network. We employ a residual connection [11] around each of the two sub-layers, followed by layer normalization [1]. That is, the output of each sub-layer is LayerNorm(x + Sublayer(x)), where Sublayer(x) is the function implemented by the sub-layer itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding layers, produce outputs of dimension dmodel = 512.
Decoder: The decoder is also composed of a stack of N = 6 identical layers. In addition to the two sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head attention over the output of the encoder stack. Similar to the encoder, we employ residual connections around each of the sub-layers, followed by layer normalization. We also modify the self-attention sub-layer in the decoder stack to prevent positions from attending to subsequent positions. This masking, combined with fact that the output embeddings are offset by one position, ensures that the predictions for position i can depend only on the known outputs at positions less than i.
"""

In [None]:
# from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain.schema import HumanMessage, SystemMessage
from langchain_community.chat_models import ChatOllama


class QuizGenerator:
    def __init__(self) -> None:
        self.system_msg = """
            Create several multiple choice questions based on provided text.
            Send questions in order of importance and quality.
            Strictly follow python-like format:
            ["Which of the following subjects could fly?", ["Cat", "Airplane", "Coffee", ""], ["Airplane"]]
            ["Which component(s) do(es) not exists in physical world?", ["Network Interphace Cart", "Router", "Quick Sort", "Ghost"], ["Quick Sort", "Ghost"]]
            """

        self.llm = ChatOllama(
            model="llama3:8b",
            format="json",
            keep_alive=0,
            temperature=0.0,
            system=self.system_msg,
            top_k=10,
            top_p=0.05,
            verbose=True
        )

    def generate_quiz(
        self,
        text: str,  # choose a data format
    ) -> str:
        question = self.llm.invoke(
            text
        )
        return question


q = QuizGenerator()
ans = q.generate_quiz(text=text)
print(ans)

In [None]:
from langchain_community.chat_models import ChatOllama


class SummaryGenerator:
    def __init__(self) -> None:
        self.system_msg = """
        Make a summary of provided text. Find key points and build a summary around these key points.
        """

        self.llm = ChatOllama(
            model="llama3:8b",
            keep_alive=0,
            temperature=0.0,
            system=self.system_msg,
            top_k=10,
            top_p=0.05,
            verbose=True
        )

    def generate_summary(
        self,
        text: str,  # choose a data format
    ) -> str:
        question = self.llm.invoke(
            text
        )
        return question


q = SummaryGenerator()
ans = q.generate_summary(text=text)
print(ans)

In [4]:
from langchain_huggingface import HuggingFaceEndpoint

class SummaryGenerator:
    def __init__(self) -> None:
        self.system_msg = """
        Make a summary of provided text. Find key points and build a summary around these key points.
        """
        self.llm = HuggingFaceEndpoint(
            model="Falconsai/text_summarization",
            verbose=True,
            task="summarization"
        )


    def generate_summary(
        self,
        text: str,  # choose a data format
    ) -> str:
        question = self.llm.invoke(text)
        return question


if __name__ == "__main__":
    text = """
			Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- wise fully connected feed-forward network. We employ a residual connection [11] around each of the two sub-layers, followed by layer normalization [1]. That is, the output of each sub-layer is LayerNorm(x + Sublayer(x)), where Sublayer(x) is the function implemented by the sub-layer itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding layers, produce outputs of dimension dmodel = 512.
			Decoder: The decoder is also composed of a stack of N = 6 identical layers. In addition to the two sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head attention over the output of the encoder stack. Similar to the encoder, we employ residual connections around each of the sub-layers, followed by layer normalization. We also modify the self-attention sub-layer in the decoder stack to prevent positions from attending to subsequent positions. This masking, combined with fact that the output embeddings are offset by one position, ensures that the predictions for position i can depend only on the known outputs at positions less than i.
		"""
    s = SummaryGenerator()
    ans = s.generate_summary(text=text)
    print(ans)


ValidationError: 1 validation error for HuggingFaceEndpoint
__root__
  Please specify an `endpoint_url` or `repo_id` for the model. (type=value_error)

In [6]:
from transformers import T5ForConditionalGeneration, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFacePipeline

model_id = "Falconsai/text_summarization"  # possibly best one out of 3
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = T5ForConditionalGeneration.from_pretrained(
    pretrained_model_name_or_path=model_id
)

In [12]:
pipe = pipeline("summarization", model=model, tokenizer=tokenizer, max_new_tokens=100, top_k=50, temperature=0.1, do_sample=True)
llm = HuggingFacePipeline(pipeline=pipe)
llm.invoke(text)

'The encoder is composed of a stack of N = 6 identical layers . Each layer has two sub-layers . The first is a multi-head self-attention mechanism .'

In [10]:
llm = HuggingFacePipeline.from_model_id(
            model_id="Falconsai/text_summarization",
            task="summarization",
            #pipeline=pipe,
            
        )
llm.invoke(text)

'The encoder is composed of a stack of N = 6 identical layers . Each layer has two sub-layers . The first is a multi-head self-attention mechanism .'

In [47]:
from transformers import PegasusForConditionalGeneration, AutoTokenizer,pipeline
from langchain_huggingface import HuggingFacePipeline

model_id = "google/pegasus-xsum"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = PegasusForConditionalGeneration.from_pretrained(
    pretrained_model_name_or_path=model_id
)


pipe = pipeline("summarization", model=model, tokenizer=tokenizer, max_new_tokens=100, top_k=50, temperature=0.1, do_sample=True)
llm = HuggingFacePipeline(pipeline=pipe)
llm.invoke(text)

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


'We present a novel encoder and decoder, both of which employ a multi-head self-attention mechanism.'

In [36]:
from transformers import BartForConditionalGeneration, AutoTokenizer,pipeline
from langchain_huggingface import HuggingFacePipeline

model_id = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = BartForConditionalGeneration.from_pretrained(
    pretrained_model_name_or_path=model_id
)


pipe = pipeline("summarization", model=model, tokenizer=tokenizer, max_new_tokens=100, top_k=50, temperature=0.1, do_sample=True)
llm = HuggingFacePipeline(pipeline=pipe)
llm.invoke(text)



'Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- wise fully connected feed-forward network.'