In [1]:
import requests
import warnings
from pathlib import Path

import pandas as pd
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

warnings.filterwarnings("ignore")

In [2]:
llm = Ollama(model="mistral", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))

In [14]:
data_folder = Path.cwd() / "data"
data_folder.mkdir(parents=True, exist_ok=True)

pdf_url = "https://cloudely.com/wp-content/uploads/2024/01/The-Beginners-Guide-to-Large-Language-Models-1.pdf"

pdf_file = str(data_folder / pdf_url.split("/")[-1])
response = requests.get(pdf_url)

# Ensure the response is successful
if response.status_code == 200:
    # Write the content to a file
    with open(pdf_file, 'wb') as file:
        file.write(response.content)
    print(f"PDF downloaded successfully to {pdf_file}")
else:
    print(f"Failed to download PDF. Status code: {response.status_code}")

PDF downloaded successfully to /Users/aboniasojasingarayar/Documents/Github/Tutorial/data/The-Beginners-Guide-to-Large-Language-Models-1.pdf


In [15]:
pdf_loader = PyPDFLoader(pdf_file)
pages = pdf_loader.load_and_split()
print(pages[2].page_content)

Types of Large Language Models
There are several significant types of LLMs, which utilize different
architectures and training techniques:
Autoregressive Models
These were some of the earliest LLMs, including OpenAI's GPT
models. They are trained to predict the next word or token in a
sequence using previous context. Generating text is done
sequentially, one token at a time.
Autoencoding Models
Examples are BERT, T5, and BART. These models encode the entire
input sequence into a latent representation and then decode it back
into the original sequence. This allows them to be fine-tuned for
various downstream NLP tasks.
Encoder-Decoder Models
Like GPT-3, these models have separate encoder and decoder
components. The encoder ingests the input text while the decoder
generates the output text. This architecture provides more flexibility.
Sparse Models
To reduce computational costs, some models use sparse
representations and attention mechanisms. Sparse Transformer is
one example of this app

# Method 1: Stuffing

In [5]:
prompt_template = """Write a concise summary of the following text delimited by triple backquotes.
              Return your response in bullet points which covers the key points of the text.
              ```{text}```
              BULLET POINT SUMMARY:
  """

prompt = PromptTemplate(template=prompt_template, input_variables=["text"])

In [6]:
stuff_chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)

In [7]:
four_pages = pages[:4]

In [8]:
four_pages

[Document(page_content='Large Language Models \nCSC413 Tutorial 9 \nYongchao Zhou', metadata={'source': '/Users/aboniasojasingarayar/Documents/Github/Tutorial/data/tut09_llm.pdf', 'page': 0}),
 Document(page_content='Overview \n●What are LLMs? \n●Why LLMs? \n●Emergent Capabilities \n○Few-shot In-context Learning \n○Advanced Prompt Techniques \n●LLM Training \n○Architectures \n○Objectives \n●LLM Finetuning \n○Instruction ﬁnetuning \n○RLHF \n○Bootstrapping \n●LLM Risks', metadata={'source': '/Users/aboniasojasingarayar/Documents/Github/Tutorial/data/tut09_llm.pdf', 'page': 1}),
 Document(page_content='What are Language Models? \n●Narrow Sense \n○A probabilistic model that assigns a probability to every ﬁnite sequence (grammatical or not) \n●Broad Sense \n○Decoder-only models (GPT-X, OPT, LLaMA, PaLM) \n○Encoder-only models (BERT, RoBERTa, ELECTRA) \n○Encoder-decoder models (T5, BART)', metadata={'source': '/Users/aboniasojasingarayar/Documents/Github/Tutorial/data/tut09_llm.pdf', 'page':

In [9]:
try:
    print(stuff_chain.run(four_pages))
except Exception as e:
    print(
        "The code failed since it won't be able to run inference on such a huge context and throws this exception: ",
        e,
    )

 * Large Language Models (LLMs) are probabilistic models assigning a probability to every finite sequence, in both narrow and broad senses
* Narrow sense: assigns probability to grammatical or not sequences
* Broad sense: includes decoder-only, encoder-only, and encoder-decoder models with billions of parameters
* Why LLMs?
  + Emergent Capabilities
    - Few-shot In-context Learning
    - Advanced Prompt Techniques
* LLM Training: architectures, objectives, finetuning methods
* Architectures: transformer models, etc.
* Objectives: Maximum Likelihood Estimation (MLE), etc.
* Finetuning: Instruction finetuning, RLHF (Reinforcement Learning with Human Feedback), Bootstrapping
* Risks associated with LLMs * Large Language Models (LLMs) are probabilistic models assigning a probability to every finite sequence, in both narrow and broad senses
* Narrow sense: assigns probability to grammatical or not sequences
* Broad sense: includes decoder-only, encoder-only, and encoder-decoder models wit

# Method 2: MapReduce

In [16]:
data_folder = Path.cwd() / "data"
data_folder.mkdir(parents=True, exist_ok=True)

pdf_url = "https://cloudely.com/wp-content/uploads/2024/01/The-Beginners-Guide-to-Large-Language-Models-1.pdf"

pdf_file = str(data_folder / pdf_url.split("/")[-1])
response = requests.get(pdf_url)

# Ensure the response is successful
if response.status_code == 200:
    # Write the content to a file
    with open(pdf_file, 'wb') as file:
        file.write(response.content)
    print(f"PDF downloaded successfully to {pdf_file}")
else:
    print(f"Failed to download PDF. Status code: {response.status_code}")

PDF downloaded successfully to /Users/aboniasojasingarayar/Documents/Github/Tutorial/data/The-Beginners-Guide-to-Large-Language-Models-1.pdf


In [17]:
pdf_loader = PyPDFLoader(pdf_file)
pages = pdf_loader.load_and_split()
print(pages[2].page_content)

Types of Large Language Models
There are several significant types of LLMs, which utilize different
architectures and training techniques:
Autoregressive Models
These were some of the earliest LLMs, including OpenAI's GPT
models. They are trained to predict the next word or token in a
sequence using previous context. Generating text is done
sequentially, one token at a time.
Autoencoding Models
Examples are BERT, T5, and BART. These models encode the entire
input sequence into a latent representation and then decode it back
into the original sequence. This allows them to be fine-tuned for
various downstream NLP tasks.
Encoder-Decoder Models
Like GPT-3, these models have separate encoder and decoder
components. The encoder ingests the input text while the decoder
generates the output text. This architecture provides more flexibility.
Sparse Models
To reduce computational costs, some models use sparse
representations and attention mechanisms. Sparse Transformer is
one example of this app

In [18]:
map_prompt_template = """
                      Write a summary of this chunk of text that includes the main points and any important details.
                      {text}
                      """

map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])

combine_prompt_template = """
                      Write a concise summary of the following text delimited by triple backquotes.
                      Return your response in bullet points which covers the key points of the text.
                      ```{text}```
                      BULLET POINT SUMMARY:
                      """

combine_prompt = PromptTemplate(
    template=combine_prompt_template, input_variables=["text"]
)

In [19]:
map_reduce_chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=map_prompt,
    combine_prompt=combine_prompt,
    return_intermediate_steps=True,
)

In [20]:
map_reduce_outputs = map_reduce_chain({"input_documents": pages})

 This text is titled "The Beginner's Guide to Large Language Models" from Cloudly.com. The guide aims to provide an introduction and understanding of large language models for beginners. The main points include:

1. Definition and explanation of what large language models are, emphasizing their role in natural language processing and machine learning.
2. Discussion on the differences between small and large language models, focusing on the scale of training data and the complexity of models.
3. Explanation of how large language models are built using deep learning techniques like transformers and recurrent neural networks (RNNs).
4. Overview of applications of large language models such as text generation, translation, summarization, question answering, and sentiment analysis.
5. Discussion on popular large language models like BERT, GPT-3, T5, and RoBERTa, providing brief descriptions and use cases for each.
6. Importance of fine-tuning large language models to adapt them to specific 

In [21]:
final_mp_data = []
for doc, out in zip(
    map_reduce_outputs["input_documents"], map_reduce_outputs["intermediate_steps"]
):
    output = {}
    output["file_name"] = Path(doc.metadata["source"]).stem
    output["file_type"] = Path(doc.metadata["source"]).suffix
    output["page_number"] = doc.metadata["page"]
    output["chunks"] = doc.page_content
    output["concise_summary"] = out
    final_mp_data.append(output)

In [22]:
pdf_mp_summary = pd.DataFrame.from_dict(final_mp_data)
pdf_mp_summary = pdf_mp_summary.sort_values(
    by=["file_name", "page_number"]
)  # sorting the dataframe by filename and page_number
pdf_mp_summary.reset_index(inplace=True, drop=True)
pdf_mp_summary.head()

Unnamed: 0,file_name,file_type,page_number,chunks,concise_summary
0,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,0,THE BEGINNER’S GUIDE TO \nLARGE LANGUAGE MODEL...,"This text is titled ""The Beginner's Guide to ..."
1,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,1,Do you ever feel curious about how human-like ...,This text introduces Large Language Models (L...
2,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,2,Types of Large Language Models\nThere are seve...,The text discusses various types of Large Lan...
3,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,3,How Do LLMs Really Work?\nAt their technologic...,"LLMs, or Large Language Models, utilize the t..."
4,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,4,"With their powerful yet aligned properties, LL...",LLMs (Language Model Machines) have become es...


In [23]:
index = 3
print("[Context]")
print(pdf_mp_summary["chunks"].iloc[index])
print("\n\n [Simple Summary]")
print(pdf_mp_summary["concise_summary"].iloc[index])
print("\n\n [Page number]")
print(pdf_mp_summary["page_number"].iloc[index])
print("\n\n [Source: file_name]")
print(pdf_mp_summary["file_name"].iloc[index])

[Context]
How Do LLMs Really Work?
At their technological core, LLMs rely on the transformer
architecture first introduced in 2017. Transformers represent the
current state-of-the-art for language tasks by tackling long-standing
challenges with prior approaches like RNNs.
Stacked Self-Attention Layers
Transformers consist of stacked encoding-decoding layers
containing two sub-layers: multi-head self-attention followed by
point-wise feed-forward networks. Self-attention mechanisms relate
different positions in a sequence to compute representations for
downstream processing.
This attention-based approach allows relationships between all
parts of an input sequence to be mapped simultaneously in parallel.
It also avoids issues with long-term dependencies that recurrent
models struggle with.
Pretraining Through Self-Supervision
LLMs are initially trained on massive corpora using self-supervised
objectives that require no human labelling. Chief among these is the
masked language modelling (M

# Method 3: Refine

In [24]:
question_prompt_template = """
                  Please provide a summary of the following text.
                  TEXT: {text}
                  SUMMARY:
                  """

question_prompt = PromptTemplate(
    template=question_prompt_template, input_variables=["text"]
)

refine_prompt_template = """
              Write a concise summary of the following text delimited by triple backquotes.
              Return your response in bullet points which covers the key points of the text.
              ```{text}```
              BULLET POINT SUMMARY:
              """

refine_prompt = PromptTemplate(
    template=refine_prompt_template, input_variables=["text"]
)


In [25]:
refine_chain = load_summarize_chain(
    llm,
    chain_type="refine",
    question_prompt=question_prompt,
    refine_prompt=refine_prompt,
    return_intermediate_steps=True,
)

In [26]:
refine_outputs = refine_chain({"input_documents": pages})

 Title: The Beginner's Guide to Large Language Models (Source: WWW.CLOUDELY.COM)

Summary:
This beginner's guide aims to provide an understanding of large language models, their applications, and the key concepts surrounding them. It begins by explaining what large language models are – AI systems designed to generate human-like text based on input data.

The text then delves into the history of language models, starting with early statistical models like Markov Models and Hidden Markov Models, followed by the rise of deep learning models in the 2010s. The guide emphasizes that recent advancements in language models can be attributed to three key factors: massive amounts of data, increased computational power, and sophisticated model architectures.

Some popular large language models are introduced, such as BERT (Bidirectional Encoder Representations from Transformers), GPT-3 (Generative Pretrained Transformer 3), and T5 (Text-to-Text Transfer Transformer). These models have shown impr

In [27]:
final_refine_data = []
for doc, out in zip(
    refine_outputs["input_documents"], refine_outputs["intermediate_steps"]
):
    output = {}
    output["file_name"] = Path(doc.metadata["source"]).stem
    output["file_type"] = Path(doc.metadata["source"]).suffix
    output["page_number"] = doc.metadata["page"]
    output["chunks"] = doc.page_content
    output["concise_summary"] = out
    final_refine_data.append(output)

In [28]:
pdf_refine_summary = pd.DataFrame.from_dict(final_refine_data)
pdf_refine_summary = pdf_mp_summary.sort_values(
    by=["file_name", "page_number"]
)  # sorting the datafram by filename and page_number
pdf_refine_summary.reset_index(inplace=True, drop=True)
pdf_refine_summary.head()

Unnamed: 0,file_name,file_type,page_number,chunks,concise_summary
0,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,0,THE BEGINNER’S GUIDE TO \nLARGE LANGUAGE MODEL...,"This text is titled ""The Beginner's Guide to ..."
1,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,1,Do you ever feel curious about how human-like ...,This text introduces Large Language Models (L...
2,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,2,Types of Large Language Models\nThere are seve...,The text discusses various types of Large Lan...
3,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,3,How Do LLMs Really Work?\nAt their technologic...,"LLMs, or Large Language Models, utilize the t..."
4,The-Beginners-Guide-to-Large-Language-Models-1,.pdf,4,"With their powerful yet aligned properties, LL...",LLMs (Language Model Machines) have become es...


In [29]:
index = 3
print("[Context]")
print(pdf_refine_summary["chunks"].iloc[index])
print("\n\n [Simple Summary]")
print(pdf_refine_summary["concise_summary"].iloc[index])
print("\n\n [Page number]")
print(pdf_refine_summary["page_number"].iloc[index])
print("\n\n [Source: file_name]")
print(pdf_refine_summary["file_name"].iloc[index])

[Context]
How Do LLMs Really Work?
At their technological core, LLMs rely on the transformer
architecture first introduced in 2017. Transformers represent the
current state-of-the-art for language tasks by tackling long-standing
challenges with prior approaches like RNNs.
Stacked Self-Attention Layers
Transformers consist of stacked encoding-decoding layers
containing two sub-layers: multi-head self-attention followed by
point-wise feed-forward networks. Self-attention mechanisms relate
different positions in a sequence to compute representations for
downstream processing.
This attention-based approach allows relationships between all
parts of an input sequence to be mapped simultaneously in parallel.
It also avoids issues with long-term dependencies that recurrent
models struggle with.
Pretraining Through Self-Supervision
LLMs are initially trained on massive corpora using self-supervised
objectives that require no human labelling. Chief among these is the
masked language modelling (M