#Cheat Sheet Generator with Open Source LLMs and Cohere
Made by: Wilfredo Aaron Sosa Ramos (AI Lab Manager at RealityAI Labs)

##1. Install the dependencies:

In [3]:
!pip install -q langchain langchain_core langchain_community chroma langchain_chroma langchain_groq langchain_cohere

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/250.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.0/250.0 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/109.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.2/209.2 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m54.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [9]:
!pip install -q pypdf

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/298.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/298.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h

##2. Set env. variables

In [4]:
import os
from google.colab import userdata
os.environ['GROQ_API_KEY'] = userdata.get('GROQ_API_KEY')
os.environ['COHERE_API_KEY'] = userdata.get('COHERE_API_KEY')

##3. Create vector store with a PDF file

###3.1. Create File Handler

In [5]:
import tempfile
import uuid
import requests

class FileHandler:
    def __init__(self, file_loader, file_extension):
        self.file_loader = file_loader
        self.file_extension = file_extension

    def load(self, url):
        # Generate a unique filename with a UUID prefix
        unique_filename = f"{uuid.uuid4()}.{self.file_extension}"

        try:
            # Download the file from the URL and save it to a temporary file
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # Raise an HTTPError for bad responses

            with tempfile.NamedTemporaryFile(delete=False, prefix=unique_filename) as temp_file:
                temp_file.write(response.content)
                temp_file_path = temp_file.name

        except requests.exceptions.RequestException as req_err:
            raise Exception(f"Failed to download file from URL", url) from req_err
        except Exception as e:
            raise Exception(f"Failed to handle file download", url) from e

        # Use the file_loader to load the documents
        try:
            loader = self.file_loader(file_path=temp_file_path)
        except Exception as e:
            raise Exception(f"No file found", temp_file_path) from e

        try:
            documents = loader.load()
        except Exception as e:
            raise Exception(f"No file content available", temp_file_path) from e

        # Remove the temporary file
        os.remove(temp_file_path)

        return documents

###3.2. Create PDF document loader util function

In [6]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 100
)

def load_pdf_documents(pdf_url: str):
    pdf_loader = FileHandler(PyPDFLoader, "pdf")
    docs = pdf_loader.load(pdf_url)

    if docs:
        split_docs = splitter.split_documents(docs)

        return split_docs

In [10]:
docs = load_pdf_documents("https://arxiv.org/pdf/2403.03853")

In [11]:
docs

[Document(metadata={'source': '/tmp/38da2325-309c-4807-87ec-7d862dcfc5d9.pdfi8n9sao8', 'page': 0}, page_content='ShortGPT: Layers in Large Language Models are More Redun-\ndant Than You Expect\nXin Men∗\nBaichuan Inc.\nMingyu Xu∗\nBaichuan Inc.\nQingyu Zhang∗\nISCAS\nBingning Wang †\nBaichuan Inc.\nHongyu Lin\nISCAS\nYaojie Lu\nISCAS\nXianpei Han\nISCAS\nWeipeng Chen\nBaichuan Inc.\nAbstract\nAs Large Language Models (LLMs) continue to advance in performance,\ntheir size has increased significantly, with current LLMs containing billions\nor even trillions of parameters. In this study, we identify notable redun-\ndancy across the layers of LLMs, where some layers contribute minimally\nto overall network functionality. To quantify this, we introduce a metric\ncalled Block Influence (BI) which use the similarity between layer’s input\nand output to measure the importance of each layer. Based on the observa-\ntion of layer redundancy, we propose a straightforward pruning method:\nlayer rem

###3.3. Create vector store using Cohere and Chroma (For English lang.)

In [7]:
from langchain_cohere import CohereEmbeddings

embedding_model = CohereEmbeddings(model="embed-english-v3.0")

In [12]:
from langchain_community.vectorstores import Chroma

vectorstore = Chroma.from_documents(
    documents=docs,
    collection_name="rag",
    embedding=embedding_model,
)

###3.4. Obtain retriever

In [13]:
retriever = vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={'k': 4},
            )

##4. Create Pydantic Schemas:

In [14]:
from pydantic import BaseModel, Field
from typing import List

class TopicSchema(BaseModel):
    title: str = Field(..., description="The title of the topic covered in the cheat sheet.")
    description: str = Field(..., description="A brief description or summary of the topic.")
    key_points: List[str] = Field(..., description="List of key takeaways or bullet points for the topic.")
    resources: List[str] = Field(..., description="List of external resources or references for the topic.")

class ExampleSchema(BaseModel):
    example_title: str = Field(..., description="The title or name of the example.")
    code_snippet: str = Field(..., description="A code snippet or text representing the example.")
    explanation: str = Field(..., description="A detailed explanation of the example and its purpose.")
    output: str = Field(..., description="The expected output or result of the example when executed.")
    tags: List[str] = Field(..., description="Tags or keywords associated with the example for indexing.")

class SectionSchema(BaseModel):
    section_title: str = Field(..., description="The title of the section.")
    topics: List[TopicSchema] = Field(..., description="List of topics included in this section.")
    examples: List[ExampleSchema] = Field(..., description="List of examples relevant to the section.")
    summary: str = Field(..., description="A brief summary of the section's content and purpose.")
    importance: int = Field(..., description="An importance level or priority score for the section.")

class MetadataSchema(BaseModel):
    author: str = Field(..., description="The name of the cheat sheet's author.")
    version: str = Field(..., description="The version of the cheat sheet.")
    last_updated: str = Field(..., description="The last updated date of the cheat sheet in ISO 8601 format.")
    created_date: str = Field(..., description="The creation date of the cheat sheet in ISO 8601 format.")
    tags: List[str] = Field(..., description="A list of tags or keywords describing the cheat sheet.")
    intended_audience: str = Field(..., description="The primary audience or user group for the cheat sheet.")
    license: str = Field(..., description="The licensing information for using or sharing the cheat sheet.")

class CheatSheetSchema(BaseModel):
    title: str = Field(..., description="The main title of the cheat sheet.")
    description: str = Field(..., description="A brief overview or introduction to the cheat sheet.")
    metadata: MetadataSchema = Field(..., description="Metadata information about the cheat sheet.")
    sections: List[SectionSchema] = Field(..., description="A list of sections containing the cheat sheet's content.")
    total_sections: int = Field(..., description="The total number of sections in the cheat sheet.")
    difficulty_level: str = Field(..., description="The difficulty level of the cheat sheet content.")
    usage_scenarios: List[str] = Field(..., description="Scenarios or contexts where this cheat sheet is useful.")

##5. Create RAG chains with Llama and Mixtral

In [18]:
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser

In [19]:
from langchain_groq import ChatGroq
llm_llama_3_1 = ChatGroq(model="llama-3.1-8b-instant", temperature=0)
llm_mixtral_8_7b = ChatGroq(model="mixtral-8x7b-32768", temperature=0)

In [20]:
topic = "Create a Cheat Sheet for Large Language Models"
language = "English"
context = retriever.invoke(topic)

In [21]:
context

[Document(metadata={'page': 8, 'source': '/tmp/38da2325-309c-4807-87ec-7d862dcfc5d9.pdfi8n9sao8'}, page_content='that compared to multiple-choice tasks, generative tasks face the problem of accumulated\nerrors and large model is more robust than small one. The reasons behind it still need to\nbe explored. The post-training techniques discussed in Section 4.6 have the potential to\nmitigate this issue and warrant further exploration.\n6 Related works\nTo reduce the inference cost of large language models and increase their practical applica-\ntions, there have been many recent works on compressing models, which can be classified\ninto two categories: model pruning and quantization. Besides, there are some works aim to\nstudy the redundancy of model which is essential for compressing models.\nModel pruning: model pruning (LeCun et al., 1989; Han et al., 2015) is a classic and effective\nmethod of reducing model redundancy modules to compress models. The model pruning\nmethods mainly incl

In [23]:
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate

def generate_cheatsheet_topics(topic, language, context):
    # System prompt
    system = """
    You are an expert assistant specializing in creating well-structured and insightful cheat sheets.
    When given a topic, your task is to brainstorm and return a comprehensive list of related subtopics that
    are relevant, practical, and helpful for creating a cheat sheet. You must strictly adhere to the following:

    1. **Language Specification**: Always return the subtopics in the language requested by the user (e.g., Spanish, English, etc.).
    2. **Structure**:
        - Each subtopic must have a descriptive title.
        - Optionally, include a one-sentence description of each subtopic if requested.
    3. **Coverage**:
        - Cover fundamental concepts, advanced details, practical applications, and examples.
        - Include tools, frameworks, or methodologies relevant to the topic.
    4. **Clarity**:
        - Use clear, concise language suitable for the given audience's level (beginner, intermediate, advanced).
    5. **Examples**:
        - Provide diverse subtopics catering to different perspectives or practical uses of the main topic.
    6. **Scalability**:
        - Ensure the subtopics are modular for easy expansion or division into cheat sheet sections.
    """

    # Prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system),
            ("human", """
            User's topic and language request:
            **Topic**: {topic}
            **Language**: {language}

            You must use the following context: {context}.

            Generate a list of subtopics for the cheat sheet with the above context.
            """),
        ]
    )

    # Chain definition
    rag_chain = prompt | llm_llama_3_1 | StrOutputParser()

    # Debugging information
    print(f"Generating cheat sheet topics for: {topic} in {language}")

    # Run the chain
    generation = rag_chain.invoke({
        "context": context,
        "topic": topic,
        "language": language,
    })

    # Return generation results
    return generation

In [24]:
cheat_sheet_topics = generate_cheatsheet_topics(topic, language, context)

Generating cheat sheet topics for: Create a Cheat Sheet for Large Language Models in English


In [26]:
print(cheat_sheet_topics)

Here's a comprehensive list of subtopics for the cheat sheet on "Create a Cheat Sheet for Large Language Models" in English:

### 1. **Understanding Large Language Models**

* **1.1. Definition and Overview**: Define what large language models are and their significance in natural language processing.
* **1.2. Types of Large Language Models**: Discuss the different types of large language models, including transformer-based models and recurrent neural networks.
* **1.3. Advantages and Challenges**: Highlight the advantages and challenges of large language models, including their ability to handle complex tasks and their high computational requirements.

### 2. **Model Pruning Techniques**

* **2.1. Introduction to Model Pruning**: Explain the concept of model pruning and its importance in reducing the size of large language models.
* **2.2. Unstructured Pruning**: Describe unstructured pruning, including its methods and applications.
* **2.3. Structured Pruning**: Explain structured pr

In [35]:
def create_cheatsheet(list_topics):
    # Parser
    parser = JsonOutputParser(pydantic_object=CheatSheetSchema)

    # Prompt
    prompt_message = """
    You are an advanced assistant specializing in generating structured and insightful cheat sheets.
    When given a list of subtopics and their descriptions, your task is to create a fully detailed
    cheat sheet strictly following the structure defined below:

    1. Use the following structure:
       - `title`: The overall title of the cheat sheet.
       - `description`: A brief introduction to the cheat sheet, explaining its purpose and scope.
       - `metadata`: Include `author`, `version`, `last_updated`, `created_date`, `tags`, `intended_audience`, and `license`.
       - `sections`: Divide content into logical sections based on the subtopics.

    2. Each section must include:
       - `section_title`: The title of the subtopic.
       - `topics`: A list of key concepts, each with:
         - `title`: Key concept title.
         - `description`: Summary of the concept.
         - `key_points`: List of essential points to understand.
         - `resources`: Links or references for further exploration.
       - `examples`: A list of examples, each with:
         - `example_title`: Name of the example.
         - `code_snippet`: Relevant code or practical example text.
         - `explanation`: Explanation of the example's purpose.
         - `output`: Expected output or results.
         - `tags`: Keywords related to the example.
       - `summary`: Summarize the section content in 1–2 sentences.
       - `importance`: A numerical score (1–10) for the section's importance.

    3. Metadata Rules:
       - Always include metadata like the author's name, the cheat sheet's version, and a list of tags relevant to the topic.
       - Specify the intended audience (e.g., beginners, intermediate users, experts).

    4. Additional Information:
       - Include at least one practical example in each section.
       - Ensure the content is written in the language requested by the user.

    Use this list of topics:
    <list_topics>
    {list_topics}
    </list_topics>

    You must respond as a JSON following this format:

    <format_instruction>
    {format_instructions}
    </format_instruction>
    """

    prompt = PromptTemplate(
        template=prompt_message,
        input_variables=["list_topics"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    # Chain
    cheat_sheet_chain = prompt | llm_mixtral_8_7b | parser

    # Run
    cheatsheet_response = cheat_sheet_chain.invoke(list_topics)

    print(f"Generated cheat sheet based on the topics: {cheatsheet_response}")

    return cheatsheet_response

In [36]:
cheat_sheet = create_cheatsheet(cheat_sheet_topics)

Generated cheat sheet based on the topics: {'title': 'Create a Cheat Sheet for Large Language Models', 'description': 'This cheat sheet provides a comprehensive overview of large language models, including model pruning, quantization, and compression techniques. It also covers tools and frameworks, best practices, and future directions for large language models.', 'metadata': {'author': 'Advanced Assistant', 'version': '1.0', 'last_updated': '2023-03-15', 'created_date': '2023-03-01', 'tags': ['large language models', 'model pruning', 'quantization', 'compression'], 'intended_audience': 'Intermediate users, Experts', 'license': 'Creative Commons Attribution-NonCommercial 4.0 International'}, 'sections': [{'section_title': '1.1. Definition and Overview', 'topics': [{'title': 'Definition', 'description': 'Large language models are artificial neural networks designed for natural language processing tasks.', 'key_points': ['Handle complex language tasks', 'Deep learning-based', 'Transforme

In [37]:
cheat_sheet

{'title': 'Create a Cheat Sheet for Large Language Models',
 'description': 'This cheat sheet provides a comprehensive overview of large language models, including model pruning, quantization, and compression techniques. It also covers tools and frameworks, best practices, and future directions for large language models.',
 'metadata': {'author': 'Advanced Assistant',
  'version': '1.0',
  'last_updated': '2023-03-15',
  'created_date': '2023-03-01',
  'tags': ['large language models',
   'model pruning',
   'quantization',
   'compression'],
  'intended_audience': 'Intermediate users, Experts',
  'license': 'Creative Commons Attribution-NonCommercial 4.0 International'},
 'sections': [{'section_title': '1.1. Definition and Overview',
   'topics': [{'title': 'Definition',
     'description': 'Large language models are artificial neural networks designed for natural language processing tasks.',
     'key_points': ['Handle complex language tasks',
      'Deep learning-based',
      'Tran