## ***Import Libraries***

In [None]:
# install below module
!pip install langchain
! pip install pypdf
!pip install transformers
!pip install sentence_transformers
!pip install accelerate
!pip install bitsandbytes
!pip install reportlab

Collecting reportlab
  Downloading reportlab-4.1.0-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: reportlab
Successfully installed reportlab-4.1.0


In [None]:
from langchain.document_loaders import TextLoader, PyPDFLoader
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate


In [None]:
import warnings
# Suppress every Python warning from here on. Note that this also hides
# deprecation notices emitted by the libraries used in the cells below.
warnings.filterwarnings("ignore")

## ***Load Documents***

In [None]:
from google.colab import files
import os

def loadDocuments(file_path=None):
    """Load a .txt or .pdf file with the matching LangChain loader.

    Args:
        file_path: Path of the file to load. When None, the Colab upload
            dialog is opened and the first uploaded file is used.

    Returns:
        Whatever the loader's ``load()`` returns, or None when nothing was
        uploaded or the file extension is neither .txt nor .pdf.
    """
    if file_path is None:
        uploaded = files.upload()
        if not uploaded:
            # Nothing came back from the upload dialog (e.g. it was cancelled);
            # indexing the first key would raise IndexError.
            print("No file was uploaded.")
            return None
        file_path = next(iter(uploaded))

    # Compare the extension case-insensitively so ".PDF" / ".TXT" also work.
    ext = os.path.splitext(file_path)[-1].lower()
    if ext == '.txt':
        loader = TextLoader(file_path)
    elif ext == '.pdf':
        loader = PyPDFLoader(file_path)
    else:
        print("Please upload a correct file (either .txt or .pdf)")
        return None

    documents = loader.load()
    print(documents)
    return documents

# Despite its name, `text` holds whatever loadDocuments returns: the loaded
# documents, or None when the file type is not supported.
text = loadDocuments()

Saving face mask.pdf to face mask.pdf


In [None]:
# Upper bound on the number of whitespace-separated words kept as context.
MAX_CONTEXT_WORDS = 20000

# LangChain documents expose their text as `page_content`.
# Pages are joined with a space so the last word of one page is not fused
# with the first word of the next.
texts = " ".join(document.page_content for document in text)

# Slicing already handles inputs shorter than the limit, so no length check
# is needed; re-joining also normalises all whitespace to single spaces.
lst_text = texts.split()[:MAX_CONTEXT_WORDS]
texts = " ".join(lst_text)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Single source of truth for the checkpoint so tokenizer and model always match.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

Tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Passing load_in_4bit=True straight to from_pretrained is deprecated;
# the 4-bit setting is supplied through a BitsAndBytesConfig instead.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    torch_dtype=torch.float16,
)

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Text-generation pipeline built from the model and tokenizer loaded above.
# NOTE(review): max_length appears to bound prompt tokens plus generated
# tokens together, while the context prepared earlier can be up to 20000
# words -- confirm the prompt leaves room for output, or consider
# max_new_tokens instead.
pipe = pipeline(task='text-generation', model=model, tokenizer=Tokenizer, max_length=15000)

# Wrap the pipeline so it can be called through LangChain's LLM interface.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
from langchain.chains.question_answering import load_qa_chain

# Instruction template for question generation; the {context} placeholder is
# filled with the document text.
prompt_temp = """
Context: {context}

Instructions:
1. Open-ended Questions: Formulate questions that require detailed responses or explanations.
2. Multiple Choice Questions (MCQs): Create questions with four options each, including the correct answer.
3. True/False Questions: Develop statements that can be answered as true or false.
4. Provide answers for all types of questions.

Ensure the questions and answers are relevant to the provided context.

"""

# Build the template object, then render the final prompt string with the
# prepared document text substituted for {context}.
prompt = PromptTemplate(template=prompt_temp, input_variables=['context'])
PROMPT = prompt.format(context=texts)


In [None]:
# Run the fully formatted prompt through the local model. The returned value
# is what gets written to the PDF at the end of the notebook.
result= local_llm(PROMPT)

# **Save the Result to a PDF File**

In [None]:
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.pdfbase.pdfmetrics import stringWidth

def _wrap_line(line, max_width, font_name, font_size):
    """Split one line into pieces no wider than max_width, breaking at spaces where possible."""
    if stringWidth(line, font_name, font_size) <= max_width:
        # Fits as-is (this also keeps empty lines as blank output lines).
        return [line]

    parts = []
    current = ""
    for word in line.split():
        candidate = f"{current} {word}" if current else word
        if stringWidth(candidate, font_name, font_size) <= max_width:
            current = candidate
            continue
        if current:
            parts.append(current)
        # A single word wider than the page has no space to break at, so cut
        # it at the longest prefix that still fits.
        while len(word) > 1 and stringWidth(word, font_name, font_size) > max_width:
            cut = len(word) - 1
            while cut > 1 and stringWidth(word[:cut], font_name, font_size) > max_width:
                cut -= 1
            parts.append(word[:cut])
            word = word[cut:]
        current = word
    if current:
        parts.append(current)
    return parts


def save_to_pdf(text, filename):
    """Write plain text to a letter-sized PDF with word wrapping and page breaks.

    Args:
        text: The string to write; it is split into lines on '\\n' and each
            line is stripped of leading/trailing whitespace.
        filename: Path of the PDF file to create.
    """
    font_name, font_size = "Helvetica", 12
    left_x, top_y = 10, 750      # starting position of the text on every page
    bottom_y = 40                # lowest baseline allowed before a page break
    page_width = letter[0]
    max_width = page_width - 2 * left_x  # same margin on the right as on the left

    c = canvas.Canvas(filename, pagesize=letter)

    def start_text_object():
        # Each page needs its own text object positioned at the top of the page.
        textobject = c.beginText(left_x, top_y)
        textobject.setFont(font_name, font_size)
        return textobject

    textobject = start_text_object()
    for raw_line in text.split('\n'):
        for part in _wrap_line(raw_line.strip(), max_width, font_name, font_size):
            if textobject.getY() < bottom_y:
                # Page is full: flush what has been written and continue on a new page.
                c.drawText(textobject)
                c.showPage()
                textobject = start_text_object()
            textobject.textLine(part)
    c.drawText(textobject)
    c.save()

# Write the model output to output6.pdf.
save_to_pdf(result, "output6.pdf")