## Dataset Downloading

In [None]:
from google.colab import files

# Bind the return value instead of leaving the call as the cell's last expression:
# a bare `files.upload()` would be echoed as the cell output, which would put the
# contents of kaggle.json (an API key) into the saved notebook.
uploaded = files.upload()  # upload kaggle.json here


In [None]:
# Recreate ~/.kaggle from scratch and move the uploaded key into it.
# -f / -p keep the cell re-runnable whether or not the directory already exists.
!rm -rf ~/.kaggle
!mkdir -p ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
# Restrict the key file to the current user.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the CNN/DailyMail summarization dataset archive with the Kaggle CLI
# (relies on the kaggle.json installed into ~/.kaggle by the previous cell).
!kaggle datasets download -d gowrishankarp/newspaper-text-summarization-cnn-dailymail

In [None]:
# -n: never overwrite files that are already extracted, so re-running the cell
#     does not stop at unzip's interactive "replace?" prompt.
# -q: keep the per-file listing out of the notebook output.
!unzip -n -q newspaper-text-summarization-cnn-dailymail.zip

## Install Packages

In [None]:
%pip install "accelerate>=0.16.0,<1" "transformers[torch]>=4.28.1,<5" "torch>=1.13.1,<2"
!pip install langchain
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch<2,>=1.13.1
  Downloading torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl (887.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch<2,>=1.13.1)
  Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m56.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cudnn-cu11==8.5.0.96 (from torch<2,>=1.13.1)
  Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cublas-cu11==11.10.3.66 (from torch<2,>=1.13.1)
  Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-man

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.12.0-py3-none-any.whl (474 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.7,>=0.3.0 (from datasets)
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━

## Create Pipeline

In [None]:
# Import packages
import torch
from transformers import pipeline, set_seed
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
import textwrap
import nltk
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling

### Class PipelineModel

In [None]:
class PipelineModel:
    """Prompting, chunked summarization and fine-tuning around one causal-LM checkpoint.

    Typical use: ``create_pipeline()`` to prompt the checkpoint named by ``model_path``,
    ``train_model(...)`` to fine-tune it on CSV splits and save the weights to ``./results``,
    and ``predict_summary(...)`` to summarize with the saved weights.
    """

    def __init__(self, model_path):
        """Remember the checkpoint name/path; all heavy objects are created by the methods below."""
        self.model_path = model_path
        self.pipeline = None
        self.hf_pipeline = None  # LangChain wrapper around the currently active transformers pipeline
        self.prompt = None
        self.prompt_with_context = None
        self.llm_chain = None
        self.llm_context_chain = None
        self.tokenizer = None
        self.model = None
        self.train_dataset = None
        self.valid_dataset = None
        self.test_dataset = None

    def _build_prompts(self):
        """Create the two prompt templates: instruction only, and instruction plus input text."""
        # Template for an instruction with no input
        self.prompt = PromptTemplate(input_variables=["instruction"], template="{instruction}")

        # Template for an instruction with input
        self.prompt_with_context = PromptTemplate(
            input_variables=["instruction", "context"], template="{instruction}\n\nInput:\n{context}"
        )

    def _build_chains(self):
        """(Re)build both LLM chains on top of the current ``self.hf_pipeline``."""
        self.llm_chain = LLMChain(llm=self.hf_pipeline, prompt=self.prompt)
        self.llm_context_chain = LLMChain(llm=self.hf_pipeline, prompt=self.prompt_with_context)

    def create_pipeline(self):
        """Load ``model_path`` as a transformers pipeline and build the prompts and chains on it."""
        # Create pipeline for model
        self.pipeline = pipeline(
            model=self.model_path, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto", return_full_text=True
        )

        self._build_prompts()
        self.hf_pipeline = HuggingFacePipeline(pipeline=self.pipeline)
        self._build_chains()

    def summarize_large_text(self, text, chunk_size=8192, summary_length=4):
        """Summarize a large text by chunking it into smaller parts.

        Args:
            text: The text to summarize.
            chunk_size: Maximum number of characters per chunk (passed to ``textwrap.wrap``).
            summary_length: Maximum number of sentences kept from each chunk's summary.

        Returns:
            The per-chunk summaries joined with single spaces ("" for empty text).

        Raises:
            RuntimeError: If no chain has been built yet.
        """
        if self.llm_context_chain is None:
            raise RuntimeError("Call create_pipeline() (or predict_summary()) before summarize_large_text().")

        chunks = textwrap.wrap(text, chunk_size)
        summaries = []
        for chunk in chunks:
            summary = self.llm_context_chain.predict(instruction="Briefly summarize the text.", context=chunk).lstrip()
            # Keep only the leading sentences so one verbose chunk cannot dominate the result.
            summary = ' '.join(nltk.tokenize.sent_tokenize(summary)[:summary_length])
            summaries.append(summary)
        return " ".join(summaries)

    def load_dataset(self, csv_file):
        """Read a CSV with 'article' and 'highlights' columns into a ``Dataset``.

        The returned dataset has an 'input' column (instruction prefix + article)
        and an 'output' column (the highlights).
        """
        df = pd.read_csv(csv_file)
        formatted_df = pd.DataFrame(
            {
                'input': 'Briefly summarize the text.\n\n' + df['article'],
                'output': df['highlights'],
            }
        )
        return Dataset.from_pandas(formatted_df)

    def train_model(self, train_file, valid_file, test_file):
        """Fine-tune ``model_path`` on the given CSV splits and save the weights to ``./results``.

        NOTE(review): the labels are a copy of the tokenized 'input' column, so the
        'output' (highlights) column never reaches the loss — confirm that is intended.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, padding_side="left")

        # The mapped functions close over the tokenizer only (not `self`), so the
        # dataset library never has to inspect this whole object and its pipeline.
        tokenizer = self.tokenizer

        def tokenize_function(examples):
            return tokenizer(examples['input'], padding="max_length", truncation=True, max_length=2048)

        def with_labels(examples):
            return {'labels': examples['input_ids']}  # model will try to predict its own input

        def prepare(csv_file):
            """Load one split, tokenize it, attach labels and expose the tensors the model consumes."""
            dataset = self.load_dataset(csv_file)
            dataset = dataset.map(tokenize_function, batched=True)
            dataset = dataset.map(with_labels, batched=True)
            # Keep the attention mask: every sequence is padded to max_length, and
            # without the mask the padding positions are attended to like real text.
            dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
            return dataset

        # Load your train, validation and test datasets
        self.train_dataset = prepare(train_file)
        self.valid_dataset = prepare(valid_file)
        self.test_dataset = prepare(test_file)

        self.model = AutoModelForCausalLM.from_pretrained(self.model_path)

        # Enable gradient checkpointing to reduce GPU memory usage during backward pass.
        # This must go through the model method: assigning
        # `config.gradient_checkpointing` on an already-built model does not turn it on.
        self.model.gradient_checkpointing_enable()

        data_collator = DataCollatorForLanguageModeling(tokenizer=self.tokenizer, mlm=False)

        training_args = TrainingArguments(
            output_dir="./results",
            overwrite_output_dir=True,
            num_train_epochs=3,
            per_device_train_batch_size=1,  # adjust according to your GPU memory
            gradient_accumulation_steps=16,  # use gradient accumulation
            fp16=True,  # use mixed precision training
            save_steps=10_000,
            save_total_limit=2,
            prediction_loss_only=True,
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=self.train_dataset,
            eval_dataset=self.valid_dataset,
        )

        trainer.train()

        # Save the model after training
        self.model.save_pretrained("./results")

    def predict_summary(self, context):
        """Summarize ``context`` with the fine-tuned weights stored in ``./results``.

        Rebuilds ``hf_pipeline`` and both chains on the fine-tuned model, so later
        calls to ``summarize_large_text`` use it as well.
        """
        model = AutoModelForCausalLM.from_pretrained("./results")
        if self.tokenizer is None:
            # train_model() was not run in this session; the tokenizer is unchanged by fine-tuning.
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, padding_side="left")

        # Standard text-generation pipeline around the fine-tuned weights.
        # max_new_tokens is set explicitly because the default generation length is
        # far shorter than an article-sized prompt.
        generate_text = pipeline(
            "text-generation",
            model=model,
            tokenizer=self.tokenizer,
            max_new_tokens=256,
            return_full_text=True,
        )

        # The chains must wrap the new pipeline, not the one create_pipeline() built.
        self.hf_pipeline = HuggingFacePipeline(pipeline=generate_text)
        if self.prompt is None or self.prompt_with_context is None:
            self._build_prompts()
        self._build_chains()
        return self.llm_context_chain.predict(instruction="Briefly summarize the text.", context=context).lstrip()


In [None]:
# Bind the wrapper to its own name. Calling it `pipeline` would rebind the
# `pipeline` function imported from transformers, which create_pipeline() and
# later cells still need to call.
pipeline_model = PipelineModel('databricks/dolly-v2-3b')
pipeline_model.create_pipeline()
pipeline_model.train_model('cnn_dailymail/train.csv', 'cnn_dailymail/validation.csv', 'cnn_dailymail/test.csv')

# summarize_large_text() splits model output into sentences with NLTK's Punkt data.
nltk.download('punkt')
set_seed(42)
context = """
Ever noticed how plane seats appear to be getting smaller and smaller? With increasing numbers of people taking to the skies, some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable - it's putting our health and safety in danger. More than squabbling over the arm rest, shrinking space on planes putting our health and safety in danger? This week, a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes, it doesn't stipulate a minimum amount of space for humans. 'In a world where animals have more rights to space and food than humans,' said Charlie Leocha, consumer representative on the committee. 'It is time that the DOT and FAA take a stand for humane treatment of passengers.' But could crowding on planes lead to more serious issues than fighting for space in the overhead lockers, crashing elbows and seat back kicking? Tests conducted by the FAA use planes with a 31 inch pitch, a standard which on some airlines has decreased . Many economy seats on United Airlines have 30 inches of room, while some airlines offer as little as 28 inches . Cynthia Corbertt, a human factors researcher with the Federal Aviation Administration, that it conducts tests on how quickly passengers can leave a plane. But these tests are conducted using planes with 31 inches between each row of seats, a standard which on some airlines has decreased, reported the Detroit News. The distance between two seats from one point on a seat to the same point on the seat behind it is known as the pitch. While most airlines stick to a pitch of 31 inches or above, some fall below this. While United Airlines has 30 inches of space, Gulf Air economy seats have between 29 and 32 inches, Air Asia offers 29 inches and Spirit Airlines offers just 28 inches. 
British Airways has a seat pitch of 31 inches, while easyJet has 29 inches, Thomson's short haul seat pitch is 28 inches, and Virgin Atlantic's is 30-31.
"""
summary = pipeline_model.summarize_large_text(context)
print(summary)


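Original (the same summarization and fine-tuning steps as standalone cells, without the `PipelineModel` class)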

In [None]:
# Build the text-generation pipeline directly from the Dolly checkpoint on the Hub.
generate_text = pipeline(
    model="databricks/dolly-v2-3b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    return_full_text=True,
)


Downloading (…)lve/main/config.json:   0%|          | 0.00/819 [00:00<?, ?B/s]

Downloading (…)instruct_pipeline.py:   0%|          | 0.00/9.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/databricks/dolly-v2-3b:
- instruct_pipeline.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading pytorch_model.bin:   0%|          | 0.00/5.68G [00:00<?, ?B/s]



Downloading (…)okenizer_config.json:   0%|          | 0.00/450 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/228 [00:00<?, ?B/s]

In [None]:
# Prompt for a bare instruction (no accompanying input text).
prompt = PromptTemplate(template="{instruction}", input_variables=["instruction"])

# Prompt for an instruction followed by an input passage.
prompt_with_context = PromptTemplate(
    template="{instruction}\n\nInput:\n{context}",
    input_variables=["instruction", "context"],
)

# Expose the transformers pipeline to LangChain, then build one chain per prompt.
hf_pipeline = HuggingFacePipeline(pipeline=generate_text)
llm_chain, llm_context_chain = (
    LLMChain(llm=hf_pipeline, prompt=chain_prompt)
    for chain_prompt in (prompt, prompt_with_context)
)


In [None]:
%%time
# Fix the random seed before any text is generated in this cell.
set_seed(42)

# Sample news article used as the summarization input.
context = """
Ever noticed how plane seats appear to be getting smaller and smaller? With increasing numbers of people taking to the skies, some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable - it's putting our health and safety in danger. More than squabbling over the arm rest, shrinking space on planes putting our health and safety in danger? This week, a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes, it doesn't stipulate a minimum amount of space for humans. 'In a world where animals have more rights to space and food than humans,' said Charlie Leocha, consumer representative on the committee. 'It is time that the DOT and FAA take a stand for humane treatment of passengers.' But could crowding on planes lead to more serious issues than fighting for space in the overhead lockers, crashing elbows and seat back kicking? Tests conducted by the FAA use planes with a 31 inch pitch, a standard which on some airlines has decreased . Many economy seats on United Airlines have 30 inches of room, while some airlines offer as little as 28 inches . Cynthia Corbertt, a human factors researcher with the Federal Aviation Administration, that it conducts tests on how quickly passengers can leave a plane. But these tests are conducted using planes with 31 inches between each row of seats, a standard which on some airlines has decreased, reported the Detroit News. The distance between two seats from one point on a seat to the same point on the seat behind it is known as the pitch. While most airlines stick to a pitch of 31 inches or above, some fall below this. While United Airlines has 30 inches of space, Gulf Air economy seats have between 29 and 32 inches, Air Asia offers 29 inches and Spirit Airlines offers just 28 inches. 
British Airways has a seat pitch of 31 inches, while easyJet has 29 inches, Thomson's short haul seat pitch is 28 inches, and Virgin Atlantic's is 30-31.
"""
# Two alternative phrasings of the instruction. The summarize_large_text function
# defined later in this cell reads summary_ins_1; summary_ins_2 is not referenced in this cell.
summary_ins_1 = "Give a brief summary of the text."
summary_ins_2 = "Briefly summarize the text."

# Download the Punkt sentence tokenizer
nltk.download('punkt')

def summarize_large_text(text, summarizer, chunk_size=8192, summary_length=4):
    """Summarize `text` piece by piece and join the partial summaries.

    The text is wrapped into pieces of at most `chunk_size` characters. Each piece is
    passed to `summarizer.predict` together with the module-level `summary_ins_1`
    instruction, and only the first `summary_length` sentences of each reply are kept.
    """
    def summarize_piece(piece):
        reply = summarizer.predict(instruction=summary_ins_1, context=piece).lstrip()
        leading_sentences = nltk.tokenize.sent_tokenize(reply)[:summary_length]
        return ' '.join(leading_sentences)

    return " ".join(summarize_piece(piece) for piece in textwrap.wrap(text, chunk_size))

print(summarize_large_text(context, llm_context_chain))


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


What's with the shrinking seats on planes? According to experts, it's putting our health and safety in danger.
CPU times: user 4.75 s, sys: 211 ms, total: 4.96 s
Wall time: 8.44 s


In [None]:
# Fine-tuning on the cnn_dailymail dataset starts here.
def load_dataset(csv_file):
    """Read a CSV with 'article' and 'highlights' columns and return it as a `Dataset`.

    'input' is the instruction prefix followed by the article text;
    'output' is the highlights column unchanged.
    """
    frame = pd.read_csv(csv_file)
    prompts = 'Briefly summarize the text.\n\n' + frame['article']
    pairs = pd.DataFrame({'input': prompts, 'output': frame['highlights']})
    return Dataset.from_pandas(pairs)

# Load the train, validation and test splits (in that order).
train_dataset, valid_dataset, test_dataset = map(
    load_dataset,
    ('cnn_dailymail/train.csv', 'cnn_dailymail/validation.csv', 'cnn_dailymail/test.csv'),
)

In [None]:
tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-3b", padding_side="left")

# Tokenize the first 100 prompts without truncation to see how long they get.
sample_text = train_dataset[:100]['input']
tokenized_sample = tokenizer(sample_text, truncation=False)
max_length_in_sample = max(map(len, tokenized_sample["input_ids"]))

print("Maximum length in the sample:", max_length_in_sample)

In [None]:
tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-3b")

def tokenize_function(examples):
    """Tokenize a batch of 'input' prompts, padded and truncated to 2048 tokens."""
    return tokenizer(examples['input'], max_length=2048, truncation=True, padding="max_length")

# Tokenize every split in batches.
train_dataset, valid_dataset, test_dataset = [
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, valid_dataset, test_dataset)
]

def with_labels(examples):
    """Return a 'labels' column that mirrors 'input_ids' (the model predicts its own input)."""
    return {'labels': examples['input_ids']}

# Attach the labels to every split in batches.
train_dataset, valid_dataset, test_dataset = [
    split.map(with_labels, batched=True)
    for split in (train_dataset, valid_dataset, test_dataset)
]

# Expose the columns the model consumes as torch tensors.
# 'attention_mask' is kept on purpose: every sequence was padded to max_length during
# tokenization, and without the mask the padding tokens are attended to like real text.
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
valid_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])


In [None]:
model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-3b")

# Enable gradient checkpointing to reduce GPU memory usage during backward pass.
# This has to go through the model method: assigning `model.config.gradient_checkpointing`
# on an already-built model does not switch it on.
model.gradient_checkpointing_enable()

# Causal-LM collation (mlm=False): no masked-language-model objective.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=1,  # adjust according to your GPU memory
    gradient_accumulation_steps=16,  # use gradient accumulation
    fp16=True,  # use mixed precision training
    save_steps=10_000,
    save_total_limit=2,
    prediction_loss_only=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

trainer.train()

# Save the model after training
model.save_pretrained("./results")


In [None]:
model = AutoModelForCausalLM.from_pretrained("./results")

# Wrap the fine-tuned weights in a standard text-generation pipeline.
# max_new_tokens is set explicitly because the default generation length is far
# shorter than an article-sized prompt.
generate_text = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    return_full_text=True,
)

# Rebuild the LangChain wrapper and both chains on the new pipeline; otherwise the
# chains would keep answering with the pipeline created before fine-tuning.
hf_pipeline = HuggingFacePipeline(pipeline=generate_text)
llm_chain = LLMChain(llm=hf_pipeline, prompt=prompt)
llm_context_chain = LLMChain(llm=hf_pipeline, prompt=prompt_with_context)

context = "Your article here."
print(llm_context_chain.predict(instruction="Briefly summarize the text.", context=context).lstrip())


## Conversation Starter and chatbot

In [None]:
# Load the fine-tuned checkpoint for the chatbot (any other causal-LM checkpoint path works here too).
model = AutoModelForCausalLM.from_pretrained("./results")
# Standard text-generation pipeline around the loaded weights; max_new_tokens is set
# explicitly because the default generation length is very short.
generate_text = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    return_full_text=True,
)
hf_pipeline = HuggingFacePipeline(pipeline=generate_text)



## Adding from demo

In [None]:
# Install/upgrade the packages used by the QA bot below. chromadb, langchain, transformers
# and accelerate are pinned to exact versions; bitsandbytes is left unpinned.
%pip install -U chromadb==0.3.22 langchain==0.0.164 transformers==4.29.0 accelerate==0.19.0 bitsandbytes

### Class QABot

In [None]:
from tensorflow.python.client import device_lib
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM
from langchain import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationSummaryBufferMemory
from langchain import LLMChain
import textwrap
import nltk
import pandas as pd
from datasets import Dataset
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

class QABot:
    """Retrieval-backed conversational bot: a Chroma vector store plus a Hugging Face LLM.

    Call ``build_embeddings_and_db()`` to open the vector store and ``build_qa_chain()``
    to get a chain that is invoked with ``{"question": ..., "chat_history": ...}``.
    """

    def __init__(self, model_name, collection_name, vector_db_path, persist_directory):
        """Store the configuration; models and stores are created by the build_* methods.

        Args:
            model_name: Checkpoint used for the answering pipeline.
            collection_name: Name of the Chroma collection.
            vector_db_path: Directory handed to Chroma as its persist directory.
            persist_directory: Stored on the instance.
                NOTE(review): no method of this class reads it — confirm whether it
                or ``vector_db_path`` should be given to Chroma.
        """
        self.model_name = model_name
        self.collection_name = collection_name
        self.vector_db_path = vector_db_path
        self.persist_directory = persist_directory
        self.hf_embed = None
        self.chroma_db = None
        self.template = None
        self.prompt = None
        self.instruct_pipeline = None
        self.hf_pipe = None
        self.tokenizer = None
        self.model = None
        self.pipe_summary = None
        self.hf_summary = None
        self.memory = None
        self.conversation = None

    def get_available_gpus(self):
        """Return the names of the local devices that TensorFlow reports as GPUs."""
        local_device_protos = device_lib.list_local_devices()
        return [x.name for x in local_device_protos if x.device_type == 'GPU']

    def check_gpu_availability(self):
        """Raise if no GPU is visible."""
        if len(self.get_available_gpus()) == 0:
            raise Exception("Running dolly without GPU will be slow. We recommend you switch to a Single Node cluster with at least 1 GPU to properly run this demo.")

    def build_embeddings_and_db(self):
        """Create the sentence-embedding model and open the Chroma collection."""
        self.hf_embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.chroma_db = Chroma(collection_name=self.collection_name, embedding_function=self.hf_embed, persist_directory=self.vector_db_path)

    def build_qa_chain(self):
        """Build and return the conversational retrieval chain.

        The chain answers with ``model_name``, retrieves context from the Chroma store,
        and keeps a summarized chat history in memory.
        """
        torch.cuda.empty_cache()

        # The chain needs a retriever, so open the vector store if that has not happened yet.
        if self.chroma_db is None:
            self.build_embeddings_and_db()

        # Defining the prompt template
        # langchain will load our similar documents as {context}; the retrieval chain
        # supplies the user turn as {question} and the history as {chat_history}.
        self.template = """
        You are a chatbot having a conversation with a human. Your are asked to give relevant topics related to the news and
        chat with the user for any other follow up questions with the related topics.
        Your traits include high politeness, high professionalism, and high confidence.
        Given the following extracted parts of a long document and a question, answer the user question. If you don't know, say that you do not know.

        {context}

        {chat_history}

        {question}

        Response:
        """

        self.prompt = PromptTemplate(input_variables=['context', 'question', 'chat_history'], template=self.template)

        # Setting up the HuggingFace pipeline
        self.instruct_pipeline = pipeline(model=self.model_name, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto", return_full_text=True, max_new_tokens=256, top_p=0.95, top_k=50)
        self.hf_pipe = HuggingFacePipeline(pipeline=self.instruct_pipeline)

        # Add a summarizer to our memory conversation
        # Let's make sure we don't summarize the discussion too much to avoid losing to much of the content

        # Model we'll use to summarize our chat history
        # We could use one of these models: https://huggingface.co/models?filter=summarization. facebook/bart-large-cnn gives great results, we'll use t5-small for memory
        # The summarization pipeline needs a sequence-to-sequence model, so t5-small is
        # loaded with AutoModelForSeq2SeqLM.
        self.tokenizer = AutoTokenizer.from_pretrained("t5-small")
        self.model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

        self.pipe_summary = pipeline("summarization", model=self.model, tokenizer=self.tokenizer)
        self.hf_summary = HuggingFacePipeline(pipeline=self.pipe_summary)

        # Will keep 500 tokens and then ask for a summary.
        self.memory = ConversationSummaryBufferMemory(llm =self.hf_summary, max_token_limit=500, memory_key="chat_history", return_messages=True)

        print("loading chain, this can take some time...")
        # Answers come from the main model (hf_pipe); the small summarizer is used only by the memory.
        self.conversation = ConversationalRetrievalChain.from_llm(
            llm=self.hf_pipe,
            retriever=self.chroma_db.as_retriever(),
            memory=self.memory,
            verbose=True,
            combine_docs_chain_kwargs={'prompt': self.prompt})

        return self.conversation


In [None]:
# Configure the QA bot, verify a GPU is visible, open the vector store and build the chain.
# NOTE(review): collection_name says "gardening_docs" while vector_db_path says "news" — confirm which corpus is meant.
# NOTE(review): persist_directory looks like a placeholder value; QABot stores it but its visible methods never read it.
bot = QABot(
    model_name="google/pegasus-cnn_dailymail",
    collection_name="gardening_docs",
    vector_db_path="/dbfs/"+"news"+"/vector_db",
    persist_directory="path/to/persist/directory"
)
# Raises when no GPU is found.
bot.check_gpu_availability()
bot.build_embeddings_and_db()
conversation = bot.build_qa_chain()

### Class ChatBot

In [None]:
class ChatBot():
  """Small HTML chat front end over a document QA chain and a vector store.

  Args:
    db: Vector store exposing ``similarity_search(query, k=...)``.
    qa_chain_builder: Optional zero-argument callable returning the QA chain. When
      omitted, a module-level ``build_qa_chain`` function is looked up at call time.
      The chain is called with ``{"input_documents": ..., "human_input": ...}`` and
      must return ``output_text`` and ``input_documents``.
  """

  def __init__(self, db, qa_chain_builder=None):
    self.db = db
    self.qa_chain_builder = qa_chain_builder
    self.reset_context()

  @staticmethod
  def _show_html(markup):
    """Render markup with Databricks' displayHTML when it exists, else with IPython's HTML display."""
    try:
      render = displayHTML
    except NameError:
      # Not running on Databricks (for example Colab or plain Jupyter).
      from IPython.display import HTML, display
      display(HTML(markup))
    else:
      render(markup)

  def reset_context(self):
    """Forget previous sources and questions, rebuild the QA chain and show the greeting."""
    self.sources = []
    self.discussion = []
    # Building the chain will load Dolly and can take some time depending on the model size and your GPU
    builder = self.qa_chain_builder if self.qa_chain_builder is not None else build_qa_chain
    self.qa_chain = builder()
    self._show_html("<h1>Hi! I'm a chat bot specialized in gardening. How Can I help you today?</h1>")

  def get_similar_docs(self, question, similar_doc_count):
    """Return the ``similar_doc_count`` documents closest to ``question``."""
    return self.db.similarity_search(question, k=similar_doc_count)

  def chat(self, question):
    """Answer ``question`` and render the question, the answer and the source documents as HTML."""
    import html

    # Keep the last 3 discussion to search similar content
    self.discussion.append(question)
    similar_docs = self.get_similar_docs(" \n".join(self.discussion[-3:]), similar_doc_count=2)
    # Remove similar doc if they're already in the last questions (as it's already in the history)
    similar_docs = [doc for doc in similar_docs if doc.metadata['source'] not in self.sources[-3:]]

    result = self.qa_chain({"input_documents": similar_docs, "human_input": question})
    # Cleanup the answer for better display:
    answer = result['output_text'].capitalize()

    # User input, model output and document text are escaped before being placed in the markup.
    safe_question = html.escape(question)
    safe_answer = html.escape(answer)
    result_html = f"<p><blockquote style=\"font-size:24px\">{safe_question}</blockquote></p>"
    result_html += f"<p><blockquote style=\"font-size:18px\">{safe_answer}</blockquote></p>"
    result_html += "<p><hr/></p>"
    for d in result["input_documents"]:
      source_id = d.metadata["source"]
      self.sources.append(source_id)
      safe_source = html.escape(str(source_id))
      safe_content = html.escape(d.page_content)
      result_html += f"<p><blockquote>{safe_content}<br/>(Source: <a href=\"https://gardening.stackexchange.com/a/{safe_source}\">{safe_source}</a>)</blockquote></p>"
    self._show_html(result_html)

# The vector store is held by the QABot instance created above; there is no top-level `chroma_db`.
chat_bot = ChatBot(bot.chroma_db)

In [None]:
### Test
# Interactive smoke test for the `conversation` chain built above.
import time

from langchain.callbacks import get_openai_callback

chat_history = []
benchmark = False  # set to True to print the timing and callback statistics of every turn

name = input("Your name: ")
location = input("Your location: ")

question = f"My name is {name} and I am near {location}"
result = conversation({"question": question, "chat_history": chat_history})
chat_history.append((question, result["answer"]))
print(f"Swrlie: {result['answer']}")
###

print("Chat with Swrlie:")
while True:
    try:
        print("Your question:")
        question = input()
        if question=="quit" or question=="q":
            break
        with get_openai_callback() as cb:
            start_time = time.time()
            result = conversation({"question": question, "chat_history": chat_history})
            end_time = time.time()
        print(f"Swrlie: {result['answer']}")
        if benchmark:
            print(f"Time taken to generate response: {end_time - start_time} seconds")
            print(cb)
        chat_history.append((question, result["answer"]))
    except EOFError:
        # No more input is available; without this the generic handler below would loop forever.
        break
    except Exception as e:
        # Best effort: report the failure and keep the chat session alive.
        print(f"Error: {e}")

# Restart

In [None]:
!pip install huggingface_hub > /dev/null
!pip install langchain
!pip install faiss-gpu
!pip install transformers > /dev/null
!pip install accelerate==0.19.0
!pip install sentence_transformers

### Class ChatSystem

In [None]:
import os
import time

import accelerate
import torch
from langchain import HuggingFaceHub, PromptTemplate, LLMChain
from langchain.chains import ConversationalRetrievalChain, ConversationChain
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory, ReadOnlySharedMemory
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM


class ChatSystem:
    """News chat system: tag generation, retrieval over the article, and two conversation chains.

    ``load_document(filename)`` drives the whole setup. It reads the article, generates
    tags with the hosted tag model, indexes the article in FAISS, then builds
    ``news_conversation`` (retrieval-backed) and ``tags_conversation`` (tag-backed).
    """

    def __init__(self, api_token, repo_id, model_name, temperature=0):
        """Configure the hosted tag model and load the local chat model.

        Args:
            api_token: Hugging Face Hub token; exported as HUGGINGFACEHUB_API_TOKEN.
            repo_id: Hub repository of the tag-generation model.
            model_name: Checkpoint loaded locally for chatting.
            temperature: Sampling temperature passed to the hosted tag model.
        """
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
        self.repo_id = repo_id
        self.model_name = model_name
        self.llm_tags = HuggingFaceHub(repo_id=self.repo_id, model_kwargs={"temperature": temperature})
        self.article = None
        self.article_path = None  # file given to load_document(); also the file that gets indexed
        self.tags_text = None  # tags generated for the article, used by the tag conversation prompt
        self.llm_chat = None
        self.news_conversation = None
        self.tags_conversation = None
        self.chat_history = []
        self.initialize_model()

    def initialize_model(self):
        """Load ``model_name`` as a transformers pipeline and wrap it for LangChain."""
        instruct_pipeline = pipeline(model=self.model_name, torch_dtype=torch.bfloat16, trust_remote_code=True,
                                     device_map="auto", return_full_text=True, max_new_tokens=256, top_p=0.95, top_k=50)
        self.llm_chat = HuggingFacePipeline(pipeline=instruct_pipeline)

    def load_document(self, filename):
        """Read the article at ``filename`` and build every chain that depends on it."""
        self.article_path = filename
        with open(filename) as f:
            # One string, so the prompt contains the article text rather than the repr of a list of lines.
            self.article = f.read()
        self.initialize_llm_chain()

    def initialize_llm_chain(self):
        """Build the tag-generation chain and continue with document processing."""
        template = "article: {article}."
        prompt = PromptTemplate(template=template, input_variables=["article"])
        llm_chain = LLMChain(prompt=prompt, llm=self.llm_tags)
        self.process_and_split_document(llm_chain)

    def process_and_split_document(self, llm_chain):
        """Generate tags for the article, index the article in FAISS and continue with the memory setup."""
        tags = llm_chain.run(self.article)
        # run() returns a single string; joining a string with " " would put a space
        # between every character, so only a real sequence of tags is joined.
        self.tags_text = tags if isinstance(tags, str) else " ".join(tags)

        # Index the document that was loaded; 'test.txt' remains the fallback when
        # this method is called without load_document().
        source_path = self.article_path if self.article_path is not None else 'test.txt'
        loader = TextLoader(source_path)
        document = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2500, chunk_overlap=100, length_function=len)
        split_document = text_splitter.split_documents(document)
        embeddings_1 = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        combined_vector_store = FAISS.from_documents(split_document, embeddings_1)
        retriever = combined_vector_store.as_retriever(search_kwargs=dict(k=3))
        self.initialize_memory(retriever)

    def initialize_memory(self, retriever):
        """Create the summarizing chat memory and continue with the news conversation."""
        memory = ConversationSummaryBufferMemory(llm=self.llm_chat, max_token_limit=500, memory_key="chat_history", return_messages=True)
        self.initialize_news_conversation(retriever, memory)

    def initialize_news_conversation(self, retriever, memory):
        """Build the retrieval-backed conversation chain, then the tag conversation."""
        instruction = """
        You are a chatbot having a conversation with a human. Your are asked to chat with the user for any other follow up questions with the news.
        Given the following extracted parts of a long document and a question, answer the user question.
        If you don't know, say that you do not know.
        """
        query_template = instruction + """
                =========
                context: {context}
                =========
                Chat History:{chat_history}
                =========
                Question: {question}
                =========
                """
        QA = PromptTemplate(template=query_template, input_variables=["context", "chat_history", "question"])
        print("loading chain, this can take some time...")
        self.news_conversation = ConversationalRetrievalChain.from_llm(
            llm=self.llm_chat,
            retriever=retriever,
            memory=memory,
            combine_docs_chain_kwargs={'prompt': QA})
        self.initialize_tag_conversation()

    def initialize_tag_conversation(self):
        """Build the conversation chain whose prompt embeds the generated tags.

        Raises:
            RuntimeError: If no tags have been generated yet.
        """
        if self.tags_text is None:
            raise RuntimeError("No tags available: call load_document() before initialize_tag_conversation().")

        tag_instruction = """
      You are a chatbot having a conversation with a human. Your are asked to chat with the user for any other follow up questions with the given topics.
      Given the related tags and a question, answer the user question.
      If you don't know, say that you do not know.
      """
        # The tags become part of a format-style template, so literal braces must be doubled.
        safe_tags = self.tags_text.replace("{", "{{").replace("}", "}}")
        tag_template = tag_instruction + """tags:""" + safe_tags + """
                =========
                Chat History:{history}
                =========
                Question: {input}
                =========
                """
        tag_prompt = PromptTemplate(template=tag_template, input_variables=["history", "input"])
        print("loading chain, this can take some time...")
        self.tags_conversation = ConversationChain(llm=self.llm_chat, prompt=tag_prompt, memory=ConversationBufferMemory())


In [None]:
import getpass
import os

# Never commit the Hugging Face token. Read it from the environment, or prompt for it once.
# The token that used to be hard-coded in this cell has been exposed and should be revoked.
if "HUGGINGFACEHUB_API_TOKEN" not in os.environ:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass("Hugging Face API token: ")
api_token = os.environ["HUGGINGFACEHUB_API_TOKEN"]

repo_id = "fabiochiu/t5-base-tag-generation" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options

model_name = "gpt2" # can use dolly-v2-3b, dolly-v2-7b or dolly-v2-12b for smaller model and faster inferences.

# Note: this rebinds repo_id, replacing the tag-generation repository assigned above.
repo_id = "google/flan-t5-large" # this one is ok for news
llm_chat = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0})


Original (the same tag-generation and retrieval steps as standalone cells, without the `ChatSystem` class)

In [None]:
import getpass
import os

from langchain import HuggingFaceHub
from langchain import PromptTemplate, LLMChain

# Never commit the Hugging Face token. Read it from the environment, or prompt for it once.
# The token that used to be hard-coded in this cell has been exposed and should be revoked.
if "HUGGINGFACEHUB_API_TOKEN" not in os.environ:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass("Hugging Face API token: ")

repo_id = "fabiochiu/t5-base-tag-generation" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options

llm_tags = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0})

# Read the article as one string so the prompt contains its text, not the repr of a list of lines.
with open("test.txt") as f:
  content = f.read()

template = """article: {article}."""
prompt = PromptTemplate(template=template, input_variables=["article"])
llm_chain = LLMChain(prompt=prompt, llm=llm_tags)

article = content

# Generate and show the tags for the article.
print(llm_chain.run(article))


Digital Marketing, Digital, Marketing, Strategy, Online, Online Marketing, Marketing Strategies, Decision Making


In [None]:
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings,SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers.document_compressors import EmbeddingsFilter
import time
from langchain import HuggingFacePipeline
import torch
import accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM
from langchain.memory import ConversationBufferMemory,ConversationSummaryBufferMemory
from langchain.document_loaders import TextLoader

# Generate the topic tags for the article.
tags = llm_chain.run(article)
# `tags` is a single string (it is concatenated with str literals further down
# and printed as a comma-separated line). Calling " ".join() on a string puts a
# space between every character, so only join when a sequence of separate tags
# is received.
tags_text = tags if isinstance(tags, str) else " ".join(tags)

# Load the article and split it into overlapping chunks for retrieval.
loader = TextLoader('test.txt')
document = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 2500,
        chunk_overlap  = 100,
        length_function = len,
    )
split_document = text_splitter.split_documents(document)

# Embed the chunks into a FAISS index and expose it as a retriever that returns
# the 3 best-matching chunks per query.
embeddings_1 = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
combined_vector_store = FAISS.from_documents(split_document, embeddings_1)
retriever = combined_vector_store.as_retriever(search_kwargs=dict(k=3))


Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [None]:
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
# Model id handed to the transformers pipeline. The original note lists
# dolly-v2-3b, dolly-v2-7b and dolly-v2-12b as alternatives.
model_name = "gpt2"

# Local generation pipeline: bfloat16 weights, automatic device placement,
# sampling limited by top_p / top_k, at most 256 new tokens, with the prompt
# echoed back in the output (return_full_text=True).
instruct_pipeline = pipeline(
    model=model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    return_full_text=True,
    max_new_tokens=256,
    top_p=0.95,
    top_k=50,
)

# Wrap the pipeline so LangChain can use it as an LLM.
llm_chat = HuggingFacePipeline(pipeline=instruct_pipeline)

# Conversation memory stored under "chat_history"; it is given `llm_chat` and a
# 500-token limit, and returns history as message objects.
memory = ConversationSummaryBufferMemory(
    llm=llm_chat,
    max_token_limit=500,
    memory_key="chat_history",
    return_messages=True,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [None]:
# Rebind `llm_chat` to a Hub-hosted model (noted by the author as "ok for news").
repo_id = "google/flan-t5-large"
llm_chat = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs=dict(temperature=0),
)

In [None]:
# System-style instruction that opens the question-answering prompt.
instruction = """
You are a chatbot having a conversation with a human. Your are asked to chat with the user for any other follow up questions with the news.
Given the following extracted parts of a long document and a question, answer the user question.
If you don't know, say that you do not know.
"""

# Full template: instruction, then the retrieved context, the chat history and
# the user's question, each in its own delimited section.
Query_template = "".join((
    instruction,
    """
        =========
        context: {context}
        =========
        Chat History:{chat_history}
        =========
        Question: {question}
        =========
        """,
))
QA = PromptTemplate(
    input_variables=["context", "chat_history", "question"],
    template=Query_template,
)

print("loading chain, this can take some time...")

# Retrieval chain over the article chunks, using `memory` for the chat history
# and `QA` as the prompt of the document-combining step.
news_conversation = ConversationalRetrievalChain.from_llm(
    llm=llm_chat,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs=dict(prompt=QA),
)

loading chain, this can take some time...


In [None]:
from langchain.chains import ConversationChain
# Instruction that opens the tag-based chat prompt.
tag_instruction = """
You are a chatbot having a conversation with a human. Your are asked to chat with the user for any other follow up questions with the given topics.
Given the related tags and a question, answer the user question.
If you don't know, say that you do not know.
"""

# Template: instruction, the generated tags, then the history and question
# placeholders.
tag_template = "".join((
    tag_instruction,
    "tags:",
    tags,
    """
        =========
        Chat History:{history}
        =========
        Question: {input}
        =========
        """,
))
tag_prompt = PromptTemplate(
    input_variables=["history", "input"],
    template=tag_template,
)

print("loading chain, this can take some time...")

# NOTE(review): `memory2` is constructed but the chain below is given a fresh
# ConversationBufferMemory instead; confirm which memory was intended.
memory2 = ConversationSummaryBufferMemory(
    llm=llm_chat,
    max_token_limit=500,
    memory_key="history",
    return_messages=True,
)

tags_conversation = ConversationChain(
    llm=llm_chat,
    prompt=tag_prompt,
    memory=ConversationBufferMemory(),
)


loading chain, this can take some time...


In [None]:
# Interactive chat about the news article. Type "q" to stop.
chat_history = []
while (question := input()) != "q":
  # Time only the chain call itself.
  start_time = time.time()
  result = news_conversation(dict(question=question, chat_history=chat_history))
  end_time = time.time()

  print(result["answer"])
  print(f"Time taken to generate response: {end_time - start_time} seconds")

what is it targteing to
gamers
Time taken to generate response: 6.014962673187256 seconds
why
The flavors are also a way to reach people online, in games and beyond.
Time taken to generate response: 7.932666063308716 seconds
q


In [None]:
# Interactive chat driven by the generated tags. Type "q" to stop.
chat_history = []
while (question := input()) != "q":
  # Time only the chain call itself.
  start_time = time.time()
  result = tags_conversation(dict(input=question, history=chat_history))
  end_time = time.time()

  print(result["response"])
  print(f"Time taken to generate response: {end_time - start_time} seconds")

what marketing strategy can you give

Marketing Strategy - "When you are able to provide a good service or product online, you can potentially reach a lot of people and can make money at the same time."
Time taken to generate response: 2.2363839149475098 seconds
I need some

Here are some marketing strategies:
1. Provide a marketing strategy based on the answer to the previous question.
2. When you are able to provide a good service or product online, you can potentially reach a lot of people and can make money at the same time.
3. Content creation.
4. Search engine optimization.
Time taken to generate response: 4.315000534057617 seconds
is digital marketing good

Digital Marketing is good when you can provide a good service or product online, you can potentially reach a lot of people and can make money at the same time
Time taken to generate response: 2.0085484981536865 seconds
why

Digital Marketing is good when you can provide a good service or product online, you can potentially re