### Import libraries and log in to Hugging Face to use Llama models (you must have access to the Llama models in your Hugging Face account)

In [1]:
# Install necessary libraries
!pip install gradio transformers nltk huggingface_hub langchain torch accelerate langchain_community -q


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.2/62.2 MB[0m [31m35.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m321.9/321.9 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m107.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m83.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Import libraries and initialize
import gc
import gradio as gr
import torch
import nltk
from huggingface_hub import notebook_login
from transformers import AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Reclaim Python garbage and cached CUDA memory before the model is loaded.
gc.collect()
torch.cuda.empty_cache()

# Fetch the NLTK "punkt" tokenizer data.
nltk.download("punkt")

# Show the Hugging Face login widget; kept last so it is the cell's final output.
notebook_login()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### Set up the summarizer

In [3]:
# Set up the summarization model: tokenizer plus a text-generation pipeline.
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llama_pipeline = pipeline(
    "text-generation",
    model=model_name,
    # Reuse the tokenizer loaded above instead of letting the pipeline load a second copy.
    tokenizer=tokenizer,
    # Load the weights in half precision.
    torch_dtype=torch.float16,
    # Delegate device placement to the library.
    device_map="auto",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Device set to use cuda:0


In [4]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can be
# passed as the `llm` of the summary chain.
llama_llm = HuggingFacePipeline(pipeline=llama_pipeline)

  llama_llm = HuggingFacePipeline(pipeline=llama_pipeline)


### Set up the prompt for the LLM

In [5]:
# Prompt: an instruction line carrying the requested token budget, the text to
# summarize, and a trailing "Summary:" cue after which the model writes its answer.
summary_template = PromptTemplate(
    template=(
        "Summarize the following text in a concise manner, "
        "with a maximum of {max_length} tokens:\n"
        "\n"
        "{text}\n"
        "\n"
        "Summary:"
    ),
    input_variables=["text", "max_length"],
)

# Chain that fills the prompt and sends it to the wrapped Llama pipeline.
summary_chain = LLMChain(prompt=summary_template, llm=llama_llm)

  summary_chain = LLMChain(llm=llama_llm, prompt=summary_template)


### Summarization function

In [9]:
# Define summarization function
def summarize(input_text, max_length):
    """Summarize ``input_text`` with the LangChain summary chain.

    Args:
        input_text: Text to summarize. ``None`` or whitespace-only input
            returns an empty string without invoking the model.
        max_length: Requested summary length in tokens, forwarded to the
            prompt. Values that cannot be converted to an integer (for
            example ``None`` from a cleared number field) fall back to 50;
            values below 1 are raised to 1.

    Returns:
        The text that follows the prompt's "Summary:" marker, stripped of
        surrounding whitespace, or the whole stripped output when no marker
        is present.
    """
    marker = "Summary:"
    default_max_length = 50

    text = (input_text or "").strip()
    if not text:
        # Nothing to summarize: skip the model call entirely.
        return ""

    try:
        token_limit = max(1, int(max_length))
    except (TypeError, ValueError, OverflowError):
        # None, NaN, infinity or a non-numeric value.
        token_limit = default_max_length

    # Generate summary using LangChain
    full_output = summary_chain.run(text=text, max_length=token_limit).strip()

    # When the output echoes the prompt, cut at the first marker that follows
    # the input text: that one belongs to the prompt. Everything after it is
    # the model's answer and is kept whole, even if it contains the marker
    # again.
    text_pos = full_output.find(text)
    if text_pos != -1:
        marker_pos = full_output.find(marker, text_pos + len(text))
        if marker_pos != -1:
            return full_output[marker_pos + len(marker):].strip()

    # The input text was not echoed back: keep what follows the last marker,
    # or return the output unchanged when there is no marker.
    if marker in full_output:
        return full_output.rpartition(marker)[2].strip()
    return full_output

### Creating UI with Gradio

In [10]:
# Build the Gradio interface: a text input, a length field, a trigger button
# and an output box wired to `summarize`.
with gr.Blocks() as demo:
    gr.Markdown("## Summarize Text")
    input_text = gr.Textbox(label="Input Text", lines=5)
    # The value is used as a token count, so restrict it to whole numbers >= 1.
    max_len = gr.Number(label="Max Summary Length", value=50, precision=0, minimum=1)
    btn = gr.Button("Summarize")
    summary_box = gr.Textbox(label="Summary")

    # Clicking the button runs summarize(input_text, max_len) and shows the result.
    btn.click(fn=summarize, inputs=[input_text, max_len], outputs=[summary_box])

In [11]:
# Start serving the Gradio interface held in `demo`.
# NOTE(review): the recorded output indicates that Colab enables share=True
# automatically, which exposes a public URL — confirm this is intended.
demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://141d5a758e3ab9db64.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


