<a href="https://colab.research.google.com/github/NiekVerhoeff/workshop/blob/main/chat_with_zip_hf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Chat with your files - HuggingFace**

This notebook demonstrates a Retrieval-Augmented Generation (RAG) system that can read data from different files packed together in a ZIP file.

This notebook requires the runtime type to be set to T4 GPU. To do this, go to the top right corner and click the downward-facing arrow next to the runtime details. Click Change runtime type (Runtimetype wijzigen), choose T4 GPU, and save.

This notebook was made with help from the following article: https://sabeerali.medium.com/build-your-personal-rag-chatbot-chat-freely-with-your-data-powered-by-llamaindex-and-open-llms-63eb8ad1a053

In [None]:
#@title Initialize things

!pip install llama-index
!pip install docx2txt
#!pip install torch transformers python-pptx Pillow
%pip install llama-index-readers-web
%pip install llama-index-llms-huggingface
#!pip install "transformers[torch]" "huggingface_hub[inference]"
!pip install transformers accelerate pypdf einops bitsandbytes
!pip install llama-index-embeddings-huggingface

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
#from llama_index.llms import HuggingFaceLLM

from llama_index.core import SimpleDirectoryReader
import nest_asyncio

nest_asyncio.apply()

import os
from pydantic import BaseModel, Field
from typing import List
from typing import Dict
from typing import Any
from llama_index.core.extractors import PydanticProgramExtractor
from llama_index.core.node_parser import SentenceSplitter
from google.colab import userdata
from llama_index.core.ingestion import IngestionPipeline

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from typing import List, Optional

from llama_index.llms.huggingface import (
    HuggingFaceInferenceAPI,
    HuggingFaceLLM,
)
#HF_TOKEN = userdata.get('HF_TOKEN')

In [None]:
#@title Set up model

#@markdown Choose any Text Generation model from Huggingface: https://huggingface.co/models?pipeline_tag=text-generation&sort=trending
import torch
import accelerate
import bitsandbytes
from llama_index.core import PromptTemplate

# System prompt handed to the LLM wrapper.
system_prompt = (
    "<|SYSTEM|>#\n"
    "At your service\n"
)

# Wraps llama-index's internal default prompts in the user/assistant markers.
query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")

model = "mistralai/Mistral-7B-v0.1" #@param {type:"string"}

# The same identifier is used for both the model weights and the tokenizer.
llm = HuggingFaceLLM(
    model_name=model,
    tokenizer_name=model,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    context_window=4096,
    max_new_tokens=256,
    device_map="auto",
    tokenizer_kwargs=dict(max_length=4096),
    # Sampling is switched off, so generation is greedy.
    generate_kwargs=dict(temperature=0, do_sample=False),
    # Options forwarded to the model loader to reduce memory usage.
    # NOTE(review): 4-bit options (bnb_4bit_*) are passed together with
    # load_in_8bit=True -- confirm which quantization mode is intended.
    model_kwargs=dict(
        torch_dtype=torch.float16,
        llm_int8_enable_fp32_cpu_offload=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        load_in_8bit=True,
    ),
)

In [None]:
#@title Upload a zip-file with documents
#@markdown supported extensions are listed here: https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/

from google.colab import files
import zipfile
import os

# Open the upload dialog; the result is a mapping keyed by uploaded filename.
uploaded = files.upload()  # Select and upload the ZIP file

# Fail with a readable message instead of a bare StopIteration when nothing
# was uploaded (for example when the dialog was cancelled).
if not uploaded:
    raise ValueError("No file was uploaded. Run this cell again and select a ZIP file.")

# Only the first uploaded file is used.
zip_filename = next(iter(uploaded.keys()))

# Reject non-ZIP uploads up front with a clear message.
if not zipfile.is_zipfile(zip_filename):
    raise ValueError(f"'{zip_filename}' is not a valid ZIP file.")

# Extract into the current working directory and remember the top-level
# entries so their paths can be shown to the user.
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall()
    top_level_entries = sorted(
        {name.split('/')[0] for name in zip_ref.namelist() if name.split('/')[0]}
    )

print("Folder structure has been extracted.")

# These are the paths that can be pasted into the "Load data" cell.
for entry in top_level_entries:
    print(os.path.abspath(entry))


In [None]:
#@title Load data

# @markdown If you're working in Colab, go to the files menu by clicking the folder symbol on the left. Right click (or click on the three dots) on the folder you uploaded and choose Copy Path (Pad kopiëren) and paste below. Check the recursive box if the given directory has subdirectories.

directory = '/content/bronnen_in_bytes'#@param {type:"string"}
recursive = False # @param {type:"boolean"}

reader = SimpleDirectoryReader(
    input_dir=directory,
    recursive=recursive,
)

# Collect every document the reader produces. The text is kept exactly as it
# was read: the previous `doc.text.upper()` call upper-cased all content
# before it was indexed, which changes what is embedded and what the LLM is
# shown as context.
all_docs = []

# iter_data() yields lists of documents (the original code iterated it the
# same way); print a short summary per batch instead of the full text so the
# output stays readable for large files.
for file_docs in reader.iter_data():
    all_docs.extend(file_docs)
    if file_docs:
        file_name = file_docs[0].metadata.get("file_name", "<unknown file>")
        print(f"Loaded {len(file_docs)} document(s) from {file_name}")

print(f"Total: {len(all_docs)} document(s) loaded from {directory}")

In [None]:
#@title Chat Away!

# `ServiceContext` is deprecated in llama-index; the global `Settings` object
# is its documented replacement. The same three options as before are set:
# the LLM, the 'local' embedding model selector and a chunk size of 1024.
from llama_index.core import Settings

Settings.llm = llm
Settings.embed_model = "local"
Settings.chunk_size = 1024

# Build the vector index over the loaded documents and expose it as a
# streaming query engine.
index = VectorStoreIndex.from_documents(all_docs)
query_engine = index.as_query_engine(streaming=True)

prompt = "" #@param {type:"string"}

# The form field is empty by default; only query when a question was entered.
if prompt.strip():
    response_stream = query_engine.query(prompt)
    response_stream.print_response_stream()
else:
    print("Enter a question in the prompt field and run this cell again.")