# Setup
Connect to a Colab Runtime **with** GPU enabled! 
- Download files and libraries
- Download the model
- Set up the required environment variables
- Create the vector database for the document corpus

In [1]:
!git clone https://github.com/Mamiglia/privateGPT
!mv privateGPT/* ./
!pip install -r requirements.txt

Cloning into 'privateGPT'...
remote: Enumerating objects: 301, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 301 (delta 8), reused 16 (delta 6), pack-reused 282[K
Receiving objects: 100% (301/301), 109.47 KiB | 5.47 MiB/s, done.
Resolving deltas: 100% (151/151), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting langchain==0.0.177 (from -r requirements.txt (line 1))
  Downloading langchain-0.0.177-py3-none-any.whl (877 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m877.7/877.7 kB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gpt4all==0.2.3 (from -r requirements.txt (line 2))
  Downloading gpt4all-0.2.3-py3-none-manylinux1_x86_64.whl (329 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.1/329.1 kB[0m [31m35.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting chromadb==0.3.23 (from -r requirem

In [2]:
# Download documents corpus
!rm -r source_documents/?* privateGPT/
!gdown 1rkdQMKldyKYcebQrNcSzOzPUPPbHpXBL # brutti's slides
!gdown 1ukb0jrIgn6yupw8MSsfIrr_Ie7aZ35RI # statistics books
!unzip documents.zip -d source_documents/
!unzip books.zip -d source_documents/books/

Downloading...
From: https://drive.google.com/uc?id=1rkdQMKldyKYcebQrNcSzOzPUPPbHpXBL
To: /content/documents.zip
100% 93.2M/93.2M [00:00<00:00, 162MB/s]
Downloading...
From: https://drive.google.com/uc?id=1ukb0jrIgn6yupw8MSsfIrr_Ie7aZ35RI
To: /content/books.zip
100% 33.8M/33.8M [00:00<00:00, 128MB/s]
Archive:  documents.zip
   creating: source_documents/documents/
   creating: source_documents/documents/SDS-I/
  inflating: source_documents/documents/SDS-I/Topic 06 slide part-01 (20211025).pdf  
  inflating: source_documents/documents/SDS-I/Topic 06 slide part-02 (20211025).pdf  
  inflating: source_documents/documents/SDS-I/Topic_00_slide_handout_(20222009).pdf  
  inflating: source_documents/documents/SDS-I/Topic_01_slide_handout_(20201016).pdf  
  inflating: source_documents/documents/SDS-I/Topic_02_slide_handout_(20201016).pdf  
  inflating: source_documents/documents/SDS-I/Topic_03_slide_handout_(20201016).pdf  
  inflating: source_documents/documents/SDS-I/Topic_04a_slide_handout_

In [4]:
# Set up Llama.cpp for using GPU
!pip uninstall -y llama-cpp-python
%env LLAMA_CUBLAS=1
%env CMAKE_ARGS=-DLLAMA_CUBLAS=on
%env FORCE_CMAKE=1
!pip install llama-cpp-python --no-cache-dir --verbose 

Found existing installation: llama-cpp-python 0.1.50
Uninstalling llama-cpp-python-0.1.50:
  Successfully uninstalled llama-cpp-python-0.1.50
env: LLAMA_CUBLAS=1
env: CMAKE_ARGS=-DLLAMA_CUBLAS=on
env: FORCE_CMAKE=1
Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.1.61.tar.gz (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m80.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Running command pip subprocess to install build dependencies
  Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)
  Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/, https://us-python.pkg.dev/colab-wheels/public/simple/
  Collecting setuptools>=42
    Using cached setuptools-67.8.0-py3-none-any.whl (1.1 MB)
  Collect

In [5]:
from huggingface_hub import hf_hub_download

# Fetch the quantized Vicuna-7B weights for Llama.cpp from the Hugging Face Hub
# into /content/models/.
model_file = hf_hub_download(
    "vicuna/ggml-vicuna-7b-1.1",
    "ggml-vic7b-q5_1.bin",
    local_dir="/content/models/",
)
# Leave the returned local path as the cell's displayed result.
model_file

Downloading ggml-vic7b-q5_1.bin:   0%|          | 0.00/5.06G [00:00<?, ?B/s]

'/content/models/ggml-vic7b-q5_1.bin'

In [6]:
ENV = '''PERSIST_DIRECTORY=db
MODEL_TYPE=LlamaCpp
MODEL_PATH=/content/models/ggml-vic7b-q5_1.bin
EMBEDDINGS_MODEL_NAME=all-MiniLM-L6-v2
MODEL_N_CTX=1000
N_GPU_LAYERS=50
'''
with open('.env', 'w') as f:
  f.write(ENV)

# ingest document corpus and create vectorial DB
!gdown 1peXz6l-lmG5bhHiizhCQ9pbCDBcqJLuZ
!unzip db.zip
!python ingest.py

Downloading...
From: https://drive.google.com/uc?id=1peXz6l-lmG5bhHiizhCQ9pbCDBcqJLuZ
To: /content/db.zip
100% 75.8M/75.8M [00:00<00:00, 101MB/s] 
Archive:  db.zip
   creating: db/
  inflating: db/chroma-embeddings.parquet  
   creating: db/index/
  inflating: db/index/index_31a0072f-e557-46bb-9940-00d91a3aeceb.bin  
  inflating: db/index/uuid_to_id_31a0072f-e557-46bb-9940-00d91a3aeceb.pkl  
  inflating: db/index/id_to_uuid_31a0072f-e557-46bb-9940-00d91a3aeceb.pkl  
  inflating: db/index/index_metadata_31a0072f-e557-46bb-9940-00d91a3aeceb.pkl  
  inflating: db/chroma-collections.parquet  
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]
Downloading (…)e9125/.gitattributes: 100% 1.18k/1.18k [00:00<00:00, 6.69MB/s]
Downloading (…)_Pooling/config.json: 100% 190/190 [00:00<00:00, 834kB/s]
Do

# Run the model:
Pass `-M` to mute the debug stdout.

In [7]:
# Start the privateGPT script; the -M flag mutes the debug stdout.
!python privateGPT.py -M

Using embedded DuckDB with persistence: data will be stored in: db
ggml_init_cublas: found 1 CUDA devices:
  Device 0: Tesla T4
llama.cpp: loading model from /content/models/ggml-vic7b-q5_1.bin
llama_model_load_internal: format     = ggjt v2 (pre #1508)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 1000
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 9 (mostly Q5_1)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 1979.59 MB (+ 1026.00 MB per state)
llama_model_load_internal: allocating batch_size x 1 MB =