In [10]:
!pip install -q torch datasets
!pip install -q accelerate==0.21.0 \
                peft==0.4.0 \
                bitsandbytes>=0.41.3 \
                transformers==4.31.0 \
                trl==0.4.7

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 2.5.1 requires transformers<5.0.0,>=4.32.0, but you have transformers 4.31.0 which is incompatible.[0m[31m
[0m

Load the quantize Mistral-7B Model


In [11]:
! pip install git+https://github.com/huggingface/transformers

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-g_3_ob7j
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-g_3_ob7j
  Resolved https://github.com/huggingface/transformers to commit b382a09e28c7e59129246ccdf4b00f2cac4547a4
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tokenizers<0.19,>=0.14 (from transformers==4.39.0.dev0)
  Using cached tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for transformers: filename=transformers-4.39.0.dev0-py3-none-any.whl size=8661416 sha256=91cc47f4b6f2c32543189f6e8f1be873fa7b4d6733fa4c722c07af

In [12]:
#################################################################
# Tokenizer
#################################################################
import transformers
from transformers import AutoTokenizer

model_name='mistralai/Mistral-7B-Instruct-v0.1'

model_config = transformers.AutoConfig.from_pretrained(
    model_name,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [13]:
#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [14]:
#################################################################
# Set up quantization config
#################################################################
import torch
from transformers import BitsAndBytesConfig

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [15]:
#################################################################
# Load pre-trained config
#################################################################

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
inputs_not_chat = tokenizer.encode_plus("[INST] Tell me about football? [/INST]", return_tensors="pt")['input_ids'].to('cuda')

generated_ids = model.generate(inputs_not_chat,
                               max_new_tokens=1000,
                               do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [8]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


Create Vector Database

In [1]:
!pip install langchain

Collecting langchain
  Downloading langchain-0.1.11-py3-none-any.whl (807 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m807.5/807.5 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.25 (from langchain)
  Downloading langchain_community-0.0.27-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2,>=0.1.29 (from langchain)
  Downloading langchain_core-0.1.30-py3-none-any.whl (256 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.9/256.9 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)
  Downloadi

In [2]:
!pip install playwright

Collecting playwright
  Downloading playwright-1.42.0-py3-none-manylinux1_x86_64.whl (37.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m
Collecting pyee==11.0.1 (from playwright)
  Downloading pyee-11.0.1-py3-none-any.whl (15 kB)
Installing collected packages: pyee, playwright
Successfully installed playwright-1.42.0 pyee-11.0.1


In [3]:
!pip install nest_asyncio



In [4]:
!playwright install

Downloading Chromium 123.0.6312.4 (playwright build v1105)[2m from https://playwright.azureedge.net/builds/chromium/1105/chromium-linux.zip[22m
[1G154.7 MiB [] 0% 0.0s[0K[1G154.7 MiB [] 0% 47.5s[0K[1G154.7 MiB [] 0% 33.2s[0K[1G154.7 MiB [] 0% 21.9s[0K[1G154.7 MiB [] 0% 13.9s[0K[1G154.7 MiB [] 0% 11.5s[0K[1G154.7 MiB [] 1% 8.9s[0K[1G154.7 MiB [] 1% 7.4s[0K[1G154.7 MiB [] 2% 6.4s[0K[1G154.7 MiB [] 2% 5.9s[0K[1G154.7 MiB [] 2% 5.8s[0K[1G154.7 MiB [] 3% 5.8s[0K[1G154.7 MiB [] 3% 5.2s[0K[1G154.7 MiB [] 4% 5.2s[0K[1G154.7 MiB [] 4% 5.3s[0K[1G154.7 MiB [] 4% 5.0s[0K[1G154.7 MiB [] 5% 4.8s[0K[1G154.7 MiB [] 5% 4.7s[0K[1G154.7 MiB [] 6% 4.5s[0K[1G154.7 MiB [] 7% 4.2s[0K[1G154.7 MiB [] 8% 3.9s[0K[1G154.7 MiB [] 9% 3.8s[0K[1G154.7 MiB [] 9% 3.7s[0K[1G154.7 MiB [] 10% 3.6s[0K[1G154.7 MiB [] 10% 3.4s[0K[1G154.7 MiB [] 11% 3.3s[0K[1G154.7 MiB [] 12% 3.2s[0K[1G154.7 MiB [] 13% 3.1s[0K[1G154.7 MiB [] 14% 3.0s[0K[1G154.7 MiB [] 15% 3.0s[0K[

In [5]:
!pip install html2text

Collecting html2text
  Downloading html2text-2024.2.26.tar.gz (56 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/56.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m41.0/56.5 kB[0m [31m970.4 kB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 kB[0m [31m969.2 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: html2text
  Building wheel for html2text (setup.py) ... [?25l[?25hdone
  Created wheel for html2text: filename=html2text-2024.2.26-py3-none-any.whl size=33110 sha256=b1853eea47d2cf3a4b17d066954cc89e84153d20a0a6433573bf0d7c1a30a545
  Stored in directory: /root/.cache/pip/wheels/f3/96/6d/a7eba8f80d31cbd188a2787b81514d82fc5ae6943c44777659
Successfully built html2text
Installing collected packages: html2text
Successfully installed html2text-2024.2.26


In [6]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-2.5.1-py3-none-any.whl (156 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.5/156.5 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-2.5.1


In [7]:
! pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [8]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import nest_asyncio

nest_asyncio.apply()

articles = ["https://www.bbc.co.uk/sport/live/football/68154773",
            "https://www.mirror.co.uk/sport/football/news/jurgen-klopp-leaving-liverpool-after-32290246",
            "https://www.manchestereveningnews.co.uk/sport/football/transfer-news/21-manchester-united-players-who-28765729"]

# Scrapes the blogs above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

# Converts HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=800,
                                      chunk_overlap=100)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))


# Connect query to FAISS index using a retriever
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 4}
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [16]:
import transformers
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain import LLMChain

text_generation_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=300,
)

prompt_template = """
### [INST]
Instruction: Answer the question based on your football knowledge. Here is context to help:

{context}

### QUESTION:
{question}

[/INST]
 """

mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [17]:
llm_chain.invoke({"context":"",
                  "question": "what manchester united should do ?"})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': '',
 'question': 'what manchester united should do ?',
 'text': "1. Manchester United should focus on building a strong and balanced team with a mix of youth and experience, as well as players who possess the necessary skills and attributes to compete at the highest level in the Premier League.\n2. They should also prioritize developing their young players and giving them opportunities to play and grow within the first team.\n3. Additionally, Manchester United should invest in a solid defensive structure and work on improving their set-piece performance.\n4. It's important for Manchester United to have a clear and defined playing style that allows them to dominate possession and create chances for their attacking players.\n5. Finally, they should consider bringing in experienced managers or coaches who can help guide the team towards success and instill a winning mentality."}

In [19]:
from langchain.schema.runnable import RunnablePassthrough
query = "what manchester united should do ?"

retriever = db.as_retriever()

rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

rag_chain.invoke(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content="**Mason Greenwood**\n\nSIMILAR ARTICLES TO THIS\n\nPowered by\n\nManchester United could finally do what Ralf Rangnick recommended in transfer\nmarket\n\n39 Comments\n\nGary Neville eyes two Man United fixtures where Erik ten Hag can change Sir\nJim Ratcliffe opinion\n\nComments\n\nManchester United suffer new injury blow as Erik ten Hag raises possible\nissues with medical staff\n\n34 Comments\n\nGareth Southgate is not the manager Manchester United need right now as links\nemerge\n\n5 Comments\n\nManchester United have a problem everyone is talking about - but Erik ten Hag\ncannot fix\n\n12 Comments\n\nManchester United need to find a buyer for their forgotten man after academy\nbreakthrough\n\n3 Comments\n\nManchester United Supporters' Trust respond to ambitious Old Trafford\nannouncement\n\n16 Comments", metadata={'source': 'https://www.manchestereveningnews.co.uk/sport/football/transfer-news/21-manchester-united-players-who-28765729'}),
  Docum

In [20]:
query = "Why Jurgen Klopp is leaving Liverpool ?"

rag_chain.invoke(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content="*   *   *   *   * |\n  *   * 11\n  * Comments\n  * |\n\nBookmark\n\nJurgen Klopp rocked the footballing world when he announced in January that\nthis would be his final season as Liverpool boss.\n\nTalk immediately turned to what's next for Klopp, for Liverpool, and, of\ncourse, what inspired the German to drop this bombshell news during such a\npositive time in the club's history.\n\nKlopp did his best to explain in a long statement to the fans, and the short\nanswer is the 56-year-old is tired. After nine years working one of the most\ndemanding jobs in sport, the manager no longer feels he can operate with the\nnecessary energy required.", metadata={'source': 'https://www.mirror.co.uk/sport/football/news/jurgen-klopp-leaving-liverpool-after-32290246'}),
  Document(page_content='Klopp added: "I told the club already in November. I have to explain a little\nbit that maybe the job I do people see from the outside, I’m on the touchline\nand in trainin