*  Uses Microsoft's `guidance` library to constrain the model output to the desired format
*  Uses TheBloke's quantized GGUF build of Llama 2 (run through llama.cpp, a C++ runtime) to reduce RAM and VRAM requirements



In [1]:
import requests

# Downloading desired model
def download_file_with_progress(url, filename, block_size=1024 * 1024 * 100, timeout=60):
    """
    Download a file with progress indicator from a given URL

    :param url: URL to the file
    :param filename: Filename to save the downloaded content
    :param block_size: Size in bytes of each streamed chunk (default: 100 MiB)
    :param timeout: Seconds to wait for the server to connect / send data
    :raises requests.HTTPError: if the server answers with an error status,
        so an HTML error page is never written to disk as if it were the file
    """
    progress_bar_size = 50
    print(f"Starting download of {filename}")

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        # The header may be absent (e.g. chunked transfer); 0 means "unknown size".
        total_size_in_bytes = int(response.headers.get('content-length', 0))

        with open(filename, 'wb') as file:
            downloaded_size = 0
            for data in response.iter_content(block_size):
                downloaded_size += len(data)
                file.write(data)
                if total_size_in_bytes > 0:
                    # Clamp so a body larger than the announced size cannot overflow the bar.
                    fraction = min(downloaded_size / total_size_in_bytes, 1.0)
                    done = int(progress_bar_size * fraction)
                    print(f"\r[{'█' * done}{'.' * (progress_bar_size - done)}] {fraction * 100:.2f}%", end = '')
                else:
                    # Without a total there is no percentage to show; report the volume instead.
                    print(f"\r{downloaded_size / (1024 * 1024):.1f} MiB downloaded", end = '')
    print("\nDownload completed.")

# Name under which the model file is stored next to the notebook
gguf_filename = "llama-2-7b-chat.Q3_K_L.gguf"

# Direct-download link to the same .gguf file on Hugging Face
gguf_url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_L.gguf?download=true"

# Fetch the model, printing a progress bar while it streams to disk
download_file_with_progress(url=gguf_url, filename=gguf_filename)


Starting download of llama-2-7b-chat.Q3_K_L.gguf
[██████████████████████████████████████████████████] 100.00%
Download completed.


In [2]:
# Install llama-cpp-python with a forced CMake build (FORCE_CMAKE=1) and the
# LLAMA_CUBLAS option switched on — presumably what enables the GPU-backed
# "BLAS = 1" mentioned in the model-loading cell; confirm against the package docs.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
# guidance supplies the `models` / `gen` helpers imported in the next code cell.
!pip install guidance

Collecting llama-cpp-python
  Downloading llama_cpp_python-0.2.26.tar.gz (8.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.2.26-cp310-cp310-manylinux_2_35_x86_64.whl size=8130346 sha256=e7f107fdcc1357a959a1fed8f24b07eafc89cccef4e793c70ac0656c199d284c
  Stored in directory: /root/.cache/pip/wheels/91/80/ce/ac6afea8c1d6fbcec7e14183033a5b2796c742d4f470010c72
Successfully built llama-cpp-python
Installing collected packages: llama-cpp-python
Successfully installed llama-cpp-python-0.2.26
Co

In [5]:
import guidance
from guidance import models, gen, system, user, assistant

# Model needs to be locally saved; the file loaded here is the chat model
# fetched by the download cell at the top of the notebook.
# A different sample model (base Llama 2, Q5_K_M quantization) can be downloaded from
# https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q5_K_M.gguf

gguf_filename = "llama-2-7b-chat.Q3_K_L.gguf"
# NOTE(review): n_gpu_layers=-1 presumably offloads every layer to the GPU and
# n_ctx=4096 presumably sets the context window in tokens — confirm against the
# llama-cpp-python documentation.
llama2 = models.LlamaCpp(gguf_filename, n_gpu_layers=-1, n_ctx=4096)

# BLAS = 1 (in the system-info line printed when the model loads) means there is GPU acceleration

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


In [15]:
%%time

# Constrain the generated answer to a float from 0 to 1
regex_pattern = r"0(\.\d+)?|1(\.0+)?"

sentence_pair = "[Dogs eat bones, Dog is green]"
query = f"How semantically similar are those two sentences on scale from 0 to 1: {sentence_pair}"

# Render the Q/A prompt first; the gen() placeholder is where guidance produces
# the answer, which is then readable as output["similarity"].
prompt = f'''\
        Q: {query}
        A: {gen('similarity', regex=regex_pattern)}'''
output = llama2 + prompt

CPU times: user 456 ms, sys: 2.81 ms, total: 459 ms
Wall time: 471 ms


In [10]:
# Show the text captured under the name 'similarity' by the gen() call in the query cell
output["similarity"]

'0.3'

In [16]:
# Get data

import os
import sys
import re
import pandas as pd

# Directory that holds the raw CSV files.
# On Colab the data lives on the mounted Google Drive; anywhere else fall back to
# a local data/raw directory so that PATH is always defined for get_data().
if 'google.colab' in sys.modules:
    from google.colab import drive
    drive.mount('/content/drive')
    PATH = os.path.join("drive", "MyDrive", "LMU", "AppliedDL", "data", "raw")
else:
    PATH = os.path.join("data", "raw")


def get_data(subset):
    """
    Load the English training pairs and split each text into its two sentences.

    Reads ``eng_train.csv`` from the module-level ``PATH`` directory. Every
    ``Text`` value is expected to hold two sentences separated by ``\\r\\n``;
    they are stored in the new columns ``sen_1`` and ``sen_2`` with every run of
    non-alphanumeric characters collapsed to a single space and surrounding
    whitespace removed. The head of the full frame is displayed as a side effect.

    :param subset: number of rows to sample (fixed seed 42), or None for all rows
    :return: DataFrame with the original columns plus ``sen_1`` and ``sen_2``
    :raises IndexError: if a text does not contain a second sentence
    :raises ValueError: if ``subset`` is larger than the number of rows
    """

    def _split_pair(text):
        # "\r\n" separates the sentences: neutralise "\n" first, then cut on "\r".
        parts = text.replace("\n", " ").split("\r")
        # strip() removes the padding left behind by the two substitutions, which
        # otherwise ends up inside the prompt as "sentence ,  sentence ".
        return [re.sub(r"[^a-zA-Z0-9]+", ' ', part).strip() for part in parts]

    df_train = pd.read_csv(os.path.join(PATH, 'eng_train.csv'))

    # Keep the split result in a local Series instead of a temporary column
    # that has to be dropped in place afterwards.
    split_text = df_train["Text"].apply(_split_pair)
    df_train["sen_1"] = split_text.apply(lambda parts: parts[0])
    df_train["sen_2"] = split_text.apply(lambda parts: parts[1])
    display(df_train.head())

    if subset is not None:
        df_train = df_train.sample(n=subset, random_state=42)

    return df_train

df = get_data(subset=1000)

Mounted at /content/drive


Unnamed: 0,PairID,Text,Score,sen_1,sen_2
0,ENG-train-0000,"It that happens, just pull the plug.\r\nif tha...",1.0,It that happens just pull the plug,if that ever happens just pull the plug
1,ENG-train-0001,A black dog running through water.\r\nA black ...,1.0,A black dog running through water,A black dog is running through some water
2,ENG-train-0002,I've been searchingthe entire abbey for you.\r...,1.0,I ve been searchingthe entire abbey for you,I m looking for you all over the abbey
3,ENG-train-0003,If he is good looking and has a good personali...,1.0,If he is good looking and has a good personali...,If he s good looking and a good personality h...
4,ENG-train-0004,"She does not hate you, she is just annoyed wit...",1.0,She does not hate you she is just annoyed with...,She doesn t hate you she is just annoyed


In [17]:
import random
from tqdm.notebook import tqdm

def get_list(df, n, score_sep = False):
    """
    Randomly pick sentence pairs from ``df`` and format them as prompt text.

    :param df: DataFrame with the columns ``sen_1``, ``sen_2`` and ``Score``
    :param n: number of distinct rows to draw with ``random.sample``
    :param score_sep: if False, return one string of ``n`` examples formatted as
        ``[sen_1, sen_2] = Score`` and joined by ``", "``; if True, return only
        the first drawn pair, with its score kept separate
    :return: ``str`` when ``score_sep`` is False, else ``(sentences, score)``
    :raises ValueError: if ``n`` is negative or larger than ``len(df)``
    :raises IndexError: if ``score_sep`` is True and ``n`` is 0
    """
    ints = random.sample(range(len(df)), n)

    # Look the fields up by name rather than by position, so that a changed
    # column order cannot silently feed the wrong columns into the prompt.
    first, second, scores = df["sen_1"], df["sen_2"], df["Score"]

    if not score_sep:
        return ", ".join(
            f'[{first.iloc[i]}, {second.iloc[i]}] = {scores.iloc[i]}' for i in ints
        )

    # Only the first drawn row is used in this mode.
    i = ints[0]
    sentences = f'[{first.iloc[i]}, {second.iloc[i]}]'
    return sentences, scores.iloc[i]


# Demo of both modes: a joined string of three examples, then one (sentences, score) tuple
joined_examples = get_list(df, 3)
single_example = get_list(df, 3, score_sep = True)
print(joined_examples, "\n\n", single_example)

[I wanted to know why Shiarra was being dragged into this and the petty excuse just made my day ,  As much as I want to strangle Shiarra I really like the other characters so I am sticking with it ] = 0.41, [Hold her neck with soft hands and kiss her on the back of her ear ,  Hold hands snuggle give her kisses and hug her tightly ] = 0.88, [A group of protesters carrying signs and flags walk down the street ,  A man with a cap and jeans is washing the window not on ground level ] = 0.16 

 ('[a skateboarder jumps over a set of stairs ,  A little girl swimming in a pool ]', 0.22)


In [None]:
%%time

# Constrain every generated answer to a float from 0 to 1
regex_pattern = r"0(\.\d+)?|1(\.0+)?"

# (gold score, predicted score) tuples, one per row of df
results = []

for row_idx in tqdm(range(len(df))):
    # Positions 3 and 4 are the two sentences, position 2 is the gold score
    # (sen_1, sen_2 and Score in the frame displayed by get_data).
    sentence_pair = f'[{df.iloc[row_idx, 3]}, {df.iloc[row_idx, 4]}]'
    score = df.iloc[row_idx, 2]

    query = f"How semantically related are those two sentences on scale from 0 to 1: {sentence_pair}"

    # Each iteration starts again from the bare llama2 model object.
    prompt = f'''\
        Q: {query}
        A: {gen('relatedness', regex=regex_pattern)}'''
    output = llama2 + prompt

    prediction = float(output["relatedness"])
    results.append((score, prediction))

# Print all predictions and scores at once
# for score, prediction in results:
#     print(f'Score: {score}, Prediction: {prediction}')

In [None]:
import numpy as np
from scipy.stats import spearmanr

# Turn the (gold score, prediction) tuples into a two-column frame
results = pd.DataFrame(results, columns = ["Score", "Prediction"])

# Rank correlation between the gold scores and the model's predictions
gold_scores, predicted_scores = results["Score"], results["Prediction"]
correlation, p_value = spearmanr(gold_scores, predicted_scores)

print("Spearman Correlation Coefficient:", np.round(correlation, 2))