In [None]:
# Sample conversation log: three placeholder integers followed by alternating
# 'Miko:' / 'Lucid:' turns.
history = [
    12,
    12,
    12,
    'Miko: Hi. This is a test run of you, Lucid. How are you feeling?',
    'Lucid:  I am doing well, thank you for asking. How can I assist you today?',
    'Miko: Just talk with me. I am testing your talking and memory',
    'Lucid:  Of course! What do you want to know about?',
]

# Preview the last four entries of the log.
print(f'This is what would have been saved:\n {history[-4:]}')

In [None]:
# Talk to a text-generation server over HTTP using `requests`.
import requests

# Local server: the endpoint below is reached without SSL (http://).
HOST = 'localhost:5000'
URI = f'http://{HOST}/api/v1/generate'

# Reverse-proxied / remote server: it will likely be served with SSL (https://).
# URI = 'https://your-uri-here.trycloudflare.com/api/v1/generate'


def llm(prompt, timeout=None):
    """POST ``prompt`` to the generation endpoint at ``URI`` and return the completion.

    On success the prompt followed by the generated text is printed (as before)
    and the generated text is also returned so callers can use it.

    Args:
        prompt: Text to send as the ``'prompt'`` field of the request.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            ``None`` (the default) keeps the previous behaviour of not
            setting a timeout.

    Returns:
        The generated text (without the prompt) when the server answers with
        HTTP 200, otherwise ``None``.
    """
    payload = {
        'prompt': prompt,
        'max_new_tokens': 250,
        'do_sample': True,
        'temperature': 0.01,
        'top_p': 0.1,
        'typical_p': 1,
        'epsilon_cutoff': 0,  # In units of 1e-4
        'eta_cutoff': 0,  # In units of 1e-4
        'tfs': 1,
        'top_a': 0,
        'repetition_penalty': 1.18,
        'top_k': 40,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'mirostat_mode': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,
        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'skip_special_tokens': True,
        'stopping_strings': []
    }

    response = requests.post(URI, json=payload, timeout=timeout)

    if response.status_code != 200:
        # A failed request used to be dropped without any feedback; report it
        # instead, but keep the call best-effort (no exception is raised).
        print(f'Request to {URI} failed with HTTP status {response.status_code}')
        return None

    result = response.json()['results'][0]['text']
    print(prompt + result)
    return result


In [None]:
# Send a sample prompt through the llm() helper defined above.
llm('How to build a robot?')

In [None]:
from transformers import AutoTokenizer, pipeline, logging
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

# Quantized checkpoint and weight-file basename handed to AutoGPTQ.
model_name_or_path = "TheBloke/llama2_7b_chat_uncensored-GPTQ"
model_basename = "gptq_model-4bit-128g"

use_triton = False

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
        model_basename=model_basename,
        use_safetensors=True,
        trust_remote_code=True,
        device="cuda:0",
        use_triton=use_triton,
        quantize_config=None)

# To download from a specific branch, use the revision parameter, as in this example:
#
# model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
#         revision="gptq-4bit-32g-actorder_True",
#         model_basename=model_basename,
#         use_safetensors=True,
#         trust_remote_code=True,
#         device="cuda:0",
#         quantize_config=None)

prompt = "Tell me about AI"
prompt_template=f'''### HUMAN:
{prompt}

### RESPONSE:
'''

print("\n\n*** Generate:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
# temperature only influences the output when sampling is enabled, so request
# it explicitly rather than relying on the checkpoint's generation defaults.
output = model.generate(
    inputs=input_ids,
    do_sample=True,
    temperature=0.7,
    max_new_tokens=512,
)
print(tokenizer.decode(output[0]))

# Inference can also be done using transformers' pipeline

# Prevent printing spurious transformers error when using pipeline with AutoGPTQ
logging.set_verbosity(logging.CRITICAL)

print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,  # required for temperature / top_p to take effect
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15
)

print(pipe(prompt_template)[0]['generated_text'])


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
from langchain import HuggingFacePipeline
# Hub id of the checkpoint; used for both the tokenizer and the pipeline below.
model = "TheBloke/Luna-AI-Llama2-Uncensored-GPTQ"
# (An earlier variant loaded a local tiiuae_falcon-7b-instruct checkpoint with
# AutoModelForCausalLM.from_pretrained instead.)
tokenizer = AutoTokenizer.from_pretrained(model)

# NOTE: this rebinds the name `llm`.
# Options that were tried and are currently disabled:
#   max_length=2048, max_new_tokens=512, min_length=20, top_p=0.1, typical_p=1,
#   repetition_penalty=1.30, bad_words_ids=[[37]],
#   num_beams=2 (this breaks stuff, idk why)
llm = HuggingFacePipeline.from_model_id(
    model_id=model,
    task="text-generation",
    model_kwargs=dict(
        temperature=0.00,
        trust_remote_code=True,
        device_map="auto",
        load_in_8bit=True,
        no_repeat_ngram_size=3,
    ),
)

In [None]:
# Call llm with a short prompt to check that it produces a continuation.
llm('Hello. I am')

In [None]:
import chromadb
from chromadb.utils import embedding_functions

# Persistent client with its storage path set to ./test; the collection at the
# bottom of this cell is created through it.
client = chromadb.PersistentClient(path="./test")

# Notebook-level state: `session` and `serial_number` are combined into the id
# of every entry written by save_history().
text = ''
session = 1
serial_number = 1

# A second, default-configured client and the default embedding function
# (neither is referenced again in this cell).
chroma_client = chromadb.Client()
default_ef = embedding_functions.DefaultEmbeddingFunction()
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# Disabled draft (never executed): read the session counter from
# "session_number.txt" and write back the incremented value.
#   f = open("session_number.txt", "r")
#   session = int(f.read)   # NOTE: would need f.read() to work
#   f.close()
#
#   f = open("session_number.txt", "w")
#   f.write(str(session+1))
#   f.close()
#
# Disabled sketch of the argument shapes used with collection.add():
#   document_example = [text]
#   metadata_example = [{'session':session}]
#   id_example = [f'{session}-{number}']

collection = client.get_or_create_collection(
    name="test",
    embedding_function=sentence_transformer_ef,
)
def save_history(text):
    """Add ``text`` to ``collection`` under the id ``"<session>-<serial_number>"``.

    The current values of the ``session`` and ``serial_number`` globals are
    stored as the entry's metadata, after which ``serial_number`` is advanced
    by one. Returns ``None``.
    """
    global serial_number, session
    entry_id = f"{session}-{serial_number}"
    entry_metadata = {'session': session, 'serial_number': serial_number}
    collection.add(documents=[text], metadatas=[entry_metadata], ids=[entry_id])
    serial_number = serial_number + 1

def _stored_document(entry_id):
    """Return the document stored under ``entry_id``, or ``''`` if there is none."""
    documents = collection.get(ids=[entry_id])['documents']
    return documents[0] if documents else ''


def get_history(query):
    """Return the entry closest to ``query`` together with its neighbours.

    The single best match is looked up in ``collection``; the entries whose
    ids carry the previous and the next serial number of the same session are
    fetched as context.

    Args:
        query: Text to search for.

    Returns:
        The previous entry, the matched document and the next entry joined by
        newlines. Neighbours that do not exist are left out, and ``''`` is
        returned when the query yields no match at all.
    """
    results = collection.query(
        query_texts=[query],
        n_results=1,
    )
    print(f'query results: {results}')

    matches = results['documents'][0] if results['documents'] else []
    if not matches:
        # Nothing stored yet (or nothing returned): there is no context to build.
        return ''

    # Local names on purpose: the notebook-level counters must not be modified.
    metadata = results['metadatas'][0][0]
    serial_number = metadata['serial_number']
    session = metadata['session']

    # The first and last entries of a session have no neighbour on one side;
    # _stored_document() yields '' there instead of raising IndexError.
    lines_before_results = _stored_document(f'{session}-{serial_number-1}')
    print(f'lines_before_results[\'documents\'][0]= {lines_before_results}')

    lines_after_results = _stored_document(f'{session}-{serial_number+1}')
    print(f'lines_after_results[\'documents\'][0]= {lines_after_results}')

    # Join the matched document itself (not the raw query-result dict) with
    # whichever neighbours exist.
    parts = [lines_before_results, matches[0], lines_after_results]
    return '\n'.join(part for part in parts if part)


In [None]:
def save_history(text):
    """Add ``text`` to ``collection`` and advance the ``serial_number`` global.

    The document, its metadata and its id (``"<session>-<serial_number>"``)
    are handed to ``collection.add`` as single values rather than as
    one-element lists. Returns ``None``.
    """
    global serial_number, session
    record_id = f"{session}-{serial_number}"
    record_metadata = {'session': session, 'serial_number': serial_number}
    collection.add(
        documents=text,
        metadatas=record_metadata,
        ids=record_id,
    )
    serial_number = serial_number + 1

In [None]:
# Scratch check: fetch the entry whose id is one past the current serial_number
# and print the raw response.
lines_after_results = (collection.get(ids=[f'{session}-{serial_number+1}']))
print(lines_after_results)
# Narrow the response down to its first document; this indexing raises
# IndexError when the 'documents' list is empty.
lines_after_results = lines_after_results['documents'][0]
print(lines_after_results)

In [None]:
# Store one throwaway entry to exercise save_history().
save_history('teststd6awtf8atwf87')

In [None]:
# Query for the throwaway text and print what get_history() returns.
print(get_history('teststd6awtf8atwf87'))

In [None]:
# Ask the collection for the single closest match to a sample query.
results = collection.query(
    query_texts=['Testing 2'],
    n_results=1,
)

# Text of the best match.
print(results['documents'][0][0])

# Example of the structure returned by the query above:
# results = {
#     'ids': [['1-3']],
#     'distances': [[0.39626064696324215]],
#     'metadatas': [[{'serial_number': 3, 'session': 1}]],
#     'embeddings': None,
#     'documents': [['Testing 2']]
# }

# Access 'serial_number' information.
# It is kept under its own name: assigning it to the notebook-level
# `serial_number` would overwrite the counter save_history() uses to build ids.
matched_serial_number = results['metadatas'][0][0]['serial_number']

# Print the 'serial_number' information
print("Serial Number:", matched_serial_number)


In [22]:
import chromadb
from chromadb.utils import embedding_functions

# Default-configured client; the "history" collection below is created through it.
client = chromadb.Client()

# Counters combined into the id of every entry written by save_history().
serial_number = 1
session = 1

# Embedding functions; only the sentence-transformer one is passed to the collection.
default_ef = embedding_functions.DefaultEmbeddingFunction()
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# Disabled sketch of the argument shapes used with collection.add():
#   document_example = [text]
#   metadata_example = [{'session':session}]
#   id_example = [f'{session}-{number}']

collection = client.get_or_create_collection(
    name="history",
    embedding_function=sentence_transformer_ef,
)
def save_history(text):
    """Write ``text`` to ``collection`` as a new entry.

    The entry id is ``"<session>-<serial_number>"`` and both globals are
    stored as metadata. ``serial_number`` is incremented afterwards so the
    next call produces a fresh id. Returns ``None``.
    """
    global serial_number
    global session
    current = {'session': session, 'serial_number': serial_number}
    collection.add(
        documents=[text],
        metadatas=[current],
        ids=["{}-{}".format(session, serial_number)],
    )
    serial_number += 1
    return None

# A disabled draft of get_history() used to sit here as an unused string
# literal; it additionally fetched the entries stored immediately before and
# after the best match and returned all three joined by newlines.

def get_history(query):
    """Return the stored document closest to ``query``.

    Args:
        query: Text to search for in ``collection``.

    Returns:
        The text of the single best match, or ``''`` when the query returns
        no documents.
    """
    results = collection.query(
        query_texts=[query],
        n_results=1,
    )
    # Only the matched text is needed. The match's metadata used to be read
    # into unused locals here, which could raise for entries without metadata.
    documents = results['documents']
    if documents and documents[0]:
        return documents[0][0]
    return ''


In [24]:
# Round trip: store a sample entry, then query for it and print the match.
save_history('test')
print(get_history('test'))

query results: {'ids': [['1-2']], 'distances': [[0.0]], 'metadatas': [[{'serial_number': 2, 'session': 1}]], 'embeddings': None, 'documents': [['test']]}
test
