In [3]:
from mlx_lm import load

# Identifier of the model to load (passed to mlx_lm's load below)
model_name = "mlx-community/Meta-Llama-3-8B-Instruct-4bit"

# load() hands back the model together with its tokenizer
model, tokenizer = load(path_or_hf_repo=model_name)

Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 40265.32it/s]


In [4]:
import numpy as np

import chromadb
from chromadb.utils import embedding_functions

from chromadb.config import Settings

# HTTP client for the Chroma server at localhost:8083; anonymized telemetry is
# switched off and reset is allowed by the client settings.
chroma_client = chromadb.HttpClient(
    host="localhost",
    port=8083,
    settings=Settings(anonymized_telemetry=False, allow_reset=True),
)

# Embedding function built on the "all-MiniLM-L6-v2" sentence-transformers model.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# Fetch the collection by name, creating it if it is not there yet.
collection = chroma_client.get_or_create_collection(
    embedding_function=sentence_transformer_ef,
    name="transcripts_mililm_l6_v3",
)



In [5]:
# Query the collection for up to 10 matches whose metadata has
# symbol in ['TSLA'] and year > 2023, and whose document text contains "2024".
# Metadatas, documents and embeddings are requested in the result.
results = collection.query(
    query_texts=["Tesla transcripts for the year 2024"],
    n_results=10,
    where={
        "$and": [
            {"symbol": {"$in": ["TSLA"]}},
            {"year": {"$gt": 2023}},
        ]
    },
    where_document={"$contains": "2024"},
    include=["metadatas", "documents", "embeddings"],
)

In [6]:
# Display the raw query result. Because the query included 'embeddings', the
# output below contains the full embedding vectors and is very long.
results

{'ids': [['2024Q2TSLA', '2024Q1TSLA']],
 'distances': None,
 'embeddings': [[[-0.0822194516658783,
    0.009658602997660637,
    0.052224915474653244,
    0.02644718624651432,
    0.04375709965825081,
    -0.012698511593043804,
    -0.000993350287899375,
    0.07763923704624176,
    0.046971507370471954,
    -0.026099272072315216,
    0.003186175599694252,
    0.0008456492214463651,
    0.04624375328421593,
    -0.07265063375234604,
    0.011303565464913845,
    0.02751617692410946,
    0.0003328380116727203,
    -0.1211201548576355,
    -0.06751690059900284,
    0.05124247446656227,
    -0.03266194090247154,
    -0.013144985772669315,
    -0.0125283133238554,
    0.002807856537401676,
    0.04040118679404259,
    -0.01640661247074604,
    -0.03554452955722809,
    0.05132472142577171,
    -0.026458900421857834,
    -0.032147157937288284,
    -0.019565070047974586,
    0.045829273760318756,
    0.03245055675506592,
    0.0004907945403829217,
    -0.02981671132147312,
    0.030537785962

In [7]:
from mlx_lm import generate

In [9]:
def data_create(topic: str, question: str, answer: str) -> dict:
    """Build one chat-format example asking for a summary and a topic.

    Args:
        topic: Topic label appended to the end of the assistant reply.
        question: Conversation text placed after the instruction in the user turn.
        answer: Summary text that opens the assistant reply.

    Returns:
        A dict of the form ``{"messages": [user_turn, assistant_turn]}`` where
        each turn is a ``{"role": ..., "content": ...}`` dict.
    """
    # The instruction ends with a period followed by a newline so that the
    # conversation text starts on its own line (previously the period landed
    # at the start of the conversation and the word "and" was duplicated).
    user_content = (
        "Instruct: Summarize the following conversation and also give topic to it.\n"
        f"{question}"
    )
    assistant_content = f"{answer}.\n Topic is : {topic}"

    return {
        "messages": [
            {"role": "user", "content": user_content},
            {"role": "assistant", "content": assistant_content},
        ]
    }

In [None]:
# NOTE(review): this cell has no execution count, and `messages` is only
# assigned in a later cell (the one that builds the system/user chat list).
# Within the cells shown, running this line on a fresh kernel would raise
# NameError; the same call is repeated in that later cell.
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

In [7]:
# System message that sets the chatbot's persona
chatbot_role = "You are a math professor chatbot. You answer with clear step by step solutions."

# User question to be answered
math_question = "Please compute the integral of f(x) = exp(-x^2) over the x domain of (- infinity, infinity), use LaTeX."

# Conversation in role/content chat format
messages = [
    {"role": "system", "content": chatbot_role},
    {"role": "user", "content": math_question}
]

# Token ids of the templated conversation. Not needed to build the prompt
# below; kept so that anything else reading `input_ids` keeps working.
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

# Render the chat template directly to text. Tokenizing and then decoding the
# ids back to a string is a redundant round-trip, and decode() may normalise
# whitespace, so the decoded text is not guaranteed to match the template.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Generate a response of at most 512 tokens from the templated prompt
response = generate(model, tokenizer, prompt=prompt, max_tokens=512)