### **1. Import relevant packages**

In [63]:
import os
import json
import openai
import pandas as pd
import qdrant_client
from tqdm import tqdm
from getpass import getpass
from dotenv import load_dotenv
from datasets import load_dataset
from typing import Optional, List, Tuple
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import PointStruct
from langchain_core.language_models import BaseChatModel

In [64]:
load_dotenv()

True

### **2. Setup your openai key**

In [65]:
if not (openai_api_key := os.environ.get("OPENAI_API_KEY")):
    openai_api_key = getpass("🔑 Enter your OpenAI API key: ")
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

### **3.  Retrieve the documents / dataset to be used**

In [66]:

df = pd.read_csv("coindesk_news.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   url          25 non-null     object
 1   title        25 non-null     object
 2   description  25 non-null     object
 3   date         25 non-null     object
dtypes: object(4)
memory usage: 932.0+ bytes


### **4. Process dataset as langchain document for further processing**

In [67]:
from tqdm import tqdm
from langchain.docstore.document import Document as LangchainDocument

# Corrected version matching your dataset columns
langchain_docs = [
    LangchainDocument(
        page_content=f"{row['title']}\n\n{row['description']}",  # Changed 'summary' to 'description'
        metadata={
            "source": row['url'],  # Changed 'click_url' to 'url'
            "date": row['date']    # Added date to metadata if you want to preserve it
        }
    )
    for _, row in tqdm(df.iterrows(), total=len(df))
]

print(f"Total documents: {len(langchain_docs)}")

100%|██████████| 25/25 [00:00<00:00, 8332.61it/s]

Total documents: 25





### **5. Document chunk processing**

Processing each document with desired **TEXT_SPLITTER_ALGO , CHUNK_SIZE , CHUNK_OVERLAP** etc

In [68]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

## Check https://python.langchain.com/v0.2/docs/concepts/#text-splitters for supported text splitters
def split_documents(documents,chunk_size,chunk_overlap):
    docs_processed = []
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True,
        separators=["\n\n", "\n", ".", " ", ""],
    )
    for doc in documents:
        docs_processed += text_splitter.split_documents([doc])
    return docs_processed

In [69]:
CHUNK_SIZE=512
CHUNK_OVERLAP=50
processed_docs = split_documents(langchain_docs,CHUNK_SIZE,CHUNK_OVERLAP)
len(processed_docs)

25

### **6. Embedding Generation using Fastembed**
[FastEmbed](https://qdrant.github.io/fastembed/) is a lightweight, fast, Python library built for embedding generation.

In [70]:
## Declaring the intended Embedding Model with Fastembed
from fastembed.embedding import TextEmbedding
pd.set_option("display.max_colwidth", None)
a = pd.DataFrame(TextEmbedding.list_supported_models()[6])
print(a["model"].to_string())

hf    sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2


In [71]:
##Initilising embedding model
## Using Default Model - BAAI/bge-small-en-v1.5

## For custom model supported by Fastembed
embedding_model = TextEmbedding(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", max_length=512)
#embedding_model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2", max_length=384)

embedding_model.model_name

'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'

### **7. Setting up our vector store - Qdrant**

Set up the qdrant client and then create a collection so that our document embeddings can be stored.

In [72]:

##Uncomment to initialise qdrant client in memory
client = qdrant_client.QdrantClient(
   location=":memory:",
)

##Uncomment below to connect to Qdrant Cloud
# client = qdrant_client.QdrantClient(
#     os.environ.get("QDRANT_URL"),
#     api_key=os.environ.get("QDRANT_API_KEY"),
# )

## Uncomment below to connect to local Qdrant
# client = qdrant_client.QdrantClient("http://localhost:6333")

In [73]:
## Collection name that will be used throughtout in the notebook
COLLECTION_NAME = "cb_db_5"

In [74]:
## General Collection level operations

## Get information about existing collections
client.get_collections()

## Get information about specific collection
#collection_info = client.get_collection(COLLECTION_NAME)
#print(collection_info)

## Deleting collection , if need be
#client.delete_collection(COLLECTION_NAME)

CollectionsResponse(collections=[])

### **8. Processing chunks into text to be processed by [FASTEMBED](https://qdrant.github.io/fastembed/)**

In [75]:
def process_document_embeddings(docs_processed):
    docs_contents = []
    docs_metadatas = []

    for doc in docs_processed:
        if hasattr(doc, 'page_content') and hasattr(doc, 'metadata'):
            docs_contents.append(doc.page_content)
            docs_metadatas.append(doc.metadata)
        else:
            # Handle the case where attributes are missing
            print("Warning: Some documents do not have 'page_content' or 'metadata' attributes.")
    
    print("data-size : ",len(docs_processed))
    print("content : ",len(docs_contents))
    print("metadata : ",len(docs_metadatas))
    return (docs_contents,docs_metadatas)

### **9. Adding document chunks into Qdrant Collection**

In [76]:
docs_contents , docs_metadatas = process_document_embeddings(docs_processed=langchain_docs)

data-size :  25
content :  25
metadata :  25


In [77]:
import numpy as np
embeddings: list[np.ndarray] = embedding_model.embed(docs_contents)

In [78]:
vectors=[v.tolist() for v in embeddings]

In [79]:
np.save('vectors_news.npy', vectors)

Load

In [80]:
import os
import json
import openai
import pandas as pd
import qdrant_client
from tqdm import tqdm
from getpass import getpass
from dotenv import load_dotenv
from datasets import load_dataset
from typing import Optional, List, Tuple
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import PointStruct
from langchain_core.language_models import BaseChatModel
import numpy as np


In [81]:

client = qdrant_client.QdrantClient(
   location=":memory:",
)
COLLECTION_NAME = "cb_db_5"
client.get_collections()


CollectionsResponse(collections=[])

In [82]:
vectors = np.load('vectors_news.npy')

In [83]:
vectors.shape

(25, 384)

In [84]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance


if not client.collection_exists(COLLECTION_NAME):
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=384, distance=Distance.COSINE),
    )


In [85]:
client.upsert(
    collection_name=COLLECTION_NAME,
    points=models.Batch(
        ids=list(range(vectors.shape[0])),
        payloads=[
            {
                "content": docs_contents[i],
                "metadata": docs_metadatas[i]
            }
            for i in range(len(docs_contents))
        ],
        vectors=vectors,
    ),
)


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [86]:
## Ensuring we have expected number of document chunks
client.count(collection_name=COLLECTION_NAME)

CountResult(count=25)

### **10. Searching for the document**

In [87]:
query_text = "BTC"
embeded_text = embedding_model.embed(query_text)
a = next(embeded_text)
print(len(a))

384


In [88]:
search_result = client.query_points(
    collection_name=COLLECTION_NAME,
    query=a,
    limit=6,
)
[print(search_result.points[i].payload['content'] + '\ndate - ',search_result.points[i].payload['metadata']['date']) for i in range(len(search_result.points))]


CoinDesk Analyst Advises UK Crypto Firm to Set Up Bitcoin Treasury

Coinsilium raised £1.25 million to help establish the BTC treasury, amid record trading volume.
date -  2025-05-16
SEC Is Probing Coinbase Over User Number Misstatement Concern

The investigation began under former SEC Chair Gary Gensler and has continued under the current administration, according to the NYT, which first reported the story.
date -  2025-05-15
KuCoin Enhances Point-of-Sale Mobile Payments With AEON

KuCoin is exploring how it can accelerate crypto adoption by allowing users to easily spend it when carrying out day-to-day transactions
date -  2025-05-20
Cantor Equity Partners Discloses $458M Bitcoin Acquisition

The bitcoin treasury company made the purchase via Tether at an average price of $95,320 per BTC.
date -  2025-05-13
Crypto Exchanges Flock to List NXPC, Token Surges 115% on $1B Volume

The NXPC token is developed by NEXPACE, the blockchain arm of South Korean video game developer Nexon.
date -

[None, None, None, None, None, None]

## **11. Llama**

In [172]:
from llama_cpp import Llama

# Инициализация LLaMA
llm = Llama(
    model_path="llama-2-7b-chat.Q5_K_M.gguf",
    temperature=0.1,
    context_window=3900,
    n_threads=1,
    max_length=1024,
    n_gpu_layers=40,
    # top_p=0.95,              # сгладить распределение
    max_tokens=512,      # > привычных 256
    # repeat_penalty=1.05,     # помягче
    # stop=[]                  # отключаем дефолтные стоп-строки

    # n_ctx=4096,               # обязательно
    # n_batch=512,              # увеличиваем
    # logits_all=True,          # чтобы получать все логиты
    # verbose=True,

    # chat_format="llama-2",  # Ключевой момент
    # logit_bias={2: -100},

)
llm.metadata

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b-chat.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 

{'general.name': 'LLaMA v2',
 'general.architecture': 'llama',
 'llama.context_length': '4096',
 'llama.rope.dimension_count': '128',
 'llama.embedding_length': '4096',
 'llama.block_count': '32',
 'llama.feed_forward_length': '11008',
 'llama.attention.head_count': '32',
 'tokenizer.ggml.eos_token_id': '2',
 'general.file_type': '17',
 'llama.attention.head_count_kv': '32',
 'llama.attention.layer_norm_rms_epsilon': '0.000001',
 'tokenizer.ggml.model': 'llama',
 'general.quantization_version': '2',
 'tokenizer.ggml.bos_token_id': '1',
 'tokenizer.ggml.unknown_token_id': '0'}

In [180]:
SYSTEM_PROMPT = """You are a financial analyst specializing in cryptocurrencies. You provide deep, reasoned, and cautious answers. You never guess if data is insufficient. Only write wether the price goes up or down, or stays the same."""

# Векторизация запроса
query_text = "What will happen to the BTC price in the coming week?"
embedded_text = embedding_model.embed(query_text)
query_vector = next(embedded_text)

# Поиск релевантных документов в QDrant
search_result = client.query_points(
    collection_name=COLLECTION_NAME,
    query=query_vector,
    limit=3,
)

# Извлекаем тексты из payload
retrieved_chunks = [
    point.payload.get("content", "") for point in search_result.points
]

# Собираем контекст
retrieved_context = "\n\n".join(retrieved_chunks)
# Формируем final prompt
rag_prompt = f"""[INST] 
<<SYS>>
{SYSTEM_PROMPT}
<</SYS>>
Based on the context below, give your best estimate or analysis.

Context:
{retrieved_context}

Question:
{query_text}

Be concise and confident. Include reasoning. Write the full answer.
[/INST]"""





In [181]:
output = llm(
    prompt=rag_prompt,
    echo=False,
    max_tokens=512,
    temperature=0.1,
    # top_p=1.0,
)
print(output['choices'][0]['text'])

Llama.generate: 48 prefix-match hit, remaining 244 prompt tokens to eval
llama_perf_context_print:        load time =   13792.18 ms
llama_perf_context_print: prompt eval time =   37438.35 ms /   244 tokens (  153.44 ms per token,     6.52 tokens per second)
llama_perf_context_print:        eval time =  138322.17 ms /   219 runs   (  631.61 ms per token,     1.58 tokens per second)
llama_perf_context_print:       total time =  176360.20 ms /   463 tokens


  Based on the current market trends and events, my analysis suggests that the BTC price is likely to increase in the coming week. Here are the reasons behind my prediction:
1. Coinsilium's £1.25 million fundraising: The recent fundraising by Coinsilium, a UK-based crypto firm, to establish a Bitcoin treasury, indicates increased institutional interest in the cryptocurrency. This could lead to higher demand and, subsequently, higher prices.
2. Cantor Equity Partners' Bitcoin acquisition: The news of Cantor Equity Partners acquiring $458 million worth of Bitcoin at an average price of $95,320 per BTC is a positive development for the cryptocurrency. It suggests that institutional investors are confident about the long-term potential of Bitcoin, which could drive up prices.
3. KuCoin's mobile payments enhancement: KuCoin's decision to enhance point
