In [3]:

# Mount Google Drive at /content/drive; the persistent ChromaDB directory opened
# later in this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!pip install -U sentence-transformers
!pip install chromadb
!pip install gradio
!pip install evaluate

Collecting chromadb
  Downloading chromadb-0.5.16-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.4-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.32.0-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.7.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.27.0-py3

In [5]:

from sentence_transformers import SentenceTransformer

class Embedder:
    """Callable wrapper around a multilingual SentenceTransformer model.

    An instance is handed to ChromaDB as the collection's embedding function
    and is also called directly to embed documents and queries.
    """

    def __init__(self):
        """Load the paraphrase-multilingual-mpnet-base-v2 sentence encoder."""
        self.model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')

    def __call__(self, input):
        """Encode ``input`` with the model and return the result converted via ``tolist()``."""
        # The parameter keeps the name `input` so the call signature is unchanged.
        encoded = self.model.encode(input)
        return encoded.tolist()


  from tqdm.autonotebook import tqdm, trange


In [6]:
import chromadb
import numpy as np
from tqdm import tqdm

class CustomChromaDB:
    """Small wrapper around a persistent ChromaDB collection that embeds text with ``Embedder``."""

    def __init__(self, db_path='/content/drive/MyDrive/AT-lab5/ChromaDB', collection_name="custom_coll"):
        """
        Initialize the ChromaDB client with a specified path and embedding model.

        :param db_path: Directory of the persistent ChromaDB store.
        :param collection_name: Name of the collection to open, created if it does not exist.
        """
        self.embedding_model = Embedder()
        self.db_client = chromadb.PersistentClient(path=db_path)
        self.data_collection = self.db_client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_model
        )

    def upload_data(self, fragments, metadata, batch_size=20000):
        """
        Upload data fragments and their corresponding metadata to the ChromaDB collection in batches.

        IDs are the positional indices ``"0"``, ``"1"``, ... of the fragments, so they
        restart from ``"0"`` on every call.

        :param fragments: List of text fragments to be uploaded.
        :param metadata: List of metadata entries corresponding to each fragment.
        :param batch_size: The number of fragments to process in one batch.
        :raises ValueError: If ``batch_size`` is not positive or the two lists differ in length.
        """
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        if len(fragments) != len(metadata):
            raise ValueError(
                f"fragments and metadata must have the same length "
                f"({len(fragments)} != {len(metadata)})"
            )

        # One ID per fragment; sizing this from the fragments keeps documents,
        # metadata and IDs aligned.
        fragment_ids = [str(index) for index in range(len(fragments))]

        # Stepping by batch_size covers the final partial batch as well, so no
        # separate "remainder" pass is needed.
        for start in tqdm(range(0, len(fragments), batch_size), desc="Uploading data"):
            end = start + batch_size
            batch_fragments = fragments[start:end]

            self.data_collection.add(
                documents=batch_fragments,
                embeddings=self.embedding_model(batch_fragments),
                metadatas=metadata[start:end],
                ids=fragment_ids[start:end]
            )

        print("Dataset successfully uploaded to ChromaDB.")

    def search(self, text, count=1):
        """
        Search for the most similar documents to the given text in the ChromaDB collection.

        :param text: The query text to search for (a string, or a list of query strings).
        :param count: The number of results to return per query.
        :return: A dictionary containing the search results.
        """
        # Always embed a batch so the query embeddings are a list of vectors,
        # one per query, rather than a single flat vector.
        queries = [text] if isinstance(text, str) else text
        vectors = self.embedding_model(queries)
        return self.data_collection.query(
            query_embeddings=vectors,
            n_results=count,
            include=['distances', 'embeddings', 'documents', 'metadatas'],
        )

In [7]:
# Retrieval smoke test. `cdb` stays in the notebook namespace and is reused by
# evaluate_question(). Each entry of `questions` is [query text, number of documents].
cdb = CustomChromaDB()
questions = [
    ['How much money?', 2]
]
for count, (query, n_results) in enumerate(questions):
    result = cdb.search(query, n_results)
    print(f"-----------{count}-----------")
    print(f"Question: {query}")
    print(f"Answer: {result['documents']}")
    print()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.13k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/723 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/402 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

-----------Вопрос №0-----------
Вопрос: How much money?
Ответы: [['for 450-600 million. If I have that much money to throw around in the first place, you betcha Im goi', 't its around 500, which is just about double what my wallet will allow me to spend. And, for 500, I ']]


In [8]:
!pip install requests



In [None]:
import requests
import json

def make_post_request(prompt_text, api_key=None, timeout=30):
    """
    Send a prompt to the Gemini ``generateContent`` REST endpoint and return the generated text.

    :param prompt_text: Prompt to send as the single text part of the request.
    :param api_key: API key for the service. When omitted, it is read from the
        ``GEMINI_API_KEY`` or ``GOOGLE_API_KEY`` environment variable. If no key is
        found the request is still sent without one, and the service's error
        response is printed.
    :param timeout: Seconds to wait for the HTTP request before giving up.
    :return: Text of the first part of the first candidate, or ``None`` if the
        request failed or the response did not have the expected shape.
    """
    import os  # local import so this cell does not depend on another cell's imports

    url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent'

    headers = {
        'Content-Type': 'application/json',
    }

    # Keep the key out of the notebook source: pass it in or take it from the
    # environment, and send it as a header rather than embedding it in the URL.
    api_key = api_key or os.environ.get('GEMINI_API_KEY') or os.environ.get('GOOGLE_API_KEY')
    if api_key:
        headers['x-goog-api-key'] = api_key

    payload = {
        "contents": [
            {
                "parts": [
                    {
                        "text": prompt_text
                    }
                ]
            }
        ]
    }

    try:
        # A timeout prevents the cell from hanging forever on a stalled connection.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred during the request: {e}")
        return None

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}: {response.text}")
        return None

    try:
        return response.json()['candidates'][0]['content']['parts'][0]['text']
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 response without the expected candidates/parts structure, or with a
        # body that is not valid JSON, ends up here.
        print(f"Unexpected response format: {e!r}")
        return None

# Example call of the function
# result = make_post_request("Hello, how are you?")
# print(result)

In [1]:
!pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m488.6 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


In [None]:
def evaluate_question(question, desired_answer, n_results=3):
    """
    Answer a question with retrieved context and score the answer against a reference.

    Relies on the notebook globals ``cdb``, ``make_post_request`` and ``bertscore``
    being defined before the call.

    :param question: Question to answer.
    :param desired_answer: Reference text the generated answer is compared with.
    :param n_results: Number of documents to retrieve as context.
    :return: The value returned by ``bertscore.compute``, or ``None`` when no answer
        was generated.
    """
    result = cdb.search(question, n_results)
    # result['documents'][0] holds the documents retrieved for the single query.
    context = ' '.join(result['documents'][0])
    prompt = f"Context: {context}.\nQuestion: {question}"

    answer = make_post_request(prompt)
    if answer is None:
        # Nothing to score; passing None to the metric would only raise.
        print("No answer was generated; skipping scoring.")
        return None

    metric = bertscore.compute(predictions=[answer], references=[desired_answer], model_type="distilbert-base-uncased")
    print(answer)
    return metric

In [2]:
# Definition-style question; the second argument is the reference text for scoring.
evaluate_question('What is a computer?', 'electronic device')

A computer is an electronic device that manipulates information or data. It has the ability to store, retrieve, and process data. You can use it with various software packages, including operating systems, to perform a wide range of tasks.


In [3]:
# Definition-style question with a short paraphrased reference.
evaluate_question('What is Artificial Intelligence?', 'think like humans')

Artificial Intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think and learn like humans, performing tasks such as problem-solving, reasoning, and perception. The field involves research and development by scientists and institutions, including M.I.T. and the University of Georgia.


In [5]:
# Opinion-style question; the reference is a single name.
evaluate_question('Who is the best basketball player?', 'Michael Jordan')

The definition of the "best" basketball player can vary, but it often refers to someone who contributes significantly to their team's success, not just through scoring but also through defense, playmaking, and leadership. Names like Michael Jordan are frequently mentioned when discussing the greatest players of all time.


In [6]:
# Factual question; the reference is a single name.
evaluate_question('Who was the first person in space?', 'Yuri Gagarin')

The first person in space was Yuri Gagarin, a Soviet cosmonaut. He completed a single orbit of Earth on April 12, 1961, aboard the Vostok 1 spacecraft. This historic achievement marked the beginning of human spaceflight.


In [7]:
# Open-ended question; the reference is a one-word answer.
evaluate_question('What is the meaning of life?', 'meaningless')

The meaning of life is a philosophical question with many interpretations. It can be about finding personal significance and purpose, contributing positively to the world, or simply existing in a unique way. Different people and cultures may have different views on what gives life its meaning.


In [None]:
# Load the BERTScore metric. evaluate_question() reads the global `bertscore`
# defined here, so this cell has to run before any evaluate_question(...) call.
from evaluate import load
bertscore = load("bertscore")

# Sanity check on identical prediction/reference pairs; scores are expected to be
# at or near 1.0.
predictions = ["hello there", "general kenobi"]
references = ["hello there", "general kenobi"]
results = bertscore.compute(predictions=predictions, references=references, lang="en")
results

In [None]:
import gradio as gr  # installed by the pip cell near the top of the notebook

# NOTE(review): `echo` is not defined in any visible cell of this notebook — the
# chat handler passed as `fn` must be defined before this cell is run.
ex = ['How can I optimize the performance of my Python code?', 'What are the main differences between machine learning and deep learning?' ]
demo = gr.ChatInterface(fn=echo, examples=ex, title="AI Bot")
# share=True asks Gradio for a public share link in addition to the local URL.
demo.launch(share=True)