In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer

import weaviate
from weaviate.classes.init import Auth
from weaviate.classes.config import Configure
import os
import json
from dotenv import load_dotenv
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [None]:

# Replace with the correct model name for Llama 2 2B (check HuggingFace Model Hub for the exact name)
model_name = "EleutherAI/gpt-neo-2.7B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
llm = AutoModelForCausalLM.from_pretrained(model_name)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [29]:
# Best practice: store your credentials in environment variables
weaviate_url = os.getenv("WEAVIATE_REST_ENDPOINT")
weaviate_api_key = os.getenv("WEAVIATE_API_KEY")

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
)

print(client.is_ready())  # Should print: `True`


True


In [5]:
raw_data = client.collections.create(
    name="RawPsychologyData",
    vectorizer_config=Configure.Vectorizer.text2vec_weaviate(), # Configure the Weaviate Embeddings integration
    generative_config=Configure.Generative.cohere()             # Configure the Cohere generative AI integration
)

In [6]:
section_page_data = json.load(open("../Data/page_section.json", "r"))
section_page_data[0]

  section_page_data = json.load(open("../Data/page_section.json", "r"))


{'page': 7,
 'heading': 'Introduction to Psychology',
 'data': 'Page 7Introduction to Psychology1FIGURE 1.1 Psychology is the scientific study of mind and behavior. (credit "background": modification of work byNattachai Noogure; credit "top left": modification of work by Peter Shanks; credit "top middle": modification of workby "devinf"/Flickr; credit "top right": modification of work by Alejandra Quintero Sinisterra; credit "bottom left":modification of work by Gabriel Rocha; credit "bottom middle-left": modification of work by Caleb Roenigk; credit"bottom middle-right": modification of work by Staffan Scherz; credit "bottom right": modification of work by CzechProvincial Reconstruction Team)INTRODUCTIONClive Wearing is an accomplished musician who lost his ability to form new memories whenhe became sick at the age of 46. While he can remember how to play the piano perfectly, he cannot rememberwhat he ate for breakfast just an hour ago (Sacks, 2007). James Wannerton experiences a tast

In [9]:
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=200)

In [12]:
raw_psychology_data = client.collections.get("RawPsychologyData")

with raw_psychology_data.batch.fixed_size(batch_size=200) as batch:
    for d in section_page_data:
        # Split the text into smaller chunks
        chunks = splitter.split_text(d["data"])
        # Add each chunk as a separate object in the batch
        for chunk in chunks:
            batch.add_object(
                {
                    "text": chunk,
                    "page": d["page"],
                    "heading": d["heading"],
                }
            )

In [17]:
questions = json.load(open("../Data/queries.json", "r"))

  questions = json.load(open("../Data/queries.json", "r"))


In [18]:
questions[0]

{'query_id': '1', 'question': 'What is the scientific method in psychology?'}

In [25]:
raw_psychology_data.query.fetch_objects(limit=10)  # or use pagination for large datasets

QueryReturn(objects=[Object(uuid=_WeaviateUUIDInt('0011a6f3-3780-433f-9231-055c15c6f623'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'heading': '16.2 Types of Treatment', 'page': 611.0, 'text': 'and return to his relaxed state. They repeat this scenario over and over until Jayden can imagine himselfpressing the call button without anxiety. Over time the therapist and Jayden use progressive relaxation andimagination to proceed through all of the situations on Jayden’s hierarchy until he becomes desensitized toeach one. After this, Jayden and the therapist begin to practice what he only previously envisioned in therapy,gradually going from pressing the button to actually riding an elevator. The goal'}, references=None, vector={}, collection='RawPsychologyData'), Object(uuid=_WeaviateUUIDInt('00221a06-8ed8-4cc9-bae8-d2571b9f8142'), metadata=MetadataRe

In [23]:
response = raw_psychology_data.generate.near_text(
    query="psychology",
    limit=3,
)
response.generated

''

''

In [26]:
client.close()  # Free up resources