In [None]:
!pip install langchain
!pip install openai
!pip install qdrant_client
!pip install datasets
!pip install tiktoken
!pip install langchain-community
!pip install PyMuPDF
!pip install sentence-transformers

Collecting qdrant_client
  Downloading qdrant_client-1.12.1-py3-none-any.whl.metadata (10 kB)
Collecting grpcio-tools>=1.41.0 (from qdrant_client)
  Downloading grpcio_tools-1.67.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.3 kB)
Collecting portalocker<3.0.0,>=2.7.0 (from qdrant_client)
  Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)
Collecting protobuf<6.0dev,>=5.26.1 (from grpcio-tools>=1.41.0->qdrant_client)
  Downloading protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Collecting grpcio>=1.41.0 (from qdrant_client)
  Downloading grpcio-1.67.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Collecting h2<5,>=3 (from httpx[http2]>=0.20.0->qdrant_client)
  Downloading h2-4.1.0-py3-none-any.whl.metadata (3.6 kB)
Collecting hyperframe<7,>=6.0 (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant_client)
  Downloading hyperframe-6.0.1-py3-none-any.whl.metadata (2.7 kB)
Collecting hpack<5,>=4.0 (fro

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper). The credentials
# file read by load_dotenv in the next code cell lives under this mount point,
# so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

0. New Cluster

In [None]:
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
import openai


# Load the .env file stored on the mounted Google Drive.
load_dotenv('/content/drive/MyDrive/NASA Hunch - Psych AI/Hunch.env')

# Access the environment variables (os.getenv returns None when one is unset).
qdrant_key = os.getenv('QDRANT_KEY')
qdrant_url = os.getenv('QDRANT_URL')
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')
openai.api_key = os.getenv('OPENAI_API_KEY')

# Never print the secret values themselves: cell outputs are saved inside the
# notebook file and would leak the keys to anyone the notebook is shared with.
# Only report whether each setting was found.
for _setting_name, _setting_value in (
    ('QDRANT_KEY', qdrant_key),
    ('QDRANT_URL', qdrant_url),
    ('LANGCHAIN_API_KEY', langchain_api_key),
    ('OPENAI_API_KEY', openai.api_key),
):
    print(f"{_setting_name}: {'set' if _setting_value else 'MISSING'}")

# Connect to the Qdrant cluster with the retrieved credentials.
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_key)


In [None]:
from qdrant_client.http import models

# Vector settings for the collection. The size has to equal the length of the
# vectors that are upserted into it (the ingestion cell encodes documents with
# SentenceTransformer 'all-MiniLM-L6-v2').
collection_config = models.VectorParams(
    size=384,  # dimensions of the vectors
    distance=models.Distance.COSINE
)

# Creating a collection that already exists fails with "409 Conflict", which
# made this cell crash on every re-run. Create it only when it is missing so
# the notebook survives Restart & Run All.
if not qdrant_client.collection_exists(collection_name="Space Psychology"):
    qdrant_client.create_collection(
        collection_name="Space Psychology",
        vectors_config=collection_config
    )

UnexpectedResponse: Unexpected Response: 409 (Conflict)
Raw response content:
b'{"status":{"error":"Wrong input: Collection `Space Psychology` already exists!"},"time":0.080076387}'

In [None]:
# List all collections available on the cluster; a quick check that the
# "Space Psychology" collection used by the rest of the notebook is present.
collections = qdrant_client.get_collections()
print(collections)

collections=[CollectionDescription(name='Space Psychology')]


Adding Data to RAG

In [None]:
import os
import fitz  # PyMuPDF
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

# Load environment variables.
# A local '.env' is tried first (original behaviour). The Drive-hosted file
# used by the first credential cell is then loaded as well, so that this cell
# sees the same credentials even when it is run on its own. load_dotenv does
# not override variables that are already set, so the second call only fills
# in values that are still missing, and a non-existent file is simply skipped.
load_dotenv('.env')
load_dotenv('/content/drive/MyDrive/NASA Hunch - Psych AI/Hunch.env')
qdrant_key = os.getenv('QDRANT_KEY')
qdrant_url = os.getenv('QDRANT_URL')

# Create Qdrant client
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_key)

# Load a pre-trained model for vectorization. The retrieval code further down
# encodes queries with this same module-level `model`.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        The ``get_text()`` output of each page, concatenated in page order
        with no separator inserted between pages.
    """
    # The document is used as a context manager so it is closed again as soon
    # as all pages have been read.
    with fitz.open(pdf_path) as document:
        page_texts = [page.get_text() for page in document]
    return "".join(page_texts)

# PDFs to index, given as absolute paths inside the Colab runtime.
pdf_files = [
    "/content/THE BURDEN OF SPACE EXPLORATION ON THE MENTAL HEALTH OF ASTRONAUTS.pdf",
    '/content/Applications of methods of psychological support developed for astronauts for use in medical settings.pdf',
    '/content/Psychological Issues Relevant to Astronaut Selection for Long-Duration Space Flight_ A Review of the Literature.pdf',
    '/content/Space Missions Psychological Issues.pdf',
]  # Add paths to your PDF files


def _build_point(point_id, pdf_path):
    """Build the Qdrant point (id, vector, payload) for one PDF.

    The full extracted text is embedded as a single vector and also stored in
    the payload under 'text', next to the source path under 'pdf_file'.
    """
    document_text = extract_text_from_pdf(pdf_path)
    # NOTE(review): the whole paper is encoded in one call. Sentence-embedding
    # models presumably truncate long inputs, so the vector may only reflect
    # the beginning of each paper - confirm, and consider chunking.
    embedding = model.encode(document_text).tolist()  # plain list for Qdrant
    return {
        'id': point_id,  # integer ID = position of the file in pdf_files
        'vector': embedding,
        'payload': {
            'text': document_text,
            'pdf_file': pdf_path,
        },
    }


# One point per PDF, in list order.
points = [_build_point(position, path) for position, path in enumerate(pdf_files)]

# Upsert the vectors into Qdrant (points with an existing ID are overwritten).
qdrant_client.upsert(collection_name="Space Psychology", points=points)

print("Research papers have been added to the collection.")


Research papers have been added to the collection.


In [None]:
# Sanity-check the index with an example query. The vector is converted to a
# plain list, the same way retrieve_relevant_documents passes it to Qdrant.
query_vector = model.encode("mood assessment in space").tolist()  # Example query
results = qdrant_client.search(
    collection_name="Space Psychology",
    query_vector=query_vector,
    limit=5  # Number of results to return
)

# Each hit's payload carries the complete text of a paper, so printing
# `results` directly floods the notebook output. Show one summary line per
# hit instead: similarity score, point ID and source file.
for hit in results:
    source_file = (hit.payload or {}).get('pdf_file')
    print(f"score={hit.score:.4f}  id={hit.id}  file={source_file}")



Linking to LLM

In [None]:
import re

# A line that consists only of a references heading, e.g. "References",
# "REFERENCES", "7. References" or "Bibliography:".
_REFERENCES_HEADING = re.compile(
    r'^[ \t]*(?:\d+\.?[ \t]+)?(?:References?|Bibliography)[ \t]*:?[ \t]*\r?$',
    flags=re.MULTILINE | re.IGNORECASE,
)


def clean_document(text):
    """Strip the trailing references/bibliography section from a document.

    The previous pattern, ``(References?|Bibliography).*?$`` with ``re.DOTALL``
    and without ``re.MULTILINE``, matched from the first occurrence of the
    word anywhere in the text up to the end of the string. Any sentence
    mentioning "Reference" in the body therefore removed everything after it.
    Now the text is cut only at a standalone heading line, and at the last
    such line, so that an earlier table-of-contents entry does not truncate
    the body.

    Args:
        text: Raw document text. ``None`` or an empty string is accepted.

    Returns:
        The text before the last references heading (or the whole text when
        no heading line is found), with surrounding whitespace stripped.
    """
    if not text:
        return ""

    last_heading = None
    for last_heading in _REFERENCES_HEADING.finditer(text):
        pass  # keep only the final match

    if last_heading is not None:
        text = text[:last_heading.start()]
    return text.strip()

In [None]:
def extract_relevant_sections(text):
    """Keep only the paragraphs that mention a key section name.

    The text is split on blank lines ("\\n\\n"). A paragraph is kept when it
    contains "Introduction", "Conclusion" or "Abstract" (case-sensitive
    substring match). Kept paragraphs are re-joined with blank lines in their
    original order.
    """
    wanted_markers = ("Introduction", "Conclusion", "Abstract")
    paragraphs = text.split("\n\n")
    kept = [
        paragraph
        for paragraph in paragraphs
        if any(marker in paragraph for marker in wanted_markers)
    ]
    return "\n\n".join(kept)

In [None]:
def retrieve_relevant_documents(query, collection_name="Space Psychology", top_k=1):
    """Return the cleaned text of the documents most similar to ``query``.

    Args:
        query: Natural-language query. It is embedded with the module-level
            ``model``, the same encoder the indexing cell uses.
        collection_name: Qdrant collection to search.
        top_k: Maximum number of hits to request.

    Returns:
        A list with one string per hit, in the order Qdrant returned them:
        the hit's ``payload['text']`` passed through ``clean_document``.
    """
    hits = qdrant_client.search(
        collection_name=collection_name,
        query_vector=model.encode(query).tolist(),
        limit=top_k,
    )

    cleaned_texts = []
    for hit in hits:
        cleaned_texts.append(clean_document(hit.payload['text']))
    return cleaned_texts

In [None]:
def generate_response_with_context(user_query, retrieved_texts, max_context_chars=24000):
    """Answer ``user_query`` with GPT-4, grounded in the retrieved documents.

    Changes from the earlier version:
      * The retrieved context is sent as a ``system`` message. It used to be
        sent with the ``assistant`` role, which presents the papers to the
        model as something it had said itself earlier in the conversation.
      * The context is capped at ``max_context_chars`` characters. The payload
        holds whole papers, so an uncapped context can exceed the model's
        context window and make the request fail.
      * Empty documents are skipped, and when nothing was retrieved no
        (empty) context message is sent at all.

    Args:
        user_query: The user's question, sent as the ``user`` message.
        retrieved_texts: Iterable of document strings (may be empty or None).
        max_context_chars: Upper bound on the joined context length; ``None``
            disables the cap. The default is a rough character budget, not an
            exact token count - tune it for the model in use.

    Returns:
        The text content of the first completion choice.
    """
    # Combine the non-empty retrieved documents into one context string.
    context = "\n\n".join(text for text in (retrieved_texts or []) if text)
    if max_context_chars is not None and len(context) > max_context_chars:
        context = context[:max_context_chars]

    # Create a chat-based prompt: persona, optional context, then the question.
    messages = [
        {"role": "system", "content": "You are an expert on astronauts' mental health."},
    ]
    if context:
        messages.append(
            {"role": "system", "content": f"The following context is from research papers:\n{context}"}
        )
    messages.append({"role": "user", "content": user_query})

    # Generate a response using the Chat API
    response = openai.chat.completions.create(
        model="gpt-4",  # You can use 'gpt-3.5-turbo' or 'gpt-4'
        messages=messages,
        max_tokens=500,  # Adjust the token limit based on your needs
        temperature=0.7
    )

    return response.choices[0].message.content

In [None]:
import openai
from qdrant_client import QdrantClient
from qdrant_client.http import models
from sentence_transformers import SentenceTransformer
import os

# Read the OpenAI key from the process environment. os.getenv only reads
# variables that are already set (it does not load a .env file) and returns
# None when the variable is missing, so a load_dotenv cell must have run first.
openai.api_key = os.getenv('OPENAI_API_KEY')  # Your GPT API key

# Re-create the module-level Qdrant client that retrieve_relevant_documents
# uses, from the URL and key found in the environment.
qdrant_client = QdrantClient(
    url=os.getenv('QDRANT_URL'),
    api_key=os.getenv('QDRANT_KEY')
)

# Load the model used for creating query vectors (same one used for indexing);
# retrieve_relevant_documents reads it through the module-level name `model`.
model = SentenceTransformer('all-MiniLM-L6-v2')


# Example usage: one end-to-end pass (retrieve, then generate).
user_query = "I have been feeling down, what should I do?"

# Retrieve relevant documents from Qdrant (default collection, top_k=1).
retrieved_documents = retrieve_relevant_documents(user_query)

# Generate a response using the retrieved context
final_response = generate_response_with_context(user_query, retrieved_documents)

print("Response from the model:")
print(final_response)

Response from the model:
Astronauts face several psychological challenges during long-duration space missions, which can potentially lead to psychological problems. 

1. Physical Factors: Microgravity can impact cell structure and differentiation, as well as the immune and central nervous system. The central nervous system needs to adapt to microgravity since different somatosensory, visual, and vestibular information need to be processed. Space radiation also represents a risk during space missions, leading to several changes in the central nervous system, like shifts in brain fluids, altered sensory perception, and neurovestibular problems.

2. Habitability: Factors of "habitability", referring to the main characteristics of the spacecraft, can contribute to the overall well-being of astronauts. These include light, noise, vibration, and temperature. 

3. Individual Issues: Isolation from family, friends, and life on Earth, coupled with hard work continuously monitored, can lead to p