In [3]:
import os
from dotenv import load_dotenv

# Read a local .env file into the process environment before any client is created.
# NOTE(review): presumably this supplies the OpenAI / Activeloop credentials used below — confirm.
load_dotenv()

True

In [1]:
# Hub path of the Deep Lake dataset that the cells below load and search.
vector_store_path = "hub://secufibre/space_exploration_v1"

In [2]:
from deeplake.core.vectorstore.deeplake_vectorstore import VectorStore
import deeplake.util

# Load the dataset at vector_store_path; `ds` is not referenced again in the cells shown here.
ds = deeplake.load(vector_store_path)

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/secufibre/space_exploration_v1



 

hub://secufibre/space_exploration_v1 loaded successfully.



 

In [4]:
# Open the same path as a VectorStore; its search() method is what the query cell calls.
vector_store = VectorStore(path=vector_store_path)

Deep Lake Dataset in hub://secufibre/space_exploration_v1 already exists, loading from the storage


In [5]:
import openai

def embedding_function(texts, model="text-embedding-3-small"):
    """Embed one or more texts with the OpenAI embeddings endpoint.

    Args:
        texts: A single string, or an iterable of strings.
        model: Name of the embedding model passed to the API.

    Returns:
        A list with one embedding per input text, in input order. An empty
        input yields an empty list without contacting the API.
    """
    if isinstance(texts, str):
        texts = [texts]
    # Newlines are flattened to spaces before the texts are sent.
    cleaned = [t.replace("\n", " ") for t in texts]
    if not cleaned:
        # Nothing to embed: skip the request instead of sending an empty batch.
        return []
    response = openai.embeddings.create(input=cleaned, model=model)
    return [item.embedding for item in response.data]

In [6]:
def get_user_prompt():
    """Prompt on standard input for a search query and return what was typed."""
    typed_query = input("Enter your search query: ")
    return typed_query

# Fixed query used by the following cells; get_user_prompt() is defined but not called here.
user_prompt = "Tell me about space exploration on the Moon and Mars"

In [7]:
# Query the vector store with the raw prompt; embedding_function is passed so
# the store can embed the prompt itself.
search_results = vector_store.search(
    embedding_data=user_prompt,
    embedding_function=embedding_function,
)

In [8]:
# Dump the raw result dict for inspection; the cells below read its 'score', 'text' and 'metadata' lists.
print(search_results)

{'id': ['f34e422e-1050-11f0-9390-107c61642715', 'f34e8f86-1050-11f0-9390-107c61642715', 'f34e4bde-1050-11f0-9390-107c61642715', 'f34e9a8a-1050-11f0-9390-107c61642715'], 'metadata': [{'source': 'llm.txt'}, {'source': 'llm.txt'}, {'source': 'llm.txt'}, {'source': 'llm.txt'}], 'text': ["Exploration of space, planets, and moons For broader coverage of this topic, see Exploration . Buzz Aldrin taking a core sample of the Moon during the Apollo 11 mission Self-portrait of Curiosity rover on Mars 's surface Part of a series on Spaceflight History History of spaceflight Space Race Timeline of spaceflight Space probes Lunar missions Mars missions Applications Communications Earth observation Exploration Espionage Military Navigation Colonization Habitation Exploration Telescopes Tourism Spacecraft Robotic spacecraft Satellite Space probe Cargo spacecraft Crewed spacecraft Apollo Lunar Module Space capsules Space Shuttle Space stations Spaceplanes Vostok Space launch Spaceport Launch pad Expenda

In [9]:
def wrap_text(text, width=100):
    """Break ``text`` into lines of at most ``width`` characters.

    Lines are split at the last space found within the first ``width``
    characters. When no usable space exists there, the line is cut at exactly
    ``width`` characters.

    Args:
        text: The string to wrap.
        width: Maximum line length; must be at least 1.

    Returns:
        The wrapped text, with lines joined by newline characters.

    Raises:
        ValueError: If ``width`` is smaller than 1 (the loop could never make
            progress with such a width).
    """
    if width < 1:
        raise ValueError("width must be a positive integer")

    lines = []
    while len(text) > width:
        split_index = text.rfind(' ', 0, width)
        # -1 means no space at all; 0 means the only space is the leading
        # character. Splitting at 0 would emit an empty line, so both cases
        # fall back to a hard break at `width`.
        if split_index <= 0:
            split_index = width
        lines.append(text[:split_index])
        # Drop the whitespace at the break so the next line starts on a word.
        text = text[split_index:].strip()
    lines.append(text)
    return '\n'.join(lines)

In [10]:
import textwrap

# Take index 0 of each parallel list in the search results.
top_score, top_text, top_metadata = (
    search_results['score'][0],
    search_results['text'][0].strip(),
    search_results['metadata'][0]['source'],
)

# Assemble the report first, then emit it in one go.
report_lines = [
    "Top Search Result:",
    f"Score: {top_score:.2f}",
    f"Source: {top_metadata}",
    "Text:",
    wrap_text(top_text),
]
print("\n".join(report_lines))
print("\n")

Top Search Result:
Score: 0.62
Source: llm.txt
Text:
Exploration of space, planets, and moons For broader coverage of this topic, see Exploration . Buzz
Aldrin taking a core sample of the Moon during the Apollo 11 mission Self-portrait of Curiosity
rover on Mars 's surface Part of a series on Spaceflight History History of spaceflight Space Race
Timeline of spaceflight Space probes Lunar missions Mars missions Applications Communications Earth
observation Exploration Espionage Military Navigation Colonization Habitation Exploration
Telescopes Tourism Spacecraft Robotic spacecraft Satellite Space probe Cargo spacecraft Crewed
spacecraft Apollo Lunar Module Space capsules Space Shuttle Space stations Spaceplanes Vostok Space
launch Spaceport Launch pad Expendable and reusable launch vehicles Escape velocity Non-rocket
spacelaunch Spaceflight types Sub-orbital Orbital Interplanetary Interstellar Intergalactic List of
space organizations Space agencies Space forces Companies Spaceflight po

In [11]:
# Augmented input = the user's query followed by the retrieved passage.
augmented_input = " ".join((user_prompt, top_text))
print(augmented_input)

Tell me about space exploration on the Moon and Mars Exploration of space, planets, and moons For broader coverage of this topic, see Exploration . Buzz Aldrin taking a core sample of the Moon during the Apollo 11 mission Self-portrait of Curiosity rover on Mars 's surface Part of a series on Spaceflight History History of spaceflight Space Race Timeline of spaceflight Space probes Lunar missions Mars missions Applications Communications Earth observation Exploration Espionage Military Navigation Colonization Habitation Exploration Telescopes Tourism Spacecraft Robotic spacecraft Satellite Space probe Cargo spacecraft Crewed spacecraft Apollo Lunar Module Space capsules Space Shuttle Space stations Spaceplanes Vostok Space launch Spaceport Launch pad Expendable and reusable launch vehicles Escape velocity Non-rocket spacelaunch Spaceflight types Sub-orbital Orbital Interplanetary Interstellar Intergalactic List of space organizations Space agencies Space forces Companies Spaceflight po

In [12]:
import time
import openai
from openai import OpenAI

# Client and model name read as globals by call_gpt4_with_full_text below.
client = OpenAI()
gpt_model = "gpt-4o"
# Timer starts here, so the reported response time also covers the function definition that follows.
start_time = time.time()

def call_gpt4_with_full_text(itext):
    """Ask the chat model to summarize or elaborate on the given content.

    Args:
        itext: Either a single string, or an iterable of strings (lines) that
            are joined with newlines before being sent.

    Returns:
        The text of the first choice in the model's reply. If the request
        raises, the exception message is returned instead, so the caller
        always receives a string.
    """
    # A plain string must be used as-is: '\n'.join() on a str iterates over
    # its characters and would place every character on its own line.
    if isinstance(itext, str):
        text_input = itext
    else:
        text_input = '\n'.join(itext)
    prompt = f"Please summarize or elaborate on the following content:\n{text_input}"

    try:
        response = client.chat.completions.create(
            model=gpt_model,
            messages=[
                {"role": "system", "content": "You are a space exploration expert."},
                {"role": "assistant", "content": "You can read the input and answer in detail."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1  # Fine-tune parameters as needed
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort by design: report the failure as text rather than raising.
        return str(e)
    
# Send the augmented input to the model; the reply is reused by the formatting and similarity cells.
gpt4_response = call_gpt4_with_full_text(augmented_input)

# Wall-clock seconds elapsed since start_time was recorded.
response_time = time.time() - start_time
print(f"Response Time: {response_time:.2f} seconds")
print(gpt_model, "Response:", gpt4_response)

Response Time: 13.36 seconds
gpt-4o Response: Space exploration is a vast and multifaceted field that involves the study and exploration of outer space, including planets, moons, and other celestial bodies. It encompasses a wide range of activities and technologies aimed at understanding the universe beyond Earth. Here are some key aspects of space exploration, particularly focusing on the Moon and Mars:

### Moon Exploration
- **Historical Missions**: The Moon has been a primary target for space exploration since the early days of spaceflight. The Apollo missions, particularly Apollo 11, marked significant milestones with humans landing on the Moon. Buzz Aldrin, one of the astronauts on Apollo 11, famously took core samples of the lunar surface.
- **Lunar Missions**: Various missions have been launched to study the Moon, including both crewed and uncrewed missions. These missions aim to understand the Moon's geology, potential resources, and its suitability for future human habitation

In [13]:
import re
import markdown
import textwrap

from IPython.display import display, Markdown, HTML

def print_formatted_response(response):
    """Display ``response`` as rendered HTML if it looks like Markdown, else as wrapped text.

    The text is scanned for common Markdown constructs. On a match it is
    converted with ``markdown.markdown`` and shown through ``display(HTML(...))``.
    Otherwise it is wrapped to 100 columns and printed between two rules.

    Args:
        response: The text to show.
    """
    # Each pattern targets an actual Markdown construct. A bare "_" or a "- "
    # in the middle of a sentence (snake_case names, spaced dashes) must not
    # count as formatting, or nearly all plain prose would be treated as Markdown.
    markdown_patterns = [
        r"^\s{0,3}#{1,6}\s",                          # Headers
        r"^\s*[*+-]\s+\S",                            # Bullet / dash list items
        r"\*\*[^*\n]+\*\*",                           # Bold
        r"(?<![\w*])\*[^*\s][^*\n]*\*(?![\w*])",      # Italics with asterisks
        r"(?<!\w)_[^_\s](?:[^_\n]*[^_\s])?_(?!\w)",   # Italics with underscores
        r"\[[^\]\n]+\]\([^)\n]+\)",                   # Links
        r"```",                                       # Code blocks
    ]

    looks_like_markdown = any(
        re.search(pattern, response, re.MULTILINE) for pattern in markdown_patterns
    )

    if looks_like_markdown:
        html_output = markdown.markdown(response)
        display(HTML(html_output))
    else:
        wrapper = textwrap.TextWrapper(width=100)
        wrapped_text = wrapper.fill(response)

        print("Text Response:")
        print("-" * 100)
        print(wrapped_text)
        print("-" * 100)

# Show the model reply obtained above.
print_formatted_response(gpt4_response)

## Evaluating the output with Cosine Similarity

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def calculate_cosine_similarity(text1, text2):
    """Return the cosine similarity between the TF-IDF vectors of two texts.

    A fresh TfidfVectorizer is fitted on just these two texts on every call.
    """
    documents = [text1, text2]
    weighted = TfidfVectorizer().fit_transform(documents)
    # Keep each row two-dimensional so it can be passed to cosine_similarity.
    first_row, second_row = weighted[0:1], weighted[1:2]
    pairwise = cosine_similarity(first_row, second_row)
    return pairwise[0][0]

In [15]:
# TF-IDF similarity between the bare user prompt and the model reply.
similarity_score = calculate_cosine_similarity(user_prompt, gpt4_response)
print(f"Cosine Similarity Score: {similarity_score:.2f}")


Cosine Similarity Score: 0.48


In [17]:
# Same TF-IDF measure, now against the augmented input (prompt plus retrieved text).
# Note: this rebinds similarity_score from the previous cell.
similarity_score = calculate_cosine_similarity(augmented_input, gpt4_response)
print(f"Cosine Similarity Score: {similarity_score:.2f}")


Cosine Similarity Score: 0.64


In [18]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model read as a global by calculate_cosine_similarity_with_embeddings.
model = SentenceTransformer('all-MiniLM-L6-v2')

def calculate_cosine_similarity_with_embeddings(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are encoded with the module-level ``model``.
    """
    first_vec, second_vec = (model.encode(text) for text in (text1, text2))
    # Each vector is wrapped in a list so cosine_similarity receives 2-D input.
    score_matrix = cosine_similarity([first_vec], [second_vec])
    return score_matrix[0][0]

# Embedding-based similarity between the augmented input and the model reply.
similarity_score = calculate_cosine_similarity_with_embeddings(augmented_input, gpt4_response)
print(f"Cosine Similarity Score: {similarity_score:.2f}")

Cosine Similarity Score: 0.78
