In [1]:
import ollama
from ollama import Client

client = Client(host='http://jupyter.weburban.com:10434')

# Use list to see what models you have pulled.
client.list()

response = client.chat(model='qwen:0.5b', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
])

print(response['message']['content'])


The sky appears blue because it absorbs blue light. When sunlight reaches Earth, it hits its atmosphere, which then absorbs some of that light. This process continues until the light reaches our eyes.

In summary, the sky appears blue because it absorbs blue light from the sun.


In [2]:
import ollama
from ollama import Client
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from langchain import PromptTemplate
from nltk.tokenize import sent_tokenize
import nltk

# Initialize the LLM client (ensure the host URL is correct)
client = Client(host='http://jupyter.weburban.com:10434')

# Use list to see available models
print(client.list())

# Function to load text files
def load_text_file(file_path):
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        return file.read()

# Load example hypothesis and new document
example_text_path = "Data/Gen2Doc.txt"
example_hypothesis_path = "Data/Gen2Hypo.txt"
example_experiment_path = "Data/Gen2Ex.txt"
new_text_path = "Data/blood_cells.txt"

example_text = load_text_file(example_text_path)
example_hypothesis = load_text_file(example_hypothesis_path)
example_experiment = load_text_file(example_experiment_path)
new_text = load_text_file(new_text_path)

# Split the new document into chunks using NLTK sentence tokenization
nltk.download('punkt')
chunks = sent_tokenize(new_text)

# Initialize a BERT model for sentence embeddings
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

# Convert the document's sentences into embeddings
sentence_embeddings = model.encode(chunks, convert_to_tensor=False)

# Define the query
query = """
Extract relevant text for hypothesis and experiment extraction from the following document content.

**Definition of Hypothesis:**
A research hypothesis is a concise statement about the expected result of an experiment or project, often derived from prior research and observations.

**Definition of Experiment:**
An experiment to prove a hypothesis is a structured and controlled procedure designed to test whether a hypothesis is true or false. In an experiment, specific variables are manipulated, while others are kept constant, allowing researchers to observe and measure the effects of the changes.
"""

# Convert the query into an embedding
query_embedding = model.encode(query, convert_to_tensor=False)

# Compute similarity scores between the query and sentence embeddings
similarity_scores = cosine_similarity([query_embedding], sentence_embeddings)[0]

# Retrieve the top-k most relevant chunks (adjust k as needed)
top_k = 10  # Number of chunks to retrieve
top_k_indices = similarity_scores.argsort()[-top_k:][::-1]

# Retrieve the relevant chunks and add some context windows
relevant_chunks = []
for idx in top_k_indices:
    start = max(0, idx - 1)  # Add previous sentence as context
    end = min(len(chunks), idx + 2)  # Add next sentence as context
    relevant_chunks.extend(chunks[start:end])

# Combine relevant chunks into one text
main_relevant_text = " ".join(relevant_chunks)

# Prepare the refined prompt with clearer instructions and chain-of-thought guidance
prompt_template = PromptTemplate(
    template=f"""
    The following is an example of a text with its corresponding hypothesis and experiment. This is provided to illustrate the structure, but you are required to focus **only** on the new relevant text provided below.

    **Example (for reference only):**
    
    **Example Text:**
    {example_text}
    
    **Example Hypothesis:**
    {example_hypothesis}
    
    **Example Experiment:**
    {example_experiment}
    
    **Definitions:**
    - A **Hypothesis** is a concise statement about the expected result of an experiment or project.
    - An **Experiment** is a structured and controlled procedure designed to test whether a hypothesis is true or false.
    
    Now, analyze the following **Relevant Text** and extract the hypothesis and experiment **from this new text only**:

    **Relevant Text:**
    {main_relevant_text}

    **Step-by-step reasoning**:
    1. Read the relevant text carefully and ignore the example text provided above.
    2. Look for statements in the new text that propose a relationship between variables or an expected result—this is the hypothesis.
    3. Then, find any statements that describe how the hypothesis is tested or validated—this will be the experiment.
    4. Write the hypothesis in 1-2 sentences and the experiment in 2-3 sentences.


    Please ensure you are only using the main_relevant_text to extract the hypothesis and experiment.
    
    **Extracted Hypothesis:**
    - Provide the hypothesis from the relevant text in 1-2 concise sentences.
    
    **Extracted Experiment:**
    - Provide a detailed experimental procedure from the relevant text in 2-3 sentences.
    """,
    input_variables=["text"]
)


# Fill the prompt with the relevant text
filled_prompt = prompt_template.format(text=main_relevant_text)

# Use the updated Ollama client to invoke the LLM
response = client.chat(model='qwen2.5:0.5b', messages=[
    {
        'role': 'user',
        'content': filled_prompt,
    }
])

# Print the generated hypothesis and experiment
print("\nGenerated Response:")
print(response['message']['content'])


{'models': [{'name': 'flanT5:latest', 'model': 'flanT5:latest', 'modified_at': '2024-10-18T09:30:47.189735154Z', 'size': 496288729, 'digest': '7a929ef5596adbc4f8a090da61da9c23e02c8cacce0c547059ddc9765cda8eb3', 'details': {'parent_model': '', 'format': 'gguf', 'family': 't5', 'families': ['t5'], 'parameter_size': '247.58M', 'quantization_level': 'F16'}}, {'name': 'flanT5_Q8:latest', 'model': 'flanT5_Q8:latest', 'modified_at': '2024-10-18T01:48:56.267189427Z', 'size': 310495610, 'digest': 'f3d524247f7e302770f085c1e52f0b913ab336d5a1c75edcafe654b3b8625120', 'details': {'parent_model': '', 'format': 'gguf', 'family': 't5', 'families': ['t5'], 'parameter_size': '247.58M', 'quantization_level': 'Q8_0'}}, {'name': 'phi3.5:latest', 'model': 'phi3.5:latest', 'modified_at': '2024-10-18T01:41:23.843218919Z', 'size': 2176178843, 'digest': '61819fb370a3c1a9be6694869331e5f85f867a079e9271d66cb223acb81d04ba', 'details': {'parent_model': '', 'format': 'gguf', 'family': 'phi3', 'families': ['phi3'], 'par

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\amirs\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Generated Response:
**Relevant Text:**
Design an experiment involving 200 high school students, randomly assigned to two groups: one with AI-driven personalized learning tools and the other with traditional instructional methods. Both groups will undergo the same curriculum but with different learning supports.

The hypothesis is that using AI-driven personalized learning tools increases student engagement and improves academic performance in high school mathematics. The experiment involves comparing the two groups based on the statistical analysis of academic performance and engagement scores before and after the intervention period.

**Extracted Hypothesis:**
**Hypothesis:** Using AI-driven personalized learning tools increases student engagement and improves academic performance in high school mathematics.

**Extracted Experiment:**
- **Group A (AI-driven personalization):** 200 high school students using an AI platform to provide personalized learning recommendations based on thei

In [4]:
import ollama
from ollama import Client
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from langchain import PromptTemplate
from nltk.tokenize import sent_tokenize
import nltk

# Initialize the LLM client (ensure the host URL is correct)
client = Client(host='http://jupyter.weburban.com:10434')

# Use list to see available models
print(client.list())

# Function to load text files
def load_text_file(file_path):
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        return file.read()

# Load example hypothesis and new document
example_text_path = "Data/Gen2Doc.txt"
example_hypothesis_path = "Data/Gen2Hypo.txt"
example_experiment_path = "Data/Gen2Ex.txt"
new_text_path = "Data/blood_cells.txt"

example_text = load_text_file(example_text_path)
example_hypothesis = load_text_file(example_hypothesis_path)
example_experiment = load_text_file(example_experiment_path)
new_text = load_text_file(new_text_path)

# Split the new document into chunks using NLTK sentence tokenization
nltk.download('punkt')
chunks = sent_tokenize(new_text)

# Initialize a BERT model for sentence embeddings
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

# Convert the document's sentences into embeddings
sentence_embeddings = model.encode(chunks, convert_to_tensor=False)

# Define the query
query = """
Extract relevant text for hypothesis and experiment extraction from the following document content.

**Definition of Hypothesis:**
A research hypothesis is a concise statement about the expected result of an experiment or project, often derived from prior research and observations.

**Definition of Experiment:**
An experiment to prove a hypothesis is a structured and controlled procedure designed to test whether a hypothesis is true or false. In an experiment, specific variables are manipulated, while others are kept constant, allowing researchers to observe and measure the effects of the changes.
"""

# Convert the query into an embedding
query_embedding = model.encode(query, convert_to_tensor=False)

# Compute similarity scores between the query and sentence embeddings
similarity_scores = cosine_similarity([query_embedding], sentence_embeddings)[0]

# Retrieve the top-k most relevant chunks (adjust k as needed)
top_k = 10  # Number of chunks to retrieve
top_k_indices = similarity_scores.argsort()[-top_k:][::-1]

# Retrieve the relevant chunks and add some context windows
relevant_chunks = []
for idx in top_k_indices:
    start = max(0, idx - 1)  # Add previous sentence as context
    end = min(len(chunks), idx + 2)  # Add next sentence as context
    relevant_chunks.extend(chunks[start:end])

# Combine relevant chunks into one text
main_relevant_text = " ".join(relevant_chunks)

# Prepare the refined prompt with clearer instructions and chain-of-thought guidance
prompt_template = PromptTemplate(
    template=f"""
    Below is an example that illustrates how to extract a hypothesis and experiment from a document.
    Please focus only on the **Relevant Text** provided after the example, ignoring the example for your extraction task.
    
    **Example (for reference only):**
    ---
    **Example Text:**
    {example_text}

    **Example Hypothesis:**
    {example_hypothesis}

    **Example Experiment:**
    {example_experiment}
    ---
    
    **New Task Instructions:**
    Now, analyze the following **Relevant Text** and extract the hypothesis and experiment based only on this new text:

    **Relevant Text:**
    {main_relevant_text}

    **Step-by-step reasoning**:
    1. Ignore the example text.
    2. Look for a hypothesis in the relevant text, which suggests a relationship between variables or an expected outcome.
    3. Find any experiment that tests this hypothesis in the relevant text.
    4. Provide the hypothesis in 1-2 sentences and the experiment in 2-3 sentences.

    **Extracted Hypothesis:**
    - Provide the hypothesis from the relevant text in 1-2 concise sentences.

    **Extracted Experiment:**
    - Provide the experiment in 2-3 concise sentences, describing how the hypothesis is tested.
    """,
    input_variables=["text"]
)

# Fill the prompt with the relevant text
filled_prompt = prompt_template.format(text=main_relevant_text)

# Use the updated Ollama client to invoke the LLM without the temperature argument
response = client.chat(model='qwen2.5:0.5b', messages=[
    {
        'role': 'user',
        'content': filled_prompt,
    }
])

# Print the generated hypothesis and experiment
print("\nGenerated Response:")
print(response['message']['content'])


{'models': [{'name': 'flanT5:latest', 'model': 'flanT5:latest', 'modified_at': '2024-10-18T09:30:47.189735154Z', 'size': 496288729, 'digest': '7a929ef5596adbc4f8a090da61da9c23e02c8cacce0c547059ddc9765cda8eb3', 'details': {'parent_model': '', 'format': 'gguf', 'family': 't5', 'families': ['t5'], 'parameter_size': '247.58M', 'quantization_level': 'F16'}}, {'name': 'flanT5_Q8:latest', 'model': 'flanT5_Q8:latest', 'modified_at': '2024-10-18T01:48:56.267189427Z', 'size': 310495610, 'digest': 'f3d524247f7e302770f085c1e52f0b913ab336d5a1c75edcafe654b3b8625120', 'details': {'parent_model': '', 'format': 'gguf', 'family': 't5', 'families': ['t5'], 'parameter_size': '247.58M', 'quantization_level': 'Q8_0'}}, {'name': 'phi3.5:latest', 'model': 'phi3.5:latest', 'modified_at': '2024-10-18T01:41:23.843218919Z', 'size': 2176178843, 'digest': '61819fb370a3c1a9be6694869331e5f85f867a079e9271d66cb223acb81d04ba', 'details': {'parent_model': '', 'format': 'gguf', 'family': 'phi3', 'families': ['phi3'], 'par

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\amirs\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Generated Response:
### Extracted Hypothesis:
A randomized controlled trial comparing two methods of personalized learning to assess their impact on student engagement and academic performance in high school mathematics.

### Extracted Experiment:
The experiment involves randomly assigning 600 high school students (200 in each group) to either an AI-driven personalized learning group or a traditional instructional method group. Both groups follow the same math curriculum but receive different learning supports, including access to AI tools and more structured instruction. Pre- and post-intervention academic performance in mathematics is measured using standardized tests, while engagement is assessed through surveys and classroom observation.

Over the course of a semester, the AI group demonstrates a marked improvement in test scores compared to the control group. Data are statistically significant if p < 0.05. Participation rates among students in the AI group are higher than those i