## Final Project: Query-Driven Retrieval-Augmented Graph Exploration Tool
By Karl Simon

### Step 1: Load the dataset into PyG (PyTorch Geometric)

In [1]:
# Necessary imports for entire notebook
import json
import torch
import re
from IPython.display import display, HTML
from torch_geometric.data import HeteroData
from collections import defaultdict
from torch_geometric.utils import k_hop_subgraph
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import requests
import os
from openai import OpenAI

# Load JSON data from file
file_path = "/home/karlsimon/CSCI6365/final/graph.json"
graph_data = []

# Load data line by line to prevent memory overload
with open(file_path, "r") as f:
    for line in f:
        try:
            graph_data.append(json.loads(line))
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON line: {e}")
            continue

# Initialize HeteroData object
data = HeteroData()

# Mapping for node indices per node type
node_mappings = defaultdict(dict)

# Temporary storage for properties
node_properties = defaultdict(lambda: defaultdict(list))
edge_indices = defaultdict(lambda: defaultdict(list))

# # Define limits for node subsets based on type
# node_limits = {
#     'Publication': 1000,
#     'Dataset': 500,
#     'ScienceKeyword': 300,
#     'Instrument': 200,
#     'Platform': 150,
#     'Project': 100,
#     'DataCenter': 50
# }

# Track the number of nodes added per type
node_counts = defaultdict(int)

# Process nodes with limits
for item in graph_data:
    if item['type'] == 'node':
        node_type = item['labels'][0]
        # if node_counts[node_type] >= node_limits.get(node_type, 50):
        #     continue  # Skip nodes once the limit is reached

        node_id = item['id']
        properties = item['properties']

        # Store the node index mapping
        node_index = len(node_mappings[node_type])
        node_mappings[node_type][node_id] = node_index
        node_counts[node_type] += 1

        # Store properties temporarily by type
        for key, value in properties.items():
            if isinstance(value, list) and all(isinstance(v, (int, float)) for v in value):
                node_properties[node_type][key].append(torch.tensor(value, dtype=torch.float))
            elif isinstance(value, (int, float)):
                node_properties[node_type][key].append(torch.tensor([value], dtype=torch.float))
            else:
                node_properties[node_type][key].append(value)  # non-numeric properties as lists

# # Define limits for relationships based on type
# relationship_limits = {
#     'CITES': 2000,
#     'HAS_APPLIED_RESEARCH_AREA': 1000,
#     'HAS_SCIENCEKEYWORD': 500,
#     'HAS_PLATFORM': 500,
#     'HAS_DATASET': 500,
#     'OF_PROJECT': 300,
#     'HAS_INSTRUMENT': 200
# }

# Track the number of relationships added per type
relationship_counts = defaultdict(int)

# Filter relationships to only include sampled nodes
for item in graph_data:
    if item['type'] == 'relationship':
        start_type = item['start']['labels'][0]
        end_type = item['end']['labels'][0]
        start_id = item['start']['id']
        end_id = item['end']['id']
        edge_type = item['label']

        # # Skip if relationship limit reached
        # if relationship_counts[edge_type] >= relationship_limits.get(edge_type, 100):
        #     continue

        # Check if start and end nodes exist in the sampled nodes
        if start_id in node_mappings[start_type] and end_id in node_mappings[end_type]:
            start_idx = node_mappings[start_type][start_id]
            end_idx = node_mappings[end_type][end_id]

            # Append to edge list
            edge_indices[(start_type, edge_type, end_type)]['start'].append(start_idx)
            edge_indices[(start_type, edge_type, end_type)]['end'].append(end_idx)
            relationship_counts[edge_type] += 1

# Finalize node properties by batch processing
for node_type, properties in node_properties.items():
    data[node_type].num_nodes = len(node_mappings[node_type])
    for key, values in properties.items():
        if isinstance(values[0], torch.Tensor):
            data[node_type][key] = torch.stack(values)
        else:
            data[node_type][key] = values  # Keep non-tensor properties as lists

# Finalize edge indices in bulk
for (start_type, edge_type, end_type), indices in edge_indices.items():
    edge_index = torch.tensor([indices['start'], indices['end']], dtype=torch.long)
    data[start_type, edge_type, end_type].edge_index = edge_index

# Display statistics for verification
print("Nodes and Properties:")
for node_type in data.node_types:
    print(f"\nNode Type: {node_type}")
    print(f"Number of Nodes: {data[node_type].num_nodes}")
    for key, value in data[node_type].items():
        if key != 'num_nodes':
            if isinstance(value, torch.Tensor):
                print(f"  - {key}: {value.shape}")
            else:
                print(f"  - {key}: {len(value)} items (non-numeric)")

print("\nEdges and Types:")
for edge_type in data.edge_types:
    edge_index = data[edge_type].edge_index
    print(f"Edge Type: {edge_type} - Number of Edges: {edge_index.size(1)} - Shape: {edge_index.shape}")


Nodes and Properties:

Node Type: Dataset
Number of Nodes: 6390
  - temporalExtentStart: 6375 items (non-numeric)
  - seCorner: 5330 items (non-numeric)
  - cmrId: 6390 items (non-numeric)
  - globalId: 6390 items (non-numeric)
  - fastrp_embedding_with_labels: torch.Size([6390, 512])
  - abstract: 6390 items (non-numeric)
  - daac: 6131 items (non-numeric)
  - nwCorner: 5330 items (non-numeric)
  - temporalFrequency: 6390 items (non-numeric)
  - pagerank_global: torch.Size([6390, 1])
  - temporalExtentEnd: 3765 items (non-numeric)
  - shortName: 6390 items (non-numeric)
  - landingPageUrl: 3037 items (non-numeric)
  - doi: 6390 items (non-numeric)
  - longName: 6390 items (non-numeric)

Node Type: DataCenter
Number of Nodes: 184
  - pagerank_global: torch.Size([184, 1])
  - globalId: 184 items (non-numeric)
  - fastrp_embedding_with_labels: torch.Size([184, 512])
  - shortName: 184 items (non-numeric)
  - url: 184 items (non-numeric)
  - longName: 184 items (non-numeric)

Node Type: P

In [2]:
# Functions definitions for keywords, search and display used in next cell
def extract_keywords(query):
    keywords = re.findall(r'\b\w+\b', query)
    return [kw.lower() for kw in keywords]

# TODO: improve search metrics (i.e. amond preliminary results, find the most relevant ones)
def search_graph(data, keywords, node_types=['Dataset', 'Project', 'ScienceKeyword', 'Instrument' ,'Platform', 'Publication']):
    results = []
    
    for node_type in node_types:
        for key in data[node_type]:
            if key == 'num_nodes':
                continue
            
            values = data[node_type][key]
            if isinstance(values, list):
                for idx, value in enumerate(values):
                    if any(kw in str(value).lower() for kw in keywords):
                        results.append((node_type, idx, key, value))
    
    return results

def display_results(results):
    if not results:
        print("No relevant nodes found.")
        return

    with open("query_results.txt", "w") as file:
        print(f"\nFound {len(results)} relevant nodes:\n")
        for node_type, idx, key, value in results:
            file.write(f"Node Type: {node_type} | Index: {idx} | Property: {key} | Value: {value}\n")



In [3]:
# Given query, extract keywords, search the graph for relevant nodes, and display the results
# NOTE: currently only searches for exact keyword matches in node properties

def get_subgraph(data, node_type, node_indices, num_hops=2):
    # Find all edge types where the node_type is either the source or target
    relevant_edges = [
        (src, rel, dst) for (src, rel, dst) in data.edge_types if src == node_type or dst == node_type
    ]
    
    print("relevant_edges = ", relevant_edges)

    if not relevant_edges:
        print(f"No edges found for node type '{node_type}'")
        return None, None, None

    # Combine edge indices from all relevant edge types
    combined_edge_index = []
    combined_edge_types = []

    for edge_type in relevant_edges:
        edge_index = data[edge_type].edge_index
        combined_edge_index.append(edge_index)
        combined_edge_types.append(edge_type)

    # Stack all edge indices into a single tensor
    combined_edge_index = torch.cat(combined_edge_index, dim=1)

    # Extract the subgraph using the combined edge index
    subset, edge_index, _, _ = k_hop_subgraph(node_idx=node_indices, num_hops=num_hops, edge_index=combined_edge_index)
    return subset, edge_index, combined_edge_types


# Explore subgraphs based on the search results.
def explore_subgraphs(data, results, num_hops=2):
    if not results:
        print("No nodes to explore for subgraphs.")
        return

    # Group the results by node type
    nodes_by_type = defaultdict(list)
    for node_type, idx, _, _ in results:
        nodes_by_type[node_type].append(idx)

    # Extract and display subgraphs for each node type
    for node_type, indices in nodes_by_type.items():
        print(f"\nExploring subgraph for node type: {node_type}")
        
        # Get the valid range for node indices
        num_nodes = data[node_type].num_nodes
        valid_indices = [idx for idx in indices if idx < num_nodes]

        if not valid_indices:
            print(f"No valid indices for node type '{node_type}'.")
            continue

        node_indices = torch.tensor(valid_indices[:10])  # Limit to 10 nodes to keep it manageable
        print(f"Exploring subgraph for node indices: {node_indices}") # may not be sequential due to search results ordering 
        subset, edge_index, edge_type = get_subgraph(data, node_type, node_indices, num_hops=num_hops)

        if subset is not None and edge_index is not None:
            print(f"Extracted subgraph with {len(subset)} nodes and {edge_index.size(1)} edges.")
            print(f"Edge Type: {edge_type}")
        else:
            print(f"Could not extract subgraph for node type: {node_type}")

# Example: Run the combined query and subgraph exploration module
# query = input("Enter your query (e.g., 'Find datasets related to climate change projects'): ")
query = "climate change" #TODO: remove hardcoded query
keywords = extract_keywords(query)
print(f"\nExtracted Keywords: {keywords}")

results = search_graph(data, keywords)
display_results(results)

# Explore subgraphs based on the results
explore_subgraphs(data, results)




Extracted Keywords: ['climate', 'change']

Found 72539 relevant nodes:


Exploring subgraph for node type: Dataset
Exploring subgraph for node indices: tensor([ 0,  1,  2, 11, 12, 13, 14, 15, 16, 17])
relevant_edges =  [('DataCenter', 'HAS_DATASET', 'Dataset'), ('Dataset', 'OF_PROJECT', 'Project'), ('Dataset', 'HAS_PLATFORM', 'Platform'), ('Dataset', 'HAS_SCIENCEKEYWORD', 'ScienceKeyword')]
Extracted subgraph with 6130 nodes and 44118 edges.
Edge Type: [('DataCenter', 'HAS_DATASET', 'Dataset'), ('Dataset', 'OF_PROJECT', 'Project'), ('Dataset', 'HAS_PLATFORM', 'Platform'), ('Dataset', 'HAS_SCIENCEKEYWORD', 'ScienceKeyword')]

Exploring subgraph for node type: Project
Exploring subgraph for node indices: tensor([137,   0,  26,  59,  89, 105, 108, 109, 117, 121])
relevant_edges =  [('Dataset', 'OF_PROJECT', 'Project')]
Extracted subgraph with 197 nodes and 206 edges.
Edge Type: [('Dataset', 'OF_PROJECT', 'Project')]

Exploring subgraph for node type: ScienceKeyword
Exploring subgraph for

In [4]:
# # USING MEDIAWIKI API FOR SNIPPIT

# def fetch_wikipedia_context(keywords):
#     search_term = " ".join(keywords)
#     url = "https://en.wikipedia.org/w/api.php"
    
#     # Parameters for the MediaWiki API search request
#     params = {
#         "action": "query",
#         "list": "search",
#         "srsearch": search_term,
#         "srlimit": 5,  # Limit to top 5 search results
#         "srprop": "snippet|timestamp",
#         "format": "json"
#     }
    
#     headers = {
#         "User-Agent": "GraphExplorationTool/1.0 (karlsimon@example.com)"
#     }
    
#     try:
#         # Make the request to the MediaWiki API
#         response = requests.get(url, params=params, headers=headers)
#         response.raise_for_status()
#         data = response.json()
        
#         search_results = data.get("query", {}).get("search", [])
        
#         if not search_results:
#             return None
        
#         # Collect the top search results
#         context_list = []
#         for result in search_results:
#             title = result.get("title", "No Title")
#             snippet = result.get("snippet", "No Snippet Available.")
#             timestamp = result.get("timestamp", "No Timestamp Available.")
#             page_url = f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
            
#             context_entry = {
#                 "title": title,
#                 "snippet": snippet,
#                 "timestamp": timestamp,
#                 "link": page_url
#             }
#             context_list.append(context_entry)
        
#         return context_list

#     except requests.RequestException as e:
#         print(f"Error fetching Wikipedia context: {e}")
#         return None


# # Display the Wikipedia context for the extracted keywords
# def display_wikipedia_context(context_list):
#     if not context_list:
#         print("\nNo external context available from Wikipedia.")
#         return

#     print("\nWikipedia Context using MEDIAWIKI API:")
#     for i, context in enumerate(context_list, start=1):
#         print(f"\nResult {i}:")
#         print(f"Title: {context['title']}")
#         print(f"Snippet: {context['snippet']}")
#         print(f"Timestamp: {context['timestamp']}")
#         print(f"Link: {context['link']}")

# wikipedia_context = fetch_wikipedia_context(keywords)
# display_wikipedia_context(wikipedia_context)

In [5]:
# Get external context from Wikipedia using the REST API
def fetch_wikipedia_context(keywords):
    search_term = " ".join(keywords)
    
    # Step 1: Use the Action API to get the top 5 search results
    search_url = "https://en.wikipedia.org/w/api.php"
    search_params = {
        "action": "query",
        "list": "search",
        "srsearch": search_term,
        "srlimit": 5,
        "format": "json"
    }
    
    headers = {
        "User-Agent": "GraphExplorationTool/1.0 (ksimon24@gwu.edu)"
    }
    
    try:
        search_response = requests.get(search_url, params=search_params, headers=headers)
        search_response.raise_for_status()
        search_data = search_response.json()
        
        search_results = search_data.get("query", {}).get("search", [])
        
        if not search_results:
            return None

        # Step 2: Fetch summaries using the REST API for each search result
        context_list = []
        for result in search_results:
            page_title = result.get("title")
            rest_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_title.replace(' ', '_')}"
            
            rest_response = requests.get(rest_url, headers=headers)
            rest_response.raise_for_status()
            rest_data = rest_response.json()
            
            # Extract relevant information
            title = rest_data.get("title", "No Title")
            description = rest_data.get("description", "No Description Available.")
            summary = rest_data.get("extract", "No Summary Available.")
            link = rest_data.get("content_urls", {}).get("desktop", {}).get("page", "No Link Available.")
            thumbnail = rest_data.get("thumbnail", {}).get("source", None)
            
            context_entry = {
                "title": title,
                "description": description,
                "summary": summary,
                "link": link,
                "thumbnail": thumbnail
            }
            context_list.append(context_entry)
        
        return context_list

    except requests.RequestException as e:
        print(f"Error fetching Wikipedia context: {e}")
        return None
    
def display_wikipedia_context(context_list):
    if not context_list:
        print("\nNo external context available from Wikipedia.")
        return

    print("\nWikipedia Context:")
    for i, context in enumerate(context_list, start=1):
        print(f"\nResult {i}:")
        print(f"Title: {context['title']}")
        print(f"Description: {context['description']}")
        print(f"Summary: {context['summary']}")
        print(f"Link: {context['link']}")
        if context['thumbnail']:
            print(f"Thumbnail: {context['thumbnail']}")

wikipedia_context = fetch_wikipedia_context(keywords)
display_wikipedia_context(wikipedia_context)


Wikipedia Context:

Result 1:
Title: Climate change
Description: Human-caused changes to climate on Earth
Summary: Present-day climate change includes both global warming—the ongoing increase in global average temperature—and its wider effects on Earth's climate. Climate change in a broader sense also includes previous long-term changes to Earth's climate. The current rise in global temperatures is driven by human activities, especially fossil fuel burning since the Industrial Revolution. Fossil fuel use, deforestation, and some agricultural and industrial practices release greenhouse gases. These gases absorb some of the heat that the Earth radiates after it warms from sunlight, warming the lower atmosphere. Carbon dioxide, the primary greenhouse gas driving global warming, has grown by about 50% and is at levels not seen for millions of years.
Link: https://en.wikipedia.org/wiki/Climate_change
Thumbnail: https://upload.wikimedia.org/wikipedia/commons/thumb/e/e0/Change_in_Average_Tem

In [20]:
# Next Steps:
# 1. improve graph search and rank results.
# 2. improve subgraph exploration.
# 3. improve external context retrieval (NASA API).

# Updated search_graph function with TF-IDF scoring
# TODO: make max_per_type specific to each node type
def search_graph(data, keywords, node_types=['Dataset', 'Project', 'ScienceKeyword', 'Instrument', 'Platform', 'Publication'], max_results=50, max_per_type=10):
    results = []
    texts = []  # Collect text data for TF-IDF processing
    metadata = []  # To store corresponding metadata (node type, index, key, value)

    # Step 1: Collect all matching nodes and their text data
    for node_type in node_types:
        for key in data[node_type]:
            if key == 'num_nodes':
                continue
            
            values = data[node_type][key]
            if isinstance(values, list):
                for idx, value in enumerate(values):
                    value_str = str(value).lower()
                    if any(kw in value_str for kw in keywords):
                        texts.append(value_str)
                        metadata.append((node_type, idx, key, value))

    if not texts:
        return []

    ############ Visualization of TF-IDF scoring:############
    # Example scores output. Shape = (n_samples, n_keywords), and we sum along rows (i.e. sum across the columns)
    # [[0.5 0.8]  # TF-IDF scores for "climate change impacts ecosystems..."
    # [0.8 0.7]  # TF-IDF scores for "mitigation strategies for climate change..."
    # [0.9 0.2]] # TF-IDF scores for "climate change denial rejects..."
    # 
    # After summation:
    # [1.3, 1.5, 1.1]  # Sum of TF-IDF scores for each text in `texts`
    # and indices: [0, 1, 2]  # Indices of the summed scores
    # 
    # After sorting in descending order:
    # [1.5, 1.3, 1.1]  # Sorted TF-IDF scores
    # and indices of the sorted scores in descending order: [2, 0, 1]
    ########################################################

    # Step 2: Compute TF-IDF scores for the collected texts
    # NOTE: texts stores the properties of the nodes which contain the keywords
    vectorizer = TfidfVectorizer(vocabulary=keywords)
    tfidf_matrix = vectorizer.fit_transform(texts)
    scores = tfidf_matrix.sum(axis=1).A1  # Sum the TF-IDF scores for each text

    # Step 3: Sort the results by TF-IDF score in descending order
    sorted_indices = np.argsort(scores)[::-1]
    sorted_results = [metadata[i] for i in sorted_indices]
    with open("sorted_results.txt", "w") as file:
        for result in sorted_results:
            file.write(f"{result}\n")

    # Step 4: Limit the number of results overall and per node type
    final_results = []
    counts_per_type = {node_type: 0 for node_type in node_types}

    for result in sorted_results:
        node_type = result[0]
        if len(final_results) >= max_results:
            break
        if counts_per_type[node_type] < max_per_type:
            final_results.append(result)
            counts_per_type[node_type] += 1

    return final_results

# Updated display_results function to trim long values
def display_results(results, max_value_length=200):
    if not results:
        print("No relevant nodes found.")
        return

    with open("query_results.txt", "w") as file:
        print(f"\nFound {len(results)} relevant nodes:\n")
        for node_type, idx, key, value in results:
            value_str = str(value)
            if len(value_str) > max_value_length:
                value_str = value_str[:max_value_length] + "..."
            output_line = f"Node Type: {node_type} | Index: {idx} | Property: {key} | Value: {value_str}\n"
            file.write(output_line)

# Get user query
# query = input("Enter your query (e.g., 'Find datasets related to climate change projects'): ")
query = "climate change" #TODO: remove hardcoded query
keywords = extract_keywords(query)
print(f"\nExtracted Keywords: {keywords}")

# Search the graph with TF-IDF ranking
graph_results = search_graph(data, keywords)
display_results(graph_results)

# Fetch Wikipedia context
wikipedia_context = fetch_wikipedia_context(keywords)
display_wikipedia_context(wikipedia_context)

# Explore subgraphs based on the results
explore_subgraphs(data, graph_results)



Extracted Keywords: ['climate', 'change']

Found 50 relevant nodes:


Wikipedia Context:

Result 1:
Title: Climate change
Description: Human-caused changes to climate on Earth
Summary: Present-day climate change includes both global warming—the ongoing increase in global average temperature—and its wider effects on Earth's climate. Climate change in a broader sense also includes previous long-term changes to Earth's climate. The current rise in global temperatures is driven by human activities, especially fossil fuel burning since the Industrial Revolution. Fossil fuel use, deforestation, and some agricultural and industrial practices release greenhouse gases. These gases absorb some of the heat that the Earth radiates after it warms from sunlight, warming the lower atmosphere. Carbon dioxide, the primary greenhouse gas driving global warming, has grown by about 50% and is at levels not seen for millions of years.
Link: https://en.wikipedia.org/wiki/Climate_change
Thumbnail: https://u

In [12]:
############ OpenAI API ############
# import os
# from openai import OpenAI
# # Define the path to the text file containing the API key
# file_path = "/home/karlsimon/CSCI6365/final/api_key.txt"
# with open(file_path, "r") as file:
#     api_key = file.read().strip()
# print(api_key)

# client = OpenAI(
#     api_key=api_key,
# )

# chat_completion = client.chat.completions.create(
#     messages=[
#         {
#             "role": "user",
#             "content": "Say this is a test",
#         }
#     ],
#     model="gpt-4o-mini",  # Use 'gpt-4-turbo' if 'gpt-4o' isn't available
# )

# # Output 12.09: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [21]:
############ Gemini API ############
import google.generativeai as genai

# Define the path to the text file containing the API key
file_path = "/home/karlsimon/CSCI6365/final/gemini_api_key.txt"
with open(file_path, "r") as file:
    api_key = file.read().strip()
# print(api_key)
genai.configure(api_key=api_key)

# Create a model instance (using Gemini 1.5 Flash in this case)
model = genai.GenerativeModel('gemini-1.5-flash-latest')


In [25]:
# Function to summarize combined results using the LLM
def summarize_results_with_llm(graph_results, wikipedia_context):
    prompt = "Summarize the following search results and Wikipedia context:\n\n"

    # Add graph results to the prompt
    prompt += "Graph Search Results:\n"
    for node_type, idx, key, value in graph_results[:10]:  # Limit to top 5 results for brevity
        prompt += f"- Node Type: {node_type}, Property: {key}, Value: {str(value)[:2000]}...\n"

    # Add Wikipedia context to the prompt
    prompt += "\nWikipedia Context:\n"
    for i, context in enumerate(wikipedia_context, start=1):
        prompt += f"{i}. Title: {context['title']}\n"
        prompt += f"   Summary: {context['summary'][:300]}...\n"
    
    with open("prompt_file.txt", "w") as file:
        file.write(f"{prompt}\n")

    # Call the Gemini model to generate the summary
    response = model.generate_content(prompt)
    return response.text

# Function to generate explanations for subgraphs using the LLM
def explain_subgraph_with_llm(node_type, edge_types, num_nodes, num_edges):
    prompt = (
        f"Explain the significance of a subgraph extracted for node type '{node_type}'. "
        f"The subgraph contains {num_nodes} nodes and {num_edges} edges. "
        f"The edge types in this subgraph are: {', '.join([str(edge) for edge in edge_types])}."
    )

    # Call the Gemini model to generate the explanation
    response = model.generate_content(prompt)
    return response.text

# Generate an LLM summary of the combined results
summary = summarize_results_with_llm(graph_results, wikipedia_context)
print("\nLLM-Generated Summary:")
print(summary)

# Explore subgraphs based on the results
explore_subgraphs(data, graph_results)

# Generate LLM explanations for each explored subgraph
for node_type, idx, key, value in graph_results[:3]:  # Limit to 3 nodes for brevity
    # Get subgraph for the current node type and indices
    subset, edge_index, edge_types = get_subgraph(data, node_type, torch.tensor([idx]))
    explanation = explain_subgraph_with_llm(node_type, edge_types, len(subset), edge_index.size(1))
    print(f"\nLLM-Generated Explanation for Node Type '{node_type}':")
    print(explanation)



LLM-Generated Summary:
The provided text focuses on various research studies investigating the effects of climate change and its interaction with other factors.  Several studies analyze the impact of climate change on evapotranspiration (ET), finding a decrease in ET in some regions due to deforestation, despite increased temperatures potentially raising ET.  Other studies examine the link between climate change and forest fires (increased fire frequency in Yunnan, China, correlated with decreased precipitation and water storage), and the impact of changing prey quality on Chinook salmon due to climate change-induced shifts in fatty acid composition.  Additional research explores the interplay of climate change with stratospheric ozone, the equilibrium climate sensitivity of climate models, and the drivers of human migration in climate-vulnerable regions (economic factors outweighing environmental concerns).  Finally, studies address the impact of climate change on groundwater levels 