In [1]:
from dotenv import load_dotenv
import os

# Common data processing
import json
import textwrap

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI


# Warning control
import warnings
warnings.filterwarnings("ignore")

In [11]:
# Load from environment
# Settings are read from the project's .env file one directory up.
# override=True lets values in that file replace variables already set in the process.
load_dotenv('../.env', override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
# Fall back to the 'neo4j' database name when the variable is unset or empty.
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# Note the code below is unique to this course environment, and not a
# standard part of Neo4j's integration with OpenAI. Remove if running
# in your own environment.
# OPENAI_BASE_URL is optional: concatenating directly onto os.getenv(...)
# raised TypeError (None + str) and aborted the cell when it was not set.
# OPENAI_ENDPOINT is None in that case; any value that is set is used unchanged.
_openai_base_url = os.getenv('OPENAI_BASE_URL')
OPENAI_ENDPOINT = (
    _openai_base_url + '/embeddings' if _openai_base_url is not None else None
)

# Global constants
# Name of the vector index, and the node label / properties it is built on.
VECTOR_INDEX_NAME = 'APPLE_WIKIPEDIA'
VECTOR_NODE_LABEL = 'Chunk'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

In [13]:
from dotenv import load_dotenv
import os
import warnings
import wikipediaapi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

# Silence all warnings emitted from here on.
warnings.filterwarnings("ignore")

# Pick up variables from a .env file, if one is found (only needed when
# OpenAI keys or similar settings are used further down).
load_dotenv()

# Wikipedia client used by every page lookup in this notebook.
# Replace the user agent with your own app name and contact info.
user_agent = "MyApp/1.0 (contact@example.com)"
wiki_wiki = wikipediaapi.Wikipedia(user_agent=user_agent, language='en')

# Function to fetch content from a Wikipedia page
def fetch_wikipedia_page_content(page_title):
    """Return the text of the Wikipedia page titled ``page_title``.

    The lookup goes through the module-level ``wiki_wiki`` client.

    Args:
        page_title: Title of the page to look up.

    Returns:
        The ``text`` attribute of the page object returned by the client.

    Raises:
        ValueError: If the client reports that the page does not exist.
    """
    wiki_page = wiki_wiki.page(page_title)
    if not wiki_page.exists():
        raise ValueError(f"The page '{page_title}' does not exist on Wikipedia.")
    return wiki_page.text

# Fetch the content
page_title = "Artificial intelligence"  # Replace with your desired Wikipedia page title
content = fetch_wikipedia_page_content(page_title)

# Use RecursiveCharacterTextSplitter to split the content into manageable chunks.
# Sizes are measured with len(), i.e. in characters: chunks of up to 2000
# characters, with 200 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_text(content)

# `chunks` is a plain list of strings.
print(f"Total chunks created: {len(chunks)}")
if chunks:
    # Preview only: collapse whitespace and truncate to 500 characters.
    print(f"Sample chunk:\n{textwrap.shorten(chunks[0], width=500)}")
else:
    # A page with empty text yields no chunks; indexing chunks[0] would raise IndexError.
    print("Sample chunk: <none - the page text was empty>")

# The `chunks` can now be used for further processing, e.g.:
# - Storing in a vector store like Neo4jVector
# - Building a RetrievalQAWithSourcesChain


Total chunks created: 63
Sample chunk:
Artificial intelligence (AI), in its broadest sense, is intelligence exhibited by machines, particularly computer systems. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals. Such machines may be called AIs. High-profile applications of AI include advanced web search engines (e.g., Google [...]


'Artificial intelligence (AI), in its broadest sense, is intelligence exhibited by machines, particularly computer systems. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals. Such machines may be called AIs.\nHigh-profile applications of AI include advanced web search engines (e.g., Google Search); recommendation systems (used by YouTube, Amazon, and Netflix); virtual assistants (e.g., Google Assistant, Siri, and Alexa); autonomous vehicles (e.g., Waymo); generative and creative tools (e.g., ChatGPT and AI art); and superhuman play and analysis in strategy games (e.g., chess and Go). However, many AI applications are not perceived as AI: "A lot of cutting edge AI has filtered into general applications, often without being called AI because once something becomes useful enough and common eno