# Read the data 

In [None]:
from llama_index.core import SimpleDirectoryReader
from dotenv import load_dotenv
import os

# Load settings via python-dotenv before anything reads the environment.
# NOTE(review): presumably this supplies GOOGLE_GENAI_API_KEY, which is read
# with os.getenv further down — confirm the .env file defines it.
load_dotenv()

In [None]:

# Folder that holds the source documents to ingest.
base_folder = "./docs"

# Directory reader over base_folder: descend into sub-folders and leave
# hidden files out.
reader = SimpleDirectoryReader(
    input_dir=base_folder,
    exclude_hidden=True,
    recursive=True,
)

In [None]:
# Show reader.input_files as the cell output, to check which files the reader
# picked up before loading them.
reader.input_files

In [None]:
# Load the reader's files into `docs`; later cells inspect docs[0] and pass
# the whole collection to the node parser.
docs = reader.load_data()

In [None]:
# Print the metadata string of the first loaded document as a quick sanity check.
print(docs[0].get_metadata_str())

In [None]:
# Dump every instance attribute of the first document to see its raw structure.
print(vars(docs[0]))

# Node Parser

In [None]:
from llama_index.core.node_parser import SentenceSplitter

# Sentence-based splitter configured with chunk_size=200 and no overlap
# between consecutive chunks.
node_parser = SentenceSplitter(
    chunk_size=200,
    chunk_overlap=0,
)

In [None]:
# Split the loaded documents into nodes with the configured parser;
# show_progress=True reports progress while parsing.
nodes = node_parser.get_nodes_from_documents(docs, show_progress=True)

In [None]:
# Print how many nodes the parser produced.
print(len(nodes))

In [None]:
# Show the parsed nodes as the cell output for manual inspection.
nodes

# Indexing

## ``` VectorStoreIndex ```
By default it uses an in-memory ```SimpleVectorStore``` that's initialized as part of the default storage context.

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding

In [None]:
# Google GenAI embedding model used to embed the nodes. The API key is read
# from the GOOGLE_GENAI_API_KEY environment variable (None if it is unset).
embed_model = GoogleGenAIEmbedding(
    api_key=os.environ.get("GOOGLE_GENAI_API_KEY"),
    model_name="text-embedding-004",
    embed_batch_size=100,
)

### Create an Index

In [None]:
# Build the vector index over the parsed nodes, using embed_model for the
# embeddings instead of whatever default the library would otherwise pick.
index = VectorStoreIndex(nodes, embed_model=embed_model)

### Persist the index

In [None]:
# Remove any previously persisted index so the next persist starts from a
# clean directory. shutil.rmtree replaces the Windows-only
# `%rmdir /S /Q storage` alias, so the cell also works on Linux/macOS and
# outside IPython. ignore_errors=True keeps the cell from failing when the
# directory does not exist yet (e.g. on the very first run).
import shutil

shutil.rmtree("storage", ignore_errors=True)

In [None]:
# Directory the index is persisted to.
index_dir = "./storage"

In [None]:
# Persist the index's storage context to index_dir.
index.storage_context.persist(index_dir)