In [1]:
import weaviate
import openai 
import os

# Read the OpenAI key from the environment (None when the variable is unset)
# and hand it to the openai package.
api_key = os.getenv('OPENAI_APIKEY')
openai.api_key = api_key

# Address of the Weaviate instance this notebook talks to.
weaviate_client_url = "http://localhost:8080"

# The same key is forwarded to Weaviate as a request header; other providers
# use "X-Cohere-Api-Key" or "X-HuggingFace-Api-Key" instead.
client = weaviate.Client(
    url=weaviate_client_url,  # Replace with your endpoint
    additional_headers={"X-OpenAI-Api-Key": api_key},
)

# The code below is copied from the Weaviate introduction.

# ===== Import data =====
# Set up the client's batch importer with a batch size of 100.
client.batch.configure(batch_size=100)

# Progress-reporting state, updated while records are imported
counter = 0      # running count of records added to the batch
interval = 100   # a progress line is printed every `interval` records

# Read the CSV lazily, chunk by chunk, so the full set of records never
# has to sit in RAM at the same time.
import pandas as pd
csv_path = 'vector_database_wikipedia_articles_embedded/vector_database_wikipedia_articles_embedded.csv'
csv_columns = ['id', 'url', 'title', 'text', 'content_vector']
csv_iterator = pd.read_csv(
    csv_path,
    usecols=csv_columns,
    chunksize=100,  # rows delivered per chunk
    # nrows=350  # uncomment to cap how many rows are imported
)

# Walk through the dataframe chunks and add each CSV record to the batch
# as an "Article" object carrying its pre-computed embedding.
import ast
for chunk in csv_iterator:
    # itertuples() yields lightweight namedtuples rather than constructing a
    # pandas Series per row as iterrows() does; the row index was never used,
    # so it is left out with index=False.
    for row in chunk.itertuples(index=False):

        properties = {
            "title": row.title,
            "content": row.text,
            "url": row.url
        }

        # Convert the vector from CSV string back to array of floats
        vector = ast.literal_eval(row.content_vector)

        # Add the object to the batch, and set its vector embedding
        client.batch.add_data_object(properties, "Article", vector=vector)

        # Calculate and display progress
        counter += 1
        if counter % interval == 0:
            print(f"Imported {counter} articles...")

# Send whatever is still pending in the batch after the last chunk
client.batch.flush()
print(f"Finished importing {counter} articles.")

Imported 100 articles...
Imported 200 articles...
Imported 300 articles...
Imported 400 articles...
Imported 500 articles...
Imported 600 articles...
Imported 700 articles...
Imported 800 articles...
Imported 900 articles...
Imported 1000 articles...
Imported 1100 articles...
Imported 1200 articles...
Imported 1300 articles...
Imported 1400 articles...
Imported 1500 articles...
Imported 1600 articles...
Imported 1700 articles...
Imported 1800 articles...
Imported 1900 articles...
Imported 2000 articles...
Imported 2100 articles...
Imported 2200 articles...
Imported 2300 articles...
Imported 2400 articles...
Imported 2500 articles...
Imported 2600 articles...
Imported 2700 articles...
Imported 2800 articles...
Imported 2900 articles...
Imported 3000 articles...
Imported 3100 articles...
Imported 3200 articles...
Imported 3300 articles...
Imported 3400 articles...
Imported 3500 articles...
Imported 3600 articles...
Imported 3700 articles...
Imported 3800 articles...
Imported 3900 article