<a href="https://colab.research.google.com/github/IkuStudies/abraxalexicon/blob/main/VectorMemoryManagement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Prepare the files for the vector database.
#
# Reads two line-aligned text files (words in input_file1.csv, phonemes in
# input_file2.csv), pairs them up by line position, and writes the pairs to
# formatted_data.csv as two-column CSV rows.

import csv

import pandas as pd

# Read the input files as plain text, stripping leading/trailing whitespace
# (including the trailing newline) from every line.
with open('input_file1.csv', 'r') as f1, open('input_file2.csv', 'r') as f2:
    lines_words = [line.strip() for line in f1]
    lines_phonemes = [line.strip() for line in f2]

# Rows are matched purely by position, so files of different lengths cannot be
# paired. Fail early with a message that names both counts instead of relying
# on the less specific error pandas raises for unequal column lengths.
if len(lines_words) != len(lines_phonemes):
    raise ValueError(
        f"input_file1.csv has {len(lines_words)} lines but input_file2.csv "
        f"has {len(lines_phonemes)}; the files must be line-aligned"
    )

# Combine the words and phonemes into a single dataframe
df_combined = pd.DataFrame({'word': lines_words, 'phoneme': lines_phonemes})

# Prepare the data as a list of [word, phoneme] rows. Built with zip rather
# than DataFrame.apply so that empty input yields an empty list.
data = [[word, phoneme] for word, phoneme in zip(lines_words, lines_phonemes)]

# Save the formatted data with the csv module so that fields containing commas
# or quotes are quoted correctly and can be read back by csv.reader.
# newline='' lets the writer control line endings, as the csv docs recommend.
with open('formatted_data.csv', 'w', newline='') as f:
    csv.writer(f, lineterminator='\n').writerows(data)


In [None]:
#install, configure, and initiate milvus

!pip install pymilvus==2.0.0
!pip install ujson==4.0.2
!pip install python-rapidjson==1.4

import os

# Set the username and password for Milvus
username = "josh"
password = "password"

# Install and start Milvus
!wget https://milvus.io/download/milvus-2.0.0-ubuntu20.04.tar.gz
!tar -xvf milvus-2.0.0-ubuntu20.04.tar.gz
%cd milvus
!bash install_milvus.sh
!bash start_milvus.sh

# Configure Milvus
!milvus config set server_addr=localhost:19530
!milvus config set username={username}
!milvus config set password={password}
!milvus config show


In [None]:
import csv
from milvus import Milvus, DataType, FieldSchema, CollectionSchema

# Load the word/phoneme pairs from formatted_data.csv into a Milvus collection.
#
# NOTE(review): the client calls used here (connect, create_collection with a
# CollectionSchema, insert with a list of dicts, flush, disconnect) are kept
# exactly as written — confirm they match the installed Milvus client version.

# Define collection and field names
collection_name = 'my_collection'
word_field = 'word'
phoneme_field = 'phoneme'

# Connect to Milvus
milvus = Milvus()
milvus.connect(host='localhost', port='19530')

# Everything after a successful connect runs under try/finally so the
# connection is released even if schema creation, file parsing, or the insert
# raises.
try:
    # Create collection schema
    collection_schema = CollectionSchema(collection_name, fields=[
        FieldSchema(name=word_field, dtype=DataType.STRING),
        FieldSchema(name=phoneme_field, dtype=DataType.STRING),
    ])

    # Create the collection
    milvus.create_collection(collection_schema)

    # Load data from formatted_data.csv. newline='' is what the csv module
    # expects so that it can handle line endings itself.
    entities = []
    with open('formatted_data.csv', 'r', newline='') as f:
        reader = csv.reader(f)
        for row in reader:
            # csv.reader yields an empty list for a blank line; skip those
            # instead of failing on row[0].
            if not row:
                continue
            # A row with a single field cannot supply both values; report
            # where it is rather than raising a bare IndexError.
            if len(row) < 2:
                raise ValueError(
                    f"formatted_data.csv line {reader.line_num}: expected "
                    f"2 fields (word, phoneme), got {len(row)}"
                )
            entities.append({
                word_field: row[0],
                phoneme_field: row[1],
            })

    # Insert the entities into the collection
    milvus.insert(collection_name, entities)

    # Flush the inserted data to disk
    milvus.flush([collection_name])
finally:
    # Disconnect from Milvus
    milvus.disconnect()


In [None]:
#export finished vector memory files

from milvus import Milvus

# Export every collection known to the server to "<collection name>.csv" in
# the current working directory.
#
# NOTE(review): `list_collections` and `export` are called exactly as in the
# original cell — confirm both exist on the installed Milvus client.

# Connect to Milvus
milvus = Milvus()
milvus.connect(host='localhost', port='19530')

# try/finally guarantees the connection is closed even when listing or
# exporting a collection fails part-way through.
try:
    # Get a list of all collections
    collections = milvus.list_collections()

    # Export each collection as a separate file
    for collection_name in collections:
        output_file = f"{collection_name}.csv"
        milvus.export(collection_name, output_file)
finally:
    # Disconnect from Milvus
    milvus.disconnect()
