## Ingestion to PostgreSQL

### Set environment variables

In [None]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment
# before the settings below are read.
load_dotenv()


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty. Raising (rather than
            calling ``exit()``) stops the cell with a readable error and does
            not depend on the ``site``-provided ``exit`` helper, which in a
            notebook would shut the kernel down.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"{name} environment variable not set.")
    return value


# Required PostgreSQL connection settings.
pg_host = _require_env("POSTGRESQL_HOST")
pg_user = _require_env("POSTGRESQL_USERNAME")
pg_password = _require_env("POSTGRESQL_PASSWORD")
db_name = _require_env("POSTGRESQL_DATABASE")

# Names of the tables created and populated by the cells below.
text_table_name = 'text_sample'
doc_table_name = 'doc_sample'
image_table_name = 'image_sample'

# Keyword arguments passed to psycopg2.connect() in the cells below.
postgresql_params = {
    "host": pg_host,
    "port": "5432",
    "dbname": db_name,
    "user": pg_user,
    "password": pg_password,
}

#### Create tables

In [None]:
import psycopg2 

# Column definitions for each sample table.
# NOTE(review): VECTOR(n) is presumably the pgvector `vector` type; this cell
# does not enable any extension, so it must already be available in the
# target database - confirm.

## text_sample table
text_table_schema = """
    id smallint PRIMARY KEY,
    title text,
    content text,
    category text,
    title_vector VECTOR(1536),
    content_vector VECTOR(1536)
 """

## doc_sample table
doc_table_schema = """
    id smallint PRIMARY KEY,
    chunk_content text,
    chunk_content_vector VECTOR(1536)
 """

## image_sample table
image_table_schema = """
    id smallint PRIMARY KEY,
    image text,
    image_vector VECTOR(1024)
 """

table_definitions = [
    (text_table_name, text_table_schema),
    (doc_table_name, doc_table_schema),
    (image_table_name, image_table_schema),
]

connection = psycopg2.connect(**postgresql_params)
print("Connection established.")

try:
    # Each table is dropped and re-created from scratch, then committed on its
    # own so a failure on a later table keeps the earlier ones.
    for table_name, table_schema_data in table_definitions:
        with connection.cursor() as cursor:
            cursor.execute(f"DROP TABLE IF  EXISTS {table_name} ")
            cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({table_schema_data});")
        connection.commit()
        # Report the table that was actually created.
        print(f"Table {table_name} created.")
finally:
    # Always release the connection, even when a statement fails.
    connection.close()

#### Ingest text sample with embeddings

In [None]:
import pandas as pd

# Load the text sample (with precomputed embeddings) and insert every row
# into the text table.
text_df = pd.read_json('../data/text/product_docs_embeddings.json')

# Rows are passed positionally, so the DataFrame's column order must match the
# column list in the INSERT statement below.
# NOTE(review): assumes the JSON columns are in that order - confirm against the data file.
records = text_df.values.tolist()

insert_sql = f"INSERT INTO {text_table_name}(id, title, content, category, title_vector, content_vector) VALUES(%s, %s, %s, %s, %s, %s)"

connection = psycopg2.connect(**postgresql_params)
print("Connection established.")

try:
    with connection.cursor() as cursor:
        cursor.executemany(insert_sql, records)
    connection.commit()
    print("Text sample ingested.")
finally:
    # Close the connection even if the insert or commit fails.
    connection.close()

#### Ingest doc sample with embeddings

In [None]:
# Load the document chunks (with precomputed embeddings) and insert every row
# into the doc table.
doc_df = pd.read_json('../data/docs/employee_handbook_embeddings.json')

# Rows are passed positionally, so the DataFrame's column order must match the
# column list in the INSERT statement below.
# NOTE(review): assumes the JSON columns are in that order - confirm against the data file.
records = doc_df.values.tolist()

insert_sql = f"INSERT INTO {doc_table_name}(id, chunk_content, chunk_content_vector) VALUES(%s, %s, %s)"

connection = psycopg2.connect(**postgresql_params)
print("Connection established.")

try:
    with connection.cursor() as cursor:
        cursor.executemany(insert_sql, records)
    connection.commit()
    print("Doc sample ingested.")
finally:
    # Close the connection even if the insert or commit fails.
    connection.close()

#### Ingest image sample with embeddings

In [None]:
# Load the image sample (with precomputed embeddings) and insert every row
# into the image table.
image_df = pd.read_json('../data/images/images_embeddings.json')

# Rows are passed positionally, so the DataFrame's column order must match the
# column list in the INSERT statement below.
# NOTE(review): assumes the JSON columns are in that order - confirm against the data file.
records = image_df.values.tolist()

insert_sql = f"INSERT INTO {image_table_name}(id, image, image_vector) VALUES(%s, %s, %s)"

connection = psycopg2.connect(**postgresql_params)
print("Connection established.")

try:
    with connection.cursor() as cursor:
        cursor.executemany(insert_sql, records)
    connection.commit()
    print("Image sample ingested.")
finally:
    # Close the connection even if the insert or commit fails.
    connection.close()