## Prepare maintenance documents

In [0]:
# Reference manual text: when and how to inspect or replace motor bearings.
# Kept as a plain string so it can be written to FileStore at the end of this
# cell and later chunked and embedded.
manual_bearings = """Title: Motor Bearing Inspection and Replacement Guide

1. Purpose:
   Bearings should be inspected when vibration exceeds 3.5 mm/s or temperature rises above 55°C.

2. Symptoms of bearing wear:
   - Increased vibration or noise
   - Abnormal heat near shaft
   - Reduced RPMs or power draw increase

3. Recommended Actions:
   - Shut down equipment safely
   - Use IR thermometer to confirm localized heating
   - Check for play in shaft coupling
   - Replace both drive-end and non-drive-end bearings
   - Log bearing condition in CMMS after replacement"""


# Reference manual text: causes, indicators and corrective actions for
# overheating electric motors. Written to FileStore at the end of this cell.
manual_overheating = """Title: Troubleshooting Overheating in Electric Motors

1. Common Causes:
   - Blocked ventilation or dirty cooling fans
   - Electrical overload
   - Shaft misalignment
   - Lubrication failure (dry bearing)

2. Indicators:
   - Temperature exceeding 60°C during normal load
   - Motor casing warm to touch or smelling of burnt insulation
   - Vibrations > 3.2 mm/s

3. Recommended Actions:
   - Clean cooling ducts and check fan blades
   - Verify motor current vs rated amps
   - Inspect for shaft alignment using dial indicators
   - Relubricate bearings or check for grease breakdown"""


# Reference manual text: diagnosing and fixing pressure drop in hydraulic systems.
# (Variable name corrected from the misspelled `manual_presure_drop` so it
# matches the file name it is written to.)
manual_pressure_drop = """Title: Pressure Drop in Hydraulic Systems

1. Normal operating pressure: 30–35 PSI
2. Warning level: < 28 PSI
3. Possible Causes:
   - Internal leak in actuator or valve
   - Blockage in fluid line
   - Air entrapment or low fluid level

4. Fix Procedures:
   - Check reservoir level and refill if low
   - Bleed air from the system using purge valves
   - Inspect filters and lines for clogs or wear
   - Run system diagnostics to isolate fault location"""


# Write every manual to FileStore (the files can also be uploaded manually
# through the FileStore UI instead of running this step).
maintenance_docs_dir = "/FileStore/maintenance_docs"
manuals_by_file_name = {
    "manual_bearings.txt": manual_bearings,
    "manual_overheating.txt": manual_overheating,
    "manual_pressure_drop.txt": manual_pressure_drop,
}

for file_name, manual_text in manuals_by_file_name.items():
    # Third positional argument is `overwrite`: True replaces any existing
    # file, which keeps this cell safe to re-run.
    dbutils.fs.put(f"{maintenance_docs_dir}/{file_name}", manual_text, True)

In [0]:
# Install the LangChain / OpenAI client libraries and python-dotenv that the
# embedding cells below import.
# NOTE(review): no versions are pinned here, so a fresh run may resolve
# different releases than the ones this notebook was developed against.
%pip install langchain openai tiktoken python-dotenv
%pip install -qU langchain-openai
%pip install -U langchain-community

In [0]:
# Restart the Python process so the packages installed above are picked up.
# This clears all Python variables defined in earlier cells.
%restart_python

In [0]:
# Force NumPy to exactly 1.26.4.
# NOTE(review): presumably this works around an incompatibility with NumPy 2.x
# in one of the packages installed above — confirm the reason. There is also no
# Python restart after this install; verify the pinned version is the one
# actually imported.
%pip install numpy==1.26.4 --force-reinstall

## Load, Chunk, and Embed the Documents

In [0]:
# from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import AzureOpenAIEmbeddings
from langchain_community.document_loaders.text import TextLoader
import uuid
from pathlib import Path

import os
from dotenv import load_dotenv
load_dotenv()

In [0]:
# Azure OpenAI connection settings.
# Each value is read from the process environment (which load_dotenv() in the
# imports cell may have populated from a .env file); a variable that is not
# set resolves to None.
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
AZURE_OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION")

In [0]:
# Build the Azure OpenAI embeddings client used to vectorize the manual chunks.
# Every argument comes from the AZURE_OPENAI_* settings read from the environment.
embedding_model = AzureOpenAIEmbeddings(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    openai_api_version=AZURE_OPENAI_API_VERSION,
    # NOTE(review): the deployment name is supplied through `model`;
    # langchain-openai also exposes `azure_deployment` — confirm which one
    # is intended for this workspace.
    model=AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
)

In [0]:
# Locate every plain-text manual that was uploaded to FileStore.
manual_dir = "dbfs:/FileStore/maintenance_docs/"
manual_files = [f.path for f in dbutils.fs.ls(manual_dir) if f.path.endswith(".txt")]

# Split each manual into chunks of at most 800 characters with a 50-character
# overlap between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=50)
all_chunks = []

for file_path in manual_files:
    # Read the whole file as one string. dbutils.fs.head() only returns the
    # first 65536 bytes by default, which would silently truncate any larger
    # manual before it is chunked.
    file_rows = spark.read.text(file_path, wholetext=True).take(1)
    doc_content = file_rows[0][0] if file_rows else ""
    all_chunks.extend(splitter.split_text(doc_content))

# Embed all chunks with one batched call instead of one request per chunk.
# The guard avoids calling the embedding service when no manuals were found.
vectors = embedding_model.embed_documents(all_chunks) if all_chunks else []

# One record per chunk: a random UUID primary key, the chunk text, its vector.
records = [
    (str(uuid.uuid4()), chunk_text, vector)
    for chunk_text, vector in zip(all_chunks, vectors)
]

df_embed = spark.createDataFrame(records, schema="id STRING, chunk_text STRING, embedding ARRAY<FLOAT>")

In [0]:
# Unity Catalog location of the table that stores the embedded chunks.
catalog = "main"
schema = "default"
table_name = "maintenance_docs_chunks"
full_table_name = f"{catalog}.{schema}.{table_name}"

spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{schema}")

# Save to a Delta table; "overwrite" replaces the previous contents, so
# re-running the notebook does not accumulate duplicate chunks.
df_embed.write.mode("overwrite").saveAsTable(full_table_name)

# A Delta Sync vector index can only be built on a source table that has
# Change Data Feed enabled. Setting the property is idempotent.
spark.sql(
    f"ALTER TABLE {full_table_name} "
    "SET TBLPROPERTIES (delta.enableChangeDataFeed = true)"
)

## Create Vector Index

In [0]:
# Install the Vector Search client libraries, then restart Python so they are
# importable. The restart clears every Python variable defined in earlier
# cells, which is why the cells below re-import and re-create what they need.
%pip install databricks-vectorsearch databricks-langchain openai
dbutils.library.restartPython()

### Create a Vector Search Endpoint

In [0]:
from databricks.vector_search.client import VectorSearchClient

client = VectorSearchClient()
endpoint_name = "maintenance_endpoint"

# Only create the endpoint when it does not exist yet, so that re-running the
# notebook does not fail on an "already exists" error.
existing_endpoint_names = {
    endpoint["name"] for endpoint in client.list_endpoints().get("endpoints", [])
}
if endpoint_name not in existing_endpoint_names:
    # Create endpoint (Standard or Storage Optimized)
    client.create_endpoint(name=endpoint_name, endpoint_type="STANDARD")

# Block until the endpoint is online, whether it was just created or already existed.
client.wait_for_endpoint(name=endpoint_name, verbose=True)
print(f"✅ Endpoint '{endpoint_name}' is ready.")


### Create a Vector Index

In [0]:
# Create a Delta Sync index over the self-managed `embedding` column.
# Requirements to keep in mind:
#   - `source_table_name` must be the table the chunks were written to
#     (main.default.maintenance_docs_chunks); the previous value pointed at a
#     hive_metastore table that this notebook never creates.
#   - The schema in `index_name` (main.ai_agent) must already exist.
#   - The source table must have Change Data Feed enabled.
client.create_delta_sync_index(
  endpoint_name=endpoint_name,
  index_name="main.ai_agent.maintenance_docs_index",
  primary_key="id",
  source_table_name="main.default.maintenance_docs_chunks",
  # TRIGGERED: the index is refreshed only when a sync is explicitly started.
  pipeline_type="TRIGGERED",
  embedding_vector_column="embedding",
  # NOTE(review): 3072 must equal the vector length produced by the configured
  # embedding deployment — confirm against AZURE_OPENAI_EMBEDDING_DEPLOYMENT.
  embedding_dimension=3072
)