In [None]:
# Install required libraries (added faiss-cpu, langchain-huggingface, and ipywidgets)
!pip install -q langchain langchain-community transformers spacy faiss-cpu streamlit sentence-transformers accelerate bitsandbytes langchain-huggingface ipywidgets

# Install localtunnel via npm (not pip) — Optional now, since we're using widgets
!npm install localtunnel

# Download spaCy model
!python -m spacy download en_core_web_sm

# Test installation (debug: check if GPU is available for other components like embeddings)
import torch
print(f"GPU available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(0)}")

In [2]:
# Import necessary libraries
import spacy
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch
import streamlit as st

# Load spaCy NLP model
nlp = spacy.load("en_core_web_sm")

# Print test (debug: confirm spaCy loaded)
doc = nlp("Test sentence for engine failure.")
print("spaCy test entities:", [(ent.text, ent.label_) for ent in doc.ents])  # Should print empty or basic entities

spaCy test entities: []


In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Download and load real Kaggle dataset for automotive engine health
!kaggle datasets download -d parvmodi/automotive-vehicles-engine-health-dataset --unzip  # Downloads engine_health_dataset.csv

import pandas as pd

# Load CSV
df = pd.read_csv('engine_data.csv')  # Assumes the file name from dataset

# Convert rows to text documents (simulate logs)
documents = []
for idx, row in df.iterrows():
    log = f"Log ID: {row.get('ID', idx)}, Engine RPM: {row['Engine rpm']}, Lubricant Oil Pressure: {row.get('Lub oil pressure', 'N/A')} bar, Coolant Pressure: {row.get('Coolant pressure', 'N/A')} bar, Coolant Temp: {row['Coolant temp']} C, Engine Condition: {row['Engine Condition']}"
    documents.append(log)

# Limit to first 500 for efficiency (dataset has ~19k rows)
documents = documents[:500]

# Test print (debug: check data loaded)
print(f"Number of documents: {len(documents)}")
print("Sample document:", documents[0])  # Inspect one

Dataset URL: https://www.kaggle.com/datasets/parvmodi/automotive-vehicles-engine-health-dataset
License(s): CC0-1.0
Downloading automotive-vehicles-engine-health-dataset.zip to /content
  0% 0.00/595k [00:00<?, ?B/s]
100% 595k/595k [00:00<00:00, 1.06GB/s]
Number of documents: 500
Sample document: Log ID: 0, Engine RPM: 700.0, Lubricant Oil Pressure: 2.493591821 bar, Coolant Pressure: 3.178980794 bar, Coolant Temp: 81.6321865 C, Engine Condition: 1.0


In [5]:
# Use Sentence Transformers for embeddings, with GPU support (updated import to avoid deprecation)
from langchain_huggingface import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={"device": "cuda"}  # Use L4 GPU
)

# Test embedding (debug: generate and check shape)
test_embedding = embeddings.embed_query("Test query")
print(f"Embedding dimension: {len(test_embedding)}")  # Should be 384 for this model
print("Sample embedding values:", test_embedding[:5])  # First few values for sanity

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embedding dimension: 384
Sample embedding values: [0.06836465001106262, 0.06170124188065529, -0.006391868460923433, 0.08266667276620865, -0.07825048267841339]


In [6]:
# Create FAISS index from documents
# Split docs into chunks if needed; here they're short
from langchain.docstore.document import Document
docs = [Document(page_content=doc) for doc in documents]

vectorstore = FAISS.from_documents(docs, embeddings)

# Save locally (optional)
vectorstore.save_local("auto_maintenance_index")

# Test retrieval (debug: query and check results)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
test_docs = retriever.get_relevant_documents("engine overheating")
print(f"Retrieved {len(test_docs)} docs")
print("Sample retrieved content:", test_docs[0].page_content)  # Check relevance

Retrieved 3 docs
Sample retrieved content: Log ID: 102, Engine RPM: 1481.0, Lubricant Oil Pressure: 3.972600005 bar, Coolant Pressure: 1.556111313 bar, Coolant Temp: 93.79160853 C, Engine Condition: 0.0


  test_docs = retriever.get_relevant_documents("engine overheating")


In [7]:
# Load quantized LLM (Phi-3-mini) for generation on L4 GPU
model_id = "microsoft/Phi-3-mini-4k-instruct"
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="cuda:0",  # Use GPU
    attn_implementation="eager"  # For compatibility
)

# Create pipeline with higher max_new_tokens
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,  # Increased for fuller responses
    temperature=0.5,
    do_sample=True,
    top_p=0.95
)

llm = HuggingFacePipeline(pipeline=pipe)

# Test LLM (debug: generate sample response)
test_prompt = "[INST]Hello, what is predictive maintenance?[/INST]"  # Added [INST] format
test_response = llm(test_prompt)
print("LLM test response:", test_response)  # Check if it generates coherent text

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=pipe)
  test_response = llm(test_prompt)


LLM test response: [INST]Hello, what is predictive maintenance?[/INST] Predictive maintenance is a proactive maintenance strategy that uses data analysis tools and techniques to predict when equipment failure might occur, allowing maintenance to be performed just in time to prevent the failure. Unlike traditional maintenance methods that are scheduled at regular intervals (preventive maintenance) or after a failure has occurred (reactive maintenance), predictive maintenance relies on condition-monitoring tools and techniques to assess the actual condition of the equipment to predict when maintenance should be performed.

The goal of predictive maintenance is to perform maintenance at the right time, which can reduce downtimes, improve equipment reliability, and extend the life of the equipment. It involves various techniques such as vibration analysis, thermography, oil analysis, and ultrasonic leak detection, among others.

Predictive maintenance is part of a larger concept known as t

In [8]:
# NLP layer: Process input for entities and refinement
def process_input(input_data):
    doc = nlp(input_data)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    # Simple refinement: Append entities to query for better retrieval
    refined = input_data + " " + " ".join([ent[0] for ent in entities])
    return refined, entities

# Test function (debug: run sample)
test_input = "Analyze logs for truck with high coolant temp and vibrations."
refined, entities = process_input(test_input)
print("Refined query:", refined)

Refined query: Analyze logs for truck with high coolant temp and vibrations. 


In [9]:
# Combine NLP, RAG, LLM for prediction
def predict_maintenance(input_data):
    refined, entities = process_input(input_data)
    docs = retriever.get_relevant_documents(refined)

    # Format context
    context = "\n".join([doc.page_content for doc in docs])

    # Updated prompt with specialization and constraints
    prompt = f"[INST]You are an advanced predictive maintenance AI specializing in combustion engines. Use the provided context to predict potential failures and suggest detailed actions. Constraints: Focus only on combustion engines (ignore EVs), include approximate cost estimates for actions (in USD), and assess urgency based on time (e.g., immediate, within 1 week). Output in a structured format: Prediction, Suggested Actions, Costs, Urgency.\nContext: {context}\nQuery: {input_data}[/INST]"

    response = llm(prompt)

    # Clean response
    if "[INST]" in response:
        response = response.split("[/INST]")[-1].strip()

    return response, entities, docs

# Test pipeline (debug: full run)
test_data = "High coolant temp in truck XYZ at idle."
response, entities, docs = predict_maintenance(test_data)
print("Predicted response:", response)
print("Entities:", entities)
print("Retrieved docs count:", len(docs))  # Verify integration

Predicted response: Prediction: High Coolant Temperature at Idle
Suggested Actions:
1. Check for coolant leaks or insufficient coolant levels.
2. Inspect the cooling system for clogs or restrictions in the radiator or hoses.
3. Ensure that the cooling fan is operational when the engine is off.
4. Verify that the thermostat is functioning correctly and opening at the proper temperature.
5. Check for proper airflow through the radiator (e.g., ensure the radiator fan is working, and there are no obstructions).

Costs:
1. Coolant leak inspection and repair: $100 - $300
2. Radiator inspection and cleaning: $50 - $150
3. Cooling fan replacement (if necessary): $50 - $100
4. Thermostat replacement (if faulty): $10 - $20
5. Radiator fan replacement (if necessary): $30 - $60

Urgency:
1. Coolant leak inspection and repair: Immediate
2. Radiator inspection and cleaning: Within 1 week
3. Cooling fan replacement: Immediate
4. Thermostat replacement: Within 1 week
5.
Entities: [('XYZ', 'PERSON')]
R

In [22]:
# Import ipywidgets for interactive UI
import ipywidgets as widgets
from IPython.display import display, clear_output, Markdown

# Reuse the existing functions: process_input and predict_maintenance from previous cells
# (Assume they've been defined)

# Create widgets
header = widgets.HTML(value="<h2 style='text-align:center; color:#000000;'>Predictive Automotive Maintenance Tool</h2>")

input_text = widgets.Text(
    value='High engine temperature when speed is above 100 KM/H and AC also stopped working automatically',
    placeholder='Enter vehicle logs or sensor data...',
    description='Input:',
    disabled=False,
    layout=widgets.Layout(width='50%', margin='10px 0')
)

button = widgets.Button(
    description='Predict Maintenance',
    disabled=False,
    button_style='success',  # Green style
    tooltip='Run prediction',
    icon='wrench',  # Better icon for maintenance
    layout=widgets.Layout(width='200px', margin='10px auto', display='block')
)

output = widgets.Output(layout=widgets.Layout(width='100%', padding='10px', border='1px solid #ddd', background='#f9f9f9'))

# Overall layout using VBox for structured UI
ui_layout = widgets.VBox(
    [header, input_text, button, output],
    layout=widgets.Layout(align_items='center', width='100%')
)

# Define button click handler
def on_button_clicked(b):
    with output:
        clear_output(wait=True)  # Clear previous output

        if not input_text.value.strip():
            display(Markdown("**Error:** Please enter some input data."))
            return

        # Show loading indicator with dots
        loading = widgets.Label(value="Processing")
        display(loading)

        try:
            import time
            dots = ""
            for i in range(3):  # Cycle through 3 dots
                dots = "." * (i + 1)
                loading.value = f"Processing{dots}"
                time.sleep(0.5)

            response, entities, docs = predict_maintenance(input_text.value)

            # Filter response to remove Urgency section
            if "Urgency:" in response:
                response = response.split("Urgency:")[0].strip()

            # Ensure costs have $ symbol by simple string replacement if missing
            if "Costs:" in response and "$" not in response:
                response = response.replace("Costs:", "Costs (in $):")
            elif "Costs:" in response:
                # Basic fix for incomplete $ in example like "$200−" but assume LLM handles
                pass

            clear_output(wait=True)  # Clear loading

            # Display only the main prediction response (Prediction, Suggested Actions, Costs)
            display(Markdown("**Maintenance Recommendations:**\n\n" + response))

        except Exception as e:
            clear_output(wait=True)
            display(Markdown(f"**Error during prediction:** {str(e)}"))

# Attach handler to button
button.on_click(on_button_clicked)

# Display the enhanced UI
display(ui_layout)

VBox(children=(HTML(value="<h2 style='text-align:center; color:#000000;'>Predictive Automotive Maintenance Too…