In [16]:
import pandas as pd
import numpy as np
import ast
import re
from sentence_transformers import SentenceTransformer
import faiss

In [None]:
# Load the raw automotive fault table and preview its first rows.
raw_faults_path = "./data/automotive_faults.csv"
dataset = pd.read_csv(raw_faults_path)
dataset.head()

Unnamed: 0,category,subcategory,symptoms,diagnosis_steps
0,ABS System,ABS Control Module,"['ABS warning light on', 'Brake pedal pulsation']","[{'step': 'Check ABS fuse', 'result': ['Blown'..."
1,ABS System,ABS Wheel Speed Sensor,"['ABS light stays on', 'Erratic speedometer']",[{'step': 'Inspect wheel speed sensor connecti...
2,ABS System,Brake Booster,"['Spongy brake pedal', 'Hard brake pedal']","[{'step': 'Check brake booster vacuum hose', '..."
3,ABS System,Brake Caliper,"['Brake pulling to one side', 'Brake dragging']","[{'step': 'Inspect brake caliper for leaks', '..."
4,ABS System,Brake Hose,"['Brake fluid leak', 'Soft brake pedal']",[{'step': 'Inspect brake hose for cracks or le...


In [18]:
# Show the column labels of the loaded frame.
dataset.columns

Index(['category', 'subcategory', 'symptoms', 'diagnosis_steps'], dtype='object')

In [19]:
# Work on an independent copy of the selected columns: a later cell adds a
# "chunk" column to this frame, and assigning into a selection of `dataset`
# triggers pandas' SettingWithCopyWarning.
temp_dataset=dataset[['category', 'subcategory', 'symptoms', 'diagnosis_steps']].copy()

In [20]:
# Preview the first rows of the working frame.
temp_dataset.head()

Unnamed: 0,category,subcategory,symptoms,diagnosis_steps
0,ABS System,ABS Control Module,"['ABS warning light on', 'Brake pedal pulsation']","[{'step': 'Check ABS fuse', 'result': ['Blown'..."
1,ABS System,ABS Wheel Speed Sensor,"['ABS light stays on', 'Erratic speedometer']",[{'step': 'Inspect wheel speed sensor connecti...
2,ABS System,Brake Booster,"['Spongy brake pedal', 'Hard brake pedal']","[{'step': 'Check brake booster vacuum hose', '..."
3,ABS System,Brake Caliper,"['Brake pulling to one side', 'Brake dragging']","[{'step': 'Inspect brake caliper for leaks', '..."
4,ABS System,Brake Hose,"['Brake fluid leak', 'Soft brake pedal']",[{'step': 'Inspect brake hose for cracks or le...


In [21]:
def clean_text(text):
    """Normalise text before embedding: lowercase, drop punctuation, squeeze whitespace.

    Args:
        text: Input string.

    Returns:
        The lowercased string in which every character other than an ASCII
        letter, digit or whitespace has been replaced by a space, and every
        whitespace run has been collapsed to a single space. Leading and
        trailing spaces are not stripped.
    """
    text=text.lower()
    # The class must be A-Z: the former `A-z` range also covers the ASCII
    # characters between 'Z' and 'a' ([ \ ] ^ _ `), so they were never removed.
    text=re.sub(r'[^a-zA-Z0-9\s]',' ',text)
    text=re.sub(r'\s+',' ',text)
    return text

In [22]:
def format_chunk(row):
    """Render one fault record as a single text chunk for embedding.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the keys
            "category", "subcategory", "symptoms" and "diagnosis_steps".
            "symptoms" is the string repr of a list of strings;
            "diagnosis_steps" is the string repr of a list of dicts, each
            with a "step" string and a "result" list of strings.

    Returns:
        A multi-line string listing category, subcategory, the symptoms joined
        with "; ", and the numbered diagnosis steps, each followed by an arrow
        and its comma-separated results. Surrounding whitespace is stripped.
    """
    symptoms_text = "; ".join(ast.literal_eval(row["symptoms"]))

    steps_list = ast.literal_eval(row["diagnosis_steps"])
    # The arrow is written as an escape so it cannot be corrupted by a
    # mis-decoded save again (it previously appeared as the bytes of U+2192
    # read as cp1252).
    steps_text = "".join(
        f"{i}. {step['step']} \u2192 {', '.join(step['result'])}\n"
        for i, step in enumerate(steps_list, start=1)
    )

    chunk = f"""
    Category: {row['category']}
    Subcategory: {row['subcategory']}
    Symptoms: {symptoms_text}
    Diagnosis Steps:
    {steps_text}
    """.strip()
    return chunk

In [None]:
# Build the retrieval text for every row and normalise it. `assign` returns a
# new frame, so nothing is written into a selection of another DataFrame and
# the cell can be re-run safely.
temp_dataset = temp_dataset.assign(
    chunk=temp_dataset.apply(format_chunk, axis=1).apply(clean_text)
)
# Save next to the input CSV (read from ./data); the former "/data/..." path
# pointed at the filesystem root instead of the project's data folder.
temp_dataset.to_csv("./data/processed_automotive_faults.csv", index=False)

In [25]:
# Encode every cleaned chunk with the sentence-transformer model and cache the
# resulting array on disk.
model = SentenceTransformer('all-MiniLM-L6-v2')
chunk_texts = temp_dataset['chunk'].values
embeddings = np.array(model.encode(chunk_texts))
np.save("chunk.npy", embeddings)

In [27]:
# Reload the embedding array from "chunk.npy", the file written by the np.save call above.
embeddings=np.load("chunk.npy")

In [28]:
# Create a flat L2 FAISS index whose dimensionality is the embedding width
# (second axis of `embeddings`), add all chunk vectors, and persist the index.
dimension=embeddings.shape[1]
faiss_index=faiss.IndexFlatL2(dimension)
# Vectors are added in array order, so index positions line up with row
# positions of the array that was encoded.
faiss_index.add(embeddings)
faiss.write_index(faiss_index,"chunk_faiss_index.index")

In [29]:
def get_similar_query(query,model=model,faiss_index=faiss_index,count=3):
    """Return the stored chunks most similar to a free-text query.

    Args:
        query: Raw query string; it is normalised with `clean_text` first.
        model: Encoder exposing `encode(list_of_str)`; defaults to the
            notebook-level `model`.
        faiss_index: Index exposing `search(vectors, k)`; defaults to the
            notebook-level `faiss_index`.
        count: Maximum number of chunks to return.

    Returns:
        A list of at most `count` chunk strings taken from
        `temp_dataset["chunk"]`, in the order the index returned them.
        Empty when `count` is less than 1.
    """
    if count < 1:
        return []

    query=clean_text(query)
    query_embedding=model.encode([query])
    _distances,indices=faiss_index.search(query_embedding,count)

    # FAISS pads the result with -1 when the index holds fewer than `count`
    # vectors; `.iloc[-1]` would silently return the last row as a fake hit,
    # so those placeholders are skipped.
    chunks=temp_dataset["chunk"]
    return [chunks.iloc[idx] for idx in indices[0] if idx >= 0]

In [30]:
# Try the retriever on a free-text symptom description, using the default count of 3.
get_similar_query("ABS light is on and the brake pedal vibrates when braking.")

 'category abs system subcategory abs wheel speed sensor symptoms abs light stays on erratic speedometer diagnosis steps 1 inspect wheel speed sensor connections loose connections secure connections 2 check wheel speed sensor for debris or damage dirty damaged clean intact',
 'category abs system subcategory brake booster symptoms spongy brake pedal hard brake pedal diagnosis steps 1 check brake booster vacuum hose leaking intact 2 test brake booster vacuum pressure low vacuum pressure normal vacuum pressure']