<a href="https://colab.research.google.com/github/Thivirug/GEN-AI-challenge/blob/main/Challenge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# mount gdrive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import os

PARENT_PATH = os.path.join('gdrive', 'My Drive', 'GenAI-Challenge')
print(PARENT_PATH)

gdrive/My Drive/GenAI-Challenge


----

> # 1) Explarotary Understanding

In [3]:
import pandas as pd
import pprint

In [4]:
diagnostic_codes = pd.read_json(os.path.join(PARENT_PATH, 'dictionary.json'))

pprint.pprint(diagnostic_codes.sample(10))

      code                                        description
103  P2601     Coolant Pump Control Circuit Range/Performance
33   P0205                 Injector Circuit/Open - Cylinder 5
25   P0172                           System Too Rich (Bank 1)
0    P0100        Mass or Volume Air Flow Circuit Malfunction
88   P1700            Transmission Control System Malfunction
56   P0441  Evaporative Emission Control System Incorrect ...
69   P0730                               Incorrect Gear Ratio
24   P0171                           System Too Lean (Bank 1)
28   P0200                       Injector Circuit Malfunction
53   P0420  Catalyst System Efficiency Below Threshold (Ba...


In [5]:
mechanic_notes = pd.read_json(os.path.join(PARENT_PATH, 'sample_data.json'))

pprint.pprint(mechanic_notes.sample(10))

                                     note   code
44                        chk sum err pcm  P0601
23                    cyl3 inj wire short  P0203
22         inj 2 stuck open, flooding eng  P0202
50           gear ratio error, trans slip  P0730
19       b2 lean, hi idle, vac leak maybe  P0174
40                spdometre dead, VSS bad  P0500
21             inj #1 no pulse, cyl1 dead  P0201
29              knock sensor b2 no signal  P0330
8   engine revs by itself, tps sig too lo  P0122
41                  idle surgin, iac prob  P0505


* Notes have a considerable amount of spelling errors in the context of general English, but they are more like abbreviations in mechanic lingo rather than spelling mistakes.
* There is no consideration about grammar whatsoever, possibly because these are notes taken during examination which requires short yet precise notes that could be understood later.

> # 2) Baseline Approach

The success of this method hinges entirely on the quality of the prompt. The prompt would be designed with clarity and constraint in mind.

In [6]:
!pip install transformers torch accelerate



In [7]:
import torch
from transformers import pipeline

device = 'cuda' if torch.cuda.is_available else 'cpu'
print(device)

cuda


In [8]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [9]:
import transformers

def get_generator(model_id: str = "google/flan-t5-base") -> transformers.pipelines.text2text_generation.Text2TextGenerationPipeline: # ~ 250 mil
  """
    Get the required model's text generation pipeline from HF
  """

  # Pass the config to the pipeline
  return pipeline(
      "text2text-generation",
      model = model_id,
      model_kwargs = {
          "device_map": "auto",
      }
  )

gen = get_generator()

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cpu


In [21]:
type(gen)

In [45]:
def get_code(mechanic_note: str, txt_gen_pipeline: transformers.pipelines.text2text_generation.Text2TextGenerationPipeline, is_zero_shot: bool) -> str:
  """
    Generate the code using in-built knowlegdge of the model.
    is_zero_shot determines which type of prompt to use.
  """

  PROMPT_TEMPLATE_ZERO_SHOT = (
  f"""
    You are an expert automotive AI assistant specializing in engine diagnostics.
    Your task is to analyze a mechanic's note and determine the single most likely Diagnostic Trouble Code (DTC).

    Mechanic's Note: "{mechanic_note}"

    Respond with ONLY the 5-character DTC code (e.g., P0102). Do not include any explanations, apologies, or additional text.
   """
  )

  PROMPT_TEMPLATE_FEW_SHOT = (
  f"""
    You are an expert automotive AI assistant specializing in engine diagnostics.
    Your task is to analyze a mechanic's note and determine the single most likely Diagnostic Trouble Code (DTC).

    ---
    Example 1:
    Mechanic's Note: "car stals @ idle, maf might b bad, low airflw readin"
    DTC: P0102

    Example 2:
    Mechanic's Note: "upstream O2 slow 2 swtch, laggy"
    DTC: P0133
    ---

    Now, complete the following:
    Mechanic's Note: "{mechanic_note}"
    DTC:
   """
  )

  # get the required prompt style
  if is_zero_shot:
    prompt_template = PROMPT_TEMPLATE_ZERO_SHOT.format(mechanic_note = mechanic_note)
  else:
    prompt_template = PROMPT_TEMPLATE_FEW_SHOT.format(mechanic_note = mechanic_note)

  outputs = txt_gen_pipeline(
    prompt_template,
    max_new_tokens=5,
    do_sample=False # want the most likely token, not a creative one
  )

  return outputs[0]['generated_text']

Save results of zero shot, few shot prompts into separate .json files

In [46]:
# helper
def _zero_shot_results(mechanic_note: str, code: str, txt_gen_pipeline: transformers.pipelines.text2text_generation.Text2TextGenerationPipeline) -> dict:
  """
   Generate the code using zero shot prompt engineering.
  """
  return {
      'mechanic_note': mechanic_note,
      'code': code,
      'predicted_code': get_code(mechanic_note, txt_gen_pipeline, is_zero_shot = True)
  }


In [47]:
# helper
def _few_shot_results(mechanic_note: str, code: str, txt_gen_pipeline: transformers.pipelines.text2text_generation.Text2TextGenerationPipeline) -> dict:
  """
   Generate the code using few shot prompt engineering.
  """
  return {
      'mechanic_note': mechanic_note,
      'code': code,
      'predicted_code': get_code(mechanic_note, txt_gen_pipeline, is_zero_shot = False)
  }


In [48]:
def _get_codes(mechanic_notes: pd.DataFrame, txt_gen_pipeline: transformers.pipelines.text2text_generation.Text2TextGenerationPipeline) -> tuple[list, list]:
  """
   Generate the codes using zero shot and few shot prompt engineering.
  """
  zero_shot_results = []
  few_shot_results = []

  for _, row in mechanic_notes.iterrows():
    code = row['code']
    note = row['note']

    # zero shot results
    zero_shot_results.append(_zero_shot_results(note, code, txt_gen_pipeline))

    # few shot results
    few_shot_results.append(_few_shot_results(note, code, txt_gen_pipeline))

  return zero_shot_results, few_shot_results

In [49]:
import json

def save_to_json(zero_shot_results: list, few_shot_results: list) -> None:
  """
    Save results of zero & few shot prompting to .json files
  """

  # save zero shot results
  with open(os.path.join(PARENT_PATH, 'zero_shot_results.json'), 'w') as f:
    json.dump(zero_shot_results, f)

  # save few shot reuslts
  with open(os.path.join(PARENT_PATH, 'few_shot_results.json'), 'w') as f:
    json.dump(few_shot_results, f)

In [50]:
# run
zero_shot_results, few_shot_results = _get_codes(mechanic_notes, gen)
save_to_json(zero_shot_results, few_shot_results)

> # 3) Improved approach with RAG

In [10]:
!pip install -U sentence-transformers



In [11]:
from sentence_transformers import SentenceTransformer, util

embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# --- Generate Embeddings for the Dictionary ---
dictionary_embeddings = embedding_model.encode(diagnostic_codes['description'].tolist(), convert_to_tensor=True)
print("Dictionary embeddings created.")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Dictionary embeddings created.


In [12]:
type(embedding_model)

In [34]:
import sentence_transformers

def get_top_candidates(mechanic_note: str, diagnostic_codes: pd.DataFrame, embed_model: sentence_transformers.SentenceTransformer, embeddings_dict: torch.Tensor, top_n: int = 5) -> list:
  """
    Takes a single mechanic's note, embeds it, and compares it against all the dictionary embeddings to find the best code matches.
  """

  # embed mechanic's notes
  mechanic_note_embedding = embed_model.encode(mechanic_note, convert_to_tensor=True)

  # compute cosine similarity between the note and all dictionary entries
  cosine_scores = util.cos_sim(mechanic_note_embedding, embeddings_dict)[0]

  # get indices and scores of top n cadidates
  top_results = torch.topk(cosine_scores, k=top_n)

  # format the candidates - list containing the codes and their descriptions
  candidates = []

  for _, idx in zip(top_results[0], top_results[1]): # create pairs of score, index
    # convert to int
    idx = idx.item()

    candidates.append(
      # get the code and description from the dictionary DataFrame
      (
        f"Code: {diagnostic_codes.iloc[idx]['code']}, Description: {diagnostic_codes.iloc[idx]['description']}"
      )
    )

  return candidates

# # example usage
# can = get_top_candidates("b2 lean, hi idle, vac leak maybe", diagnostic_codes, embedding_model, dictionary_embeddings)

# "\n".join([f"{i+1}. {c}" for i, c in enumerate(can)])

In [38]:
def get_rag_code(mechanic_note: str, candidates: list, txt_gen_pipeline: transformers.pipelines.text2text_generation.Text2TextGenerationPipeline) -> str:
  """
    Generate code for the note using an LLM to select the best code from a list of retrieved candidates.
  """

  # convert the candidates list into a single clean string
  cand_str = "\n".join(
      [
          f"{i+1}. {c}"
          for i, c in enumerate(candidates)
      ]
  )

  rag_prompt = f"""
    You are an expert automotive AI assistant. Your task is to analyze a mechanic's note and select the single best Diagnostic Trouble Code (DTC) from the provided list of candidates.

    ---
    Mechanic's Note: "{mechanic_note}"

    Candidate Codes and their descriptions:
    {cand_str}
    ---

    Review the note and the candidate codes. Respond with ONLY the 5-character DTC of the best match (e.g., P0102).
    """

  outputs = txt_gen_pipeline(
      rag_prompt,
      max_new_tokens=5,
      do_sample=False # want the most likely token, not a creative one
  )

  return outputs[0]['generated_text']

# # example usage
# NOTE = "b2 lean, hi idle, vac leak maybe"
# cands = get_top_candidates(NOTE, diagnostic_codes, embedding_model, dictionary_embeddings)
# get_rag_code(NOTE, cands, gen)

 saving the predictions to .json

In [39]:
# helper
def _get_code_rag(mechanic_note: str, code: str, txt_gen_pipeline: transformers.pipelines.text2text_generation.Text2TextGenerationPipeline, diagnostic_codes: pd.DataFrame, embed_model: sentence_transformers.SentenceTransformer, embeddings_dict: torch.Tensor) -> dict:
  """
   Generate the code using prompt engineering complemented with RAG.
  """

  return {
      'mechanic_note': mechanic_note,
      'code': code,
      'predicted_code': get_rag_code(mechanic_note, get_top_candidates(mechanic_note, diagnostic_codes, embed_model, embeddings_dict), txt_gen_pipeline)
  }

In [40]:
def _get_codes_rag(mechanic_notes: pd.DataFrame, txt_gen_pipeline: transformers.pipelines.text2text_generation.Text2TextGenerationPipeline, diagnostic_codes: pd.DataFrame, embed_model: sentence_transformers.SentenceTransformer, embeddings_dict: torch.Tensor) -> list:
  """
   Generate the codes for all notes using prompt engineering complemented with RAG.
  """
  results = []

  for _, row in mechanic_notes.iterrows():
    code = row['code']
    note = row['note']

    # results
    results.append(
        _get_code_rag(note, code, txt_gen_pipeline, diagnostic_codes, embed_model, embeddings_dict)
    )

  return results

In [43]:
import json
def save_to_json_rag(rag_results: list) -> None:
  """
    Save results of RAG augemnted pipeline to a .json file
  """

  # save results
  with open(os.path.join(PARENT_PATH, 'RAG_results.json'), 'w') as f:
    json.dump(rag_results, f)

In [44]:
# run
rag_results = _get_codes_rag(mechanic_notes, gen, diagnostic_codes, embedding_model, dictionary_embeddings)
save_to_json_rag(rag_results)

> # 4) Testing

In [47]:
import pandas as pd
import json

# load all prediction files into DataFrames

df_zero_shot = pd.read_json(os.path.join(PARENT_PATH, 'zero_shot_results.json'))
df_few_shot = pd.read_json(os.path.join(PARENT_PATH, 'few_shot_results.json'))
df_rag = pd.read_json(os.path.join(PARENT_PATH, 'RAG_results.json'))

def calculate_accuracy(df: pd.DataFrame) -> float:
  """
    Calculates the accuracy from a predictions DataFrame
  """
  # Compare the predicted code column with the true code column
  correct_predictions = (df['predicted_code'] == df['code']).sum()
  total_predictions = len(df)
  accuracy = (correct_predictions / total_predictions) * 100
  return accuracy

# --- Calculate and print the results ---
acc_zero_shot = calculate_accuracy(df_zero_shot)
acc_few_shot = calculate_accuracy(df_few_shot)
acc_rag = calculate_accuracy(df_rag)

print("--- Performance Metrics ---\n")
print(f"Baseline (Zero-Shot) Accuracy: {acc_zero_shot:.2f}%")
print(f"Baseline (Few-Shot) Accuracy: {acc_few_shot:.2f}%")
print(f"Improved (RAG) Accuracy:      {acc_rag:.2f}%")

--- Performance Metrics ---

Baseline (Zero-Shot) Accuracy: 1.32%
Baseline (Few-Shot) Accuracy: 1.32%
Improved (RAG) Accuracy:      34.21%


In [49]:
def get_wrong_preds(df: pd.DataFrame, n: int) -> list:
  """
    Get first 'n' wrong predictions in a DataFrame
  """
  wrong_preds = pd.DataFrame(columns = df.columns)

  count = 0
  for _, row in df.iterrows():
    if count == n:
      break

    if row['predicted_code'] != row['code']:
      wrong_preds.loc[count] = row

      count += 1

  return wrong_preds

In [50]:
# disaply wrong preds
get_wrong_preds(df_zero_shot, 10)

Unnamed: 0,mechanic_note,code,predicted_code
0,"chk eng lite on, maf high volt detectd",P0103,P0102
1,"maf sig intermittnt, cuts out sumtimes",P0104,P0102
2,"engin rough cold strt, iat sensor low",P0112,P0102
3,air temp sensor jumps 200F instantly!!,P0113,P0102
4,"coolnt gage dead, sensor opn circut",P0117,P0102
5,"coolnt temp pegged hi, ect sensor probly bad",P0118,P0102
6,"poor throttle rspns, TPS outta range",P0121,P0102
7,"engine revs by itself, tps sig too lo",P0122,P0102
8,"CEL on, TPS stuck hi",P0123,P0102
9,"car wont go closed loop, coolant nvr warms up",P0125,P0102


In [51]:
get_wrong_preds(df_few_shot, 10)

Unnamed: 0,mechanic_note,code,predicted_code
0,"chk eng lite on, maf high volt detectd",P0103,P0102
1,"maf sig intermittnt, cuts out sumtimes",P0104,P0102
2,"engin rough cold strt, iat sensor low",P0112,P0102
3,air temp sensor jumps 200F instantly!!,P0113,P0102
4,"coolnt gage dead, sensor opn circut",P0117,P0102
5,"coolnt temp pegged hi, ect sensor probly bad",P0118,P0102
6,"poor throttle rspns, TPS outta range",P0121,P0102
7,"engine revs by itself, tps sig too lo",P0122,P0102
8,"CEL on, TPS stuck hi",P0123,TPS ---
9,"car wont go closed loop, coolant nvr warms up",P0125,P0102


In [52]:
get_wrong_preds(df_rag, 10)

Unnamed: 0,mechanic_note,code,predicted_code
0,"chk eng lite on, maf high volt detectd",P0103,P0306
1,"maf sig intermittnt, cuts out sumtimes",P0104,P0601
2,"engin rough cold strt, iat sensor low",P0112,P0705
3,air temp sensor jumps 200F instantly!!,P0113,P0110
4,"coolnt gage dead, sensor opn circut",P0117,P0115
5,"coolnt temp pegged hi, ect sensor probly bad",P0118,P0115
6,"engine revs by itself, tps sig too lo",P0122,P0174
7,"CEL on, TPS stuck hi",P0123,P1600
8,"o2 b1s1 stuck lean, LTFT +25",P0131,P0174
9,oxgn sensor hi volt b1s1,P0132,P0133
