In [1]:
import pandas as pd
import traceback

import os
import tiktoken
from openai import OpenAI
from dotenv import load_dotenv

from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms.base import LLM
from typing import List, Optional
from langchain.chains import RetrievalQA

import warnings
warnings.filterwarnings("ignore")

In [2]:
ahj = pd.read_excel("D:\\CodingSystem\\assets\\AHJ_PriceList.xlsx")

print(ahj.shape)
ahj.head()

(111792, 7)


Unnamed: 0,INSURANCE_COMPANY,SERVICE_CODE,SERVICE_DESCRIPTION,PRICE,SERVICE_KEY,SERVICE_CLASSIFICATION,SERVICE_CATEGORY
0,Cash,LA0013674,AFP**,300.0,75905,LAB Services,LAB-Hormones
1,Islamic Bank,LA0013674,AFP**,300.0,75905,LAB Services,LAB-Hormones
2,Gulf Union,LA0013674,AFP**,315.0,75905,LAB Services,LAB-Hormones
3,MOH,LA0013674,AFP**,393.288,75905,LAB Services,LAB-Hormones
4,AXA,LA0013674,AFP**,415.0,75905,LAB Services,LAB-Hormones


In [3]:
ahj['SERVICE_CODE'].nunique(), ahj['SERVICE_DESCRIPTION'].nunique()

(16302, 16298)

In [4]:
ahj = ahj[~ahj['INSURANCE_COMPANY'].isin([0, 'Cash', 'Item Cash', 'OUTSIDE DOCTOR (CASH)'])]
ahj.shape

(92164, 7)

In [5]:
ahj['INSURANCE_COMPANY'].nunique()

49

In [6]:
ahj['SERVICE_CODE'].nunique(), ahj['SERVICE_DESCRIPTION'].nunique()

(13385, 13380)

In [7]:
sbs = pd.read_excel("D:\\CodingSystem\\assets\\SBS_Services.xlsx")
sbs['Short Description'] = sbs['Short Description'].str.strip().str.upper()
sbs['Long Description'] = sbs['Long Description'].str.strip().str.upper()

print(sbs.shape)
sbs.head()

(10081, 7)


Unnamed: 0,SBS Code,SBS Code (Hyphenated),Short Description,Long Description,Definition,Chapter Name,Block Name
0,408030000,40803-00-00,INTRACRANIAL STEREOTACTIC LOCALISATION,INTRACRANIAL STEREOTACTIC LOCALIZATION,,Procedures on nervous system,"Examination of skull, meninges or brain"
1,409030000,40903-00-00,NEUROENDOSCOPY,NEUROENDOSCOPY,,Procedures on nervous system,"Examination of skull, meninges or brain"
2,390030000,39003-00-00,CISTERNAL PUNCTURE,CISTERNAL PUNCTURE,A needle placed below the occipital bone (back...,Procedures on nervous system,Cranial tap or puncture
3,390060000,39006-00-00,VENTRICULAR PUNCTURE,VENTRICULAR PUNCTURE,,Procedures on nervous system,Cranial tap or puncture
4,390090000,39009-00-00,TAP FOR SUBDURAL HAEMORRHAGE,TAP FOR SUBDURAL HAEMORRHAGE,,Procedures on nervous system,Cranial tap or puncture


In [8]:
sbs['SBS Code (Hyphenated)'].nunique(), sbs['Short Description'].nunique(), sbs['Long Description'].nunique()

(10081, 10079, 10076)

In [9]:
# Add new column with 'PK-' prefix removed if present:
ahj['NEW_SERVICE_DESCRIPTION'] = ahj['SERVICE_DESCRIPTION'].str.replace(r'^(PK-)+', '', regex=True)

# Merge on NEW_SERVICE_DESCRIPTION == Long Description
merge_long = ahj.merge(
    sbs,
    how='left',
    left_on='NEW_SERVICE_DESCRIPTION',
    right_on='Long Description'
)

# Merge on NEW_SERVICE_DESCRIPTION == Short Description
merge_short = ahj.merge(
    sbs,
    how='left',
    left_on='NEW_SERVICE_DESCRIPTION',
    right_on='Short Description'
)

# Combine the two
exact_services = pd.concat([merge_long, merge_short])

# Keep only rows with a match
exact_services = exact_services[
    exact_services['Long Description'].notnull() | exact_services['Short Description'].notnull()
]

# Remove duplicates if any
exact_services = exact_services.drop_duplicates()

print(exact_services.shape)
exact_services.head()

(31081, 15)


Unnamed: 0,INSURANCE_COMPANY,SERVICE_CODE,SERVICE_DESCRIPTION,PRICE,SERVICE_KEY,SERVICE_CLASSIFICATION,SERVICE_CATEGORY,NEW_SERVICE_DESCRIPTION,SBS Code,SBS Code (Hyphenated),Short Description,Long Description,Definition,Chapter Name,Block Name
3977,ANDALUSIA CLINICS FOR KIDS WELLNESS-SARI,LA0009004,ORAL GLUCOSE TOLERANCE TEST,16.2,76961,LAB Services,LAB-Biochemistry,ORAL GLUCOSE TOLERANCE TEST,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment
3978,Globe Med,LA0009004,ORAL GLUCOSE TOLERANCE TEST,100.0,76961,LAB Services,LAB-Biochemistry,ORAL GLUCOSE TOLERANCE TEST,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment
3979,LAB TO LAB (NEW),LA0009004,ORAL GLUCOSE TOLERANCE TEST,115.0,76961,LAB Services,LAB-Biochemistry,ORAL GLUCOSE TOLERANCE TEST,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment
3980,Islamic Bank,LA0009004,ORAL GLUCOSE TOLERANCE TEST,131.0,76961,LAB Services,LAB-Biochemistry,ORAL GLUCOSE TOLERANCE TEST,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment
3981,MOH,LA0009004,ORAL GLUCOSE TOLERANCE TEST,143.345,76961,LAB Services,LAB-Biochemistry,ORAL GLUCOSE TOLERANCE TEST,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment


In [10]:
exact_services['SERVICE_CODE'].nunique(), exact_services['SERVICE_DESCRIPTION'].nunique()

(3842, 3842)

In [11]:
different_ahj = ahj[~ahj['SERVICE_DESCRIPTION'].isin(list(exact_services['SERVICE_DESCRIPTION'].unique()))]
different_ahj.shape

(61083, 8)

In [12]:
different_ahj['SERVICE_CODE'].nunique(), different_ahj['SERVICE_DESCRIPTION'].nunique()

(9543, 9538)

In [12]:
bupa_ahj = different_ahj[different_ahj['INSURANCE_COMPANY'] == 'Bupa']
validated_unique_services = list(bupa_ahj['SERVICE_DESCRIPTION'].unique())

In [13]:
other_accounts = different_ahj[~different_ahj['SERVICE_DESCRIPTION'].isin(validated_unique_services)]
other_accounts['SERVICE_DESCRIPTION'].nunique()

7045

In [14]:
unique_ahj_services = other_accounts[['SERVICE_CODE', 'SERVICE_DESCRIPTION','SERVICE_CLASSIFICATION', 'SERVICE_CATEGORY']].drop_duplicates()
unique_ahj_services.reset_index(drop= True, inplace= True)
unique_ahj_services.shape

(7049, 4)

In [15]:
services_to_be_resent = pd.read_excel("D:/CodingSystem/notebooks/services_to_be_resent.xlsx")
services_to_be_resent.shape

(1008, 2)

In [16]:
resent_services_results = pd.read_csv("D:/CodingSystem/notebooks/resent_services_results.csv")
resent_services_results.shape

(320, 5)

In [17]:
failed_again = services_to_be_resent[~services_to_be_resent['SERVICE_CODE'].isin(list(resent_services_results['Internal_Service_Code'].unique()))]
failed_again.shape

(689, 2)

In [18]:
unique_failed_services = list(failed_again['SERVICE_DESCRIPTION'].unique())
failed_ahj_services = ahj[ahj['SERVICE_DESCRIPTION'].isin(unique_failed_services)]
failed_ahj_services = failed_ahj_services[['SERVICE_CODE', 'SERVICE_DESCRIPTION','SERVICE_CLASSIFICATION', 'SERVICE_CATEGORY']].drop_duplicates()
failed_ahj_services.reset_index(inplace=True)
failed_ahj_services.shape

(677, 5)

In [13]:
class FireworksLLM(LLM):
    model: str
    api_key: str
    base_url: str = "https://api.fireworks.ai/inference/v1"
    temperature: float = 0
    top_p: float = 0

    @property
    def _llm_type(self) -> str:
        return "fireworks"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url
        )

        response = client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are an expert in medical coding and service mapping."},
                {"role": "user", "content": prompt}
            ],
            temperature=self.temperature,
            top_p=self.top_p
        )
        return response.choices[0].message.content

load_dotenv()

api_key = os.getenv("FIREWORKS_NEW_API_KEY")

fireworks_llm = FireworksLLM(
    model="accounts/fireworks/models/deepseek-v3-0324",
    api_key=api_key,
)

def create_faiss_index(documents, embeddings_model):
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model)
    return FAISS.from_documents(documents, embeddings)

# Create the QA Chain
def create_rag_chain(vectorstore, prompt_template):
    rag_chain = RetrievalQA.from_chain_type(
        llm=fireworks_llm,
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt_template}
    )
    return rag_chain

def parse_llm_answer(answer: str):
    best_code = ""
    best_desc = ""
    explanation = ""

    for line in answer.split('\n'):
        if line.startswith("Best SBS Code:"):
            best_code = line.replace("Best SBS Code:", "").strip()
        elif line.startswith("Best SBS Description:"):
            best_desc = line.replace("Best SBS Description:", "").strip()
        elif line.startswith("Explanation:"):
            explanation = line.replace("Explanation:", "").strip()

    return best_code, best_desc, explanation

In [20]:
def convert_sbs_to_docs(sbs_df):
    docs = []
    for _, row in sbs_df.iterrows():
        service_text = (
            f"**Service Short Description:** {row.get('Short Description', 'Not Mentioned')}\n"
            f"Service Long Description: {row.get('Long Description', 'Not Mentioned')}\n"
            f"Definition: {row.get('Definition', 'Not Mentioned')}\n"
            f"Service Category: {row.get('Block Name', 'Not Mentioned')}\n"
            f"Service Classification: {row.get('Chapter Name', 'Not Mentioned')}\n"
            f"Service Code: {row.get('SBS Code (Hyphenated)', 'Not Mentioned')}"
        )
        docs.append(Document(page_content=service_text.strip(), metadata={
            "Service Code": row['SBS Code (Hyphenated)'],
            "Short Description": row['Short Description']
        }))
    return docs

In [21]:
# Convert SBS services to documents
sbs_docs = convert_sbs_to_docs(sbs)

# Create vectorstore
vectorstore = create_faiss_index(sbs_docs, "sentence-transformers/all-MiniLM-L6-v2")

# Prompt: explicitly ask the LLM to show reasoning before the final answer
prompt_template = PromptTemplate.from_template (
       """
        You are an expert medical coding analyst.

        You are given:
        Internal service details:
        {question}

        And these possible standard SBS codes:
        {context}

        Your task:
        1. Carefully compare the internal service’s description, category, and classification with each SBS option’s short and long descriptions, definition, category (Block Name), and classification (Chapter Name).
        2. Identify the single best-matching SBS code that most accurately represents the internal service.
        3. Provide the SBS short description for the selected code.
        4. Write a clear, one-sentence explanation of why this SBS code is the best match, mentioning key matching aspects.

        If you are unsure, pick the SBS code that has the closest clinical purpose or wording.

        Respond in this exact format ONLY:
        Best SBS Code: <code>
        Best SBS Description: <short description>
        Explanation: <one-sentence reason>
        """
)

In [130]:
def test_single_row_cost(ahj_services_df, vectorstore, prompt_template, row_idx=0):
    row = ahj_services_df.iloc[row_idx]

    query = (
        f"Service Code: {row['SERVICE_CODE']}\n"
        f"Description: {row['SERVICE_DESCRIPTION']}\n"
        f"Classification: {row['SERVICE_CLASSIFICATION']}\n"
        f"Category: {row['SERVICE_CATEGORY']}"
    )

    rag_chain = create_rag_chain(vectorstore, prompt_template)

    print(f"\n🚀 Running single row test for SERVICE_CODE: {row['SERVICE_CODE']}\n")

    response = rag_chain({"query": query})
    answer = response['result']

    best_code, best_desc, explanation = parse_llm_answer(answer)

    print("✅ LLM Result")
    print(f"Best SBS Code: {best_code}")
    print(f"Best SBS Description: {best_desc}")
    print(f"Explanation: {explanation}")

    # Use tiktoken encoding approximation for GPT-4 (or pick a close match for Qwen)
    enc = tiktoken.encoding_for_model("gpt-4")  

    prompt_tokens = len(enc.encode(query))
    answer_tokens = len(enc.encode(answer))

    input_cost_per_token = 0.22 / 1_000_000
    output_cost_per_token = 0.88 / 1_000_000

    input_cost = prompt_tokens * input_cost_per_token
    output_cost = answer_tokens * output_cost_per_token

    total_cost = input_cost + output_cost

    print(f"\n🔢 Prompt tokens: {prompt_tokens}")
    print(f"📝 Answer tokens: {answer_tokens}")
    print(f"💵 Input token cost: ${input_cost:.6f}")
    print(f"💵 Output token cost: ${output_cost:.6f}")
    print(f"💵 Estimated total cost for this row: ${total_cost:.6f}")

# ---------------------------------------------
# ✅ Example usage
# ---------------------------------------------
if __name__ == "__main__":
    sbs_docs = convert_sbs_to_docs(sbs)
    vectorstore = create_faiss_index(sbs_docs, "sentence-transformers/all-MiniLM-L6-v2")

    test_single_row_cost(
        ahj_services_df=different_ahj,
        vectorstore=vectorstore,
        prompt_template=prompt_template,
        row_idx=0  # change index to any row you want to test
    )


🚀 Running single row test for SERVICE_CODE: LA0013674

✅ LLM Result
Best SBS Code: 73050-01-70
Best SBS Description: AFP (TOTAL); SERUM
Explanation: The internal service (LAB-Hormones) matches the SBS code for total alpha-fetoprotein in serum, as both specify quantitation of total AFP without isoform differentiation or amniotic fluid testing, aligning with the general hormone assay category.

🔢 Prompt tokens: 23
📝 Answer tokens: 597
💵 Input token cost: $0.000005
💵 Output token cost: $0.000525
💵 Estimated total cost for this row: $0.000530


In [22]:
def map_service_codes(ahj_services_df, vectorstore, prompt_template):
    rag_chain = create_rag_chain(vectorstore, prompt_template)

    results_file = "failed_services_results.csv"
    failures_file = "failed_services_failures.csv"

    results_cols = [
        "Internal_Service_Code",
        "Internal_Description",
        "Matched_SBS_Code",
        "Matched_SBS_Short_Description",
        "LLM_Explanation"
    ]
    failures_cols = ahj_services_df.columns.tolist() + ["Error", "Traceback"]

    # ✅ Check for existing results to resume
    if os.path.exists(results_file):
        done_codes = pd.read_csv(results_file)["Internal_Service_Code"].unique().tolist()
        print(f"🔄 Found {len(done_codes)} completed rows in checkpoint. Will skip them.")
    else:
        done_codes = []
        # Only write header if file does NOT exist
        pd.DataFrame(columns=results_cols).to_csv(results_file, index=False)

    # Same for failures file
    if not os.path.exists(failures_file):
        pd.DataFrame(columns=failures_cols).to_csv(failures_file, index=False)

    # ✅ Skip already processed rows
    to_process = ahj_services_df[~ahj_services_df["SERVICE_CODE"].isin(done_codes)]
    print(f"🚀 Total to process this run: {len(to_process)} rows")

    for idx, (_, row) in enumerate(to_process.iterrows()):
        query = (
            f"Service Code: {row['SERVICE_CODE']}\n"
            f"Description: {row['SERVICE_DESCRIPTION']}\n"
            f"Classification: {row['SERVICE_CLASSIFICATION']}\n"
            f"Category: {row['SERVICE_CATEGORY']}"
        )

        try:
            response = rag_chain({"query": query})
            answer = response['result']

            best_code, best_desc, explanation = parse_llm_answer(answer)

            result_row = {
                "Internal_Service_Code": row['SERVICE_CODE'],
                "Internal_Description": row['SERVICE_DESCRIPTION'],
                "Matched_SBS_Code": best_code,
                "Matched_SBS_Short_Description": best_desc,
                "LLM_Explanation": explanation
            }

            pd.DataFrame([result_row]).to_csv(results_file, mode='a', header=False, index=False)

            print(f"✅ Processed row {idx+1}/{len(to_process)} — {row['SERVICE_CODE']}")

        except Exception as e:
            tb = traceback.format_exc()
            print(f"❌ Error at row {idx+1}/{len(to_process)}: {e}")

            failed_row = row.to_dict()
            failed_row["Error"] = str(e)
            failed_row["Traceback"] = tb

            pd.DataFrame([failed_row]).to_csv(failures_file, mode='a', header=False, index=False)

    print(f"🎉 Done! All rows processed or skipped. Results: {results_file}, Failures: {failures_file}")

In [24]:
llm_result = map_service_codes(failed_ahj_services, vectorstore, prompt_template)

🔄 Found 223 completed rows in checkpoint. Will skip them.
🚀 Total to process this run: 455 rows
✅ Processed row 1/455 — PR83214
✅ Processed row 2/455 — PR83215
✅ Processed row 3/455 — PR83242
✅ Processed row 4/455 — PR83261
✅ Processed row 5/455 — PR83262
✅ Processed row 6/455 — 83303
✅ Processed row 7/455 — PR83312
✅ Processed row 8/455 — PR83321
✅ Processed row 9/455 — PR83328
✅ Processed row 10/455 — PR83338
✅ Processed row 11/455 — PR83339
✅ Processed row 12/455 — PR83349
✅ Processed row 13/455 — PR83354
✅ Processed row 14/455 — PR83355
✅ Processed row 15/455 — PR83379
✅ Processed row 16/455 — PR83418
✅ Processed row 17/455 — PR83423
✅ Processed row 18/455 — PR83428
✅ Processed row 19/455 — PR83429
✅ Processed row 20/455 — PR83440
✅ Processed row 21/455 — PR83461
✅ Processed row 22/455 — PR83466
✅ Processed row 23/455 — PR83470
✅ Processed row 24/455 — PR83479
✅ Processed row 25/455 — PR83523
✅ Processed row 26/455 — PR83576
✅ Processed row 27/455 — PR83577
✅ Processed row 28/455 —

# Merge Services:

In [16]:
sbs_tabular = pd.read_excel('D:/CodingSystem/assets/SBS_Services.xlsx', sheet_name= 'tabular')
sbs_tabular['Short Description'] = sbs_tabular['Short Description'].str.strip().str.upper()
sbs_tabular['Long Description'] = sbs_tabular['Long Description'].str.strip().str.upper()
sbs_tabular.rename(columns={"Short Description": "SHORT_DESCRIPTION"}, inplace= True)
sbs_tabular.shape

(10081, 7)

In [17]:
sbs_technial = pd.read_excel('D:/CodingSystem/assets/SBS_Services.xlsx', sheet_name= 'technical')
sbs_technial = sbs_technial[['Inactive date', 'Inactive Code Mapping', 'SBS Code', 'SBS Code (Hyphenated)', 'Short Description',
                             'Long Description', 'Chapter Name', 'Block Name']]
sbs_technial['Short Description'] = sbs_technial['Short Description'].str.strip().str.upper()
sbs_technial['Long Description'] = sbs_technial['Long Description'].str.strip().str.upper()
sbs_technial.rename(columns={"Short Description": "SHORT_DESCRIPTION"}, inplace= True)
sbs_technial.shape

(10213, 8)

In [18]:
sbs_tabular = sbs_tabular.merge(sbs_technial, how='left', on=['SBS Code', 'SBS Code (Hyphenated)', 'SHORT_DESCRIPTION',
                             'Long Description', 'Chapter Name', 'Block Name'])

sbs_tabular.shape

(10081, 9)

In [19]:
exact_services.rename(columns={'Short Description': 'SHORT_DESCRIPTION'}, inplace= True)

In [20]:
exact_ahj_services_with_revision_details = exact_services.merge(sbs_tabular, how='left', on=['SBS Code', 'SBS Code (Hyphenated)', 'SHORT_DESCRIPTION',
                             'Long Description', 'Chapter Name', 'Block Name', 'Definition'])

exact_ahj_services_with_revision_details.shape

(31081, 17)

In [21]:
mapped_services = pd.read_excel("D:/CodingSystem/assets/mapped_services.xlsx")
mapped_services.rename(columns={"SBS_DESCRIPTION": "SHORT_DESCRIPTION"}, inplace= True)

mapped_services.shape

(9477, 5)

In [22]:
different_ahj_with_llm_result = different_ahj.merge(mapped_services, how= 'left', on=['SERVICE_CODE', 'SERVICE_DESCRIPTION'])
different_ahj_with_llm_result.rename(columns={'SBS_CODE': 'SBS Code (Hyphenated)'}, inplace= True)
different_ahj_with_llm_result.shape

(61158, 11)

In [23]:
different_ahj_with_llm_result.drop(columns=['LLM_EXPLAINATION'], inplace= True)

In [24]:
different_ahj_with_sbs_details = different_ahj_with_llm_result.merge(sbs_tabular, how= 'left', on=['SBS Code (Hyphenated)', 'SHORT_DESCRIPTION'])
different_ahj_with_sbs_details.shape

(61158, 17)

In [25]:
full_mapping_df = pd.concat([exact_ahj_services_with_revision_details, different_ahj_with_sbs_details], axis=0)
full_mapping_df.drop(columns=['NEW_SERVICE_DESCRIPTION'], inplace= True)
full_mapping_df.shape

(92239, 16)

In [26]:
full_mapping_df.head()

Unnamed: 0,INSURANCE_COMPANY,SERVICE_CODE,SERVICE_DESCRIPTION,PRICE,SERVICE_KEY,SERVICE_CLASSIFICATION,SERVICE_CATEGORY,SBS Code,SBS Code (Hyphenated),SHORT_DESCRIPTION,Long Description,Definition,Chapter Name,Block Name,Inactive date,Inactive Code Mapping
0,ANDALUSIA CLINICS FOR KIDS WELLNESS-SARI,LA0009004,ORAL GLUCOSE TOLERANCE TEST,16.2,76961,LAB Services,LAB-Biochemistry,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment,NaT,
1,Globe Med,LA0009004,ORAL GLUCOSE TOLERANCE TEST,100.0,76961,LAB Services,LAB-Biochemistry,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment,NaT,
2,LAB TO LAB (NEW),LA0009004,ORAL GLUCOSE TOLERANCE TEST,115.0,76961,LAB Services,LAB-Biochemistry,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment,NaT,
3,Islamic Bank,LA0009004,ORAL GLUCOSE TOLERANCE TEST,131.0,76961,LAB Services,LAB-Biochemistry,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment,NaT,
4,MOH,LA0009004,ORAL GLUCOSE TOLERANCE TEST,143.345,76961,LAB Services,LAB-Biochemistry,665420020,66542-00-20,ORAL GLUCOSE TOLERANCE TEST,ORAL GLUCOSE TOLERANCE TEST,A test that measures the body's response to su...,"Non-invasive, cognitive and other intervention...",Physiological assessment,NaT,


In [27]:
full_mapping_df.to_excel("full_mappings.xlsx")

In [137]:
mappig_sample = full_mapping_df.sample(n=500)

In [138]:
mappig_sample.shape

(500, 16)

In [139]:
mappig_sample.to_excel('second_mapping_sample.xlsx')