<a href="https://colab.research.google.com/github/Naima-ai/Custom_SLM_PT/blob/main/SLM_component.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive (Colab-only API); the source PDF used
# later in this notebook is read from a path under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
# Print the interpreter version string so the runtime used is visible in the output.
print(sys.version)


3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0]


In [3]:
!pip install -q transformers torch sentence-transformers faiss-cpu langchain langchain-community langchain-huggingface pypdf accelerate


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m149.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m84.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
import os

# Directories that the %%writefile cells below write into (rag/, core/, model/),
# plus a data/ folder.
folders = ["rag", "core", "model", "data"]
for f in folders:
    # exist_ok=True means an already-existing directory is not an error.
    os.makedirs(f, exist_ok=True)


In [5]:
%%writefile rag/indexer.py
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import UnstructuredWordDocumentLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings


def load_documents(file_paths):
    """Load every supported document and return the results as one flat list.

    Args:
        file_paths: An iterable of file paths (``str`` or ``os.PathLike``).
            A single path is also accepted and treated as a one-item list.

    Returns:
        list: Everything returned by the loaders' ``load()`` calls, in input
        order. Paths ending in neither ``.pdf`` nor ``.docx`` (compared
        case-insensitively) are reported on stdout and skipped, so the result
        may be empty.
    """
    # A bare string is itself iterable: without this guard it would be walked
    # character by character and every character reported as unsupported.
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]

    docs = []

    for raw_path in file_paths:
        # Normalise pathlib.Path (and other PathLike objects) to a plain path
        # so the .lower()/.endswith() checks below work on them too.
        path = os.fspath(raw_path)
        path_lower = path.lower()

        if path_lower.endswith(".pdf"):
            print(f"Loading PDF: {path}")
            loader = PyPDFLoader(path)
            docs.extend(loader.load())

        elif path_lower.endswith(".docx"):
            print(f"Loading DOCX: {path}")
            loader = UnstructuredWordDocumentLoader(path)
            docs.extend(loader.load())

        else:
            print(f"Skipping unsupported file: {path}")

    return docs


def build_index(file_paths, vector_dir, chunk_size=500, chunk_overlap=100):
    """Chunk the given documents, embed them and save a FAISS index to disk.

    Args:
        file_paths: Paths handed to ``load_documents``.
        vector_dir: Directory the index is saved into; created if missing.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``
            (default 500, the value that used to be hard-coded).
        chunk_overlap: ``chunk_overlap`` passed to the splitter (default 100,
            the value that used to be hard-coded).

    Raises:
        ValueError: If no documents were loaded, or splitting produced no chunks.
    """
    all_docs = load_documents(file_paths)

    if not all_docs:
        raise ValueError("No text could be loaded from documents.")

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )

    chunks = splitter.split_documents(all_docs)

    if not chunks:
        raise ValueError("Text loaded but no chunks were created.")

    print(f"Total chunks created: {len(chunks)}")

    # Same embedding model name as rag/retriever.py uses when loading the index.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    vectorstore = FAISS.from_documents(chunks, embeddings)

    # Make sure the target directory exists before saving into it.
    os.makedirs(vector_dir, exist_ok=True)
    vectorstore.save_local(vector_dir)

    print("Vector DB created at:", vector_dir)





Writing rag/indexer.py


In [6]:
%%writefile rag/retriever.py

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

def load_retriever(vector_dir: str, top_k: int):
    """Load the FAISS index saved in ``vector_dir`` and return it as a retriever.

    Args:
        vector_dir: Directory holding a previously saved FAISS index.
        top_k: Passed to the retriever as the ``k`` search argument.

    Returns:
        The object produced by ``FAISS.as_retriever``.
    """
    # Same embedding model name that rag/indexer.py uses when building the index.
    encoder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # NOTE(review): allow_dangerous_deserialization=True opts in to an unsafe
    # (presumably pickle-based -- confirm in the LangChain docs) load, so only
    # point vector_dir at indexes this project created itself.
    store = FAISS.load_local(vector_dir, encoder, allow_dangerous_deserialization=True)

    search_options = {"k": top_k}
    return store.as_retriever(search_kwargs=search_options)


Writing rag/retriever.py


In [7]:
%%writefile model/slm_loader.py
from transformers import AutoTokenizer, AutoModelForCausalLM

# Short aliases accepted by load_slm(), mapped to the model ids that are passed
# to from_pretrained().
MODELS = {
    "phi-2": "microsoft/phi-2",
    "mistral": "mistralai/Mistral-7B-Instruct-v0.1"
}

def load_slm(model_name: str):
    """Load the tokenizer and causal LM registered under ``model_name``.

    Args:
        model_name: One of the keys of ``MODELS`` (e.g. ``"phi-2"``).

    Returns:
        tuple: ``(tokenizer, model)``; ``model.eval()`` has been called on the model.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``. The message
            lists the supported aliases.
    """
    try:
        model_id = MODELS[model_name]
    except KeyError:
        # Still a KeyError for existing callers, but with an actionable message
        # instead of just the offending key.
        supported = ", ".join(sorted(MODELS))
        raise KeyError(
            f"Unknown model {model_name!r}; supported models: {supported}"
        ) from None

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto"
    )

    model.eval()
    return tokenizer, model


Writing model/slm_loader.py


In [8]:
%%writefile core/prompt.py

def build_prompt(context: str, question: str) -> str:
    """Assemble the prompt text sent to the language model.

    The prompt consists of a fixed instruction header, the retrieved
    ``context``, the user's ``question`` and a trailing ``Answer:`` line.
    It starts and ends with a newline.
    """
    prompt_lines = (
        "",  # the prompt begins with a newline
        "You are a medical document assistant.",
        "",
        "Rules:",
        "- Use ONLY the provided context",
        '- If information is missing, say: "Information not found in medical records"',
        "- Be concise, factual, and neutral",
        "- Do NOT give advice beyond the document",
        "",
        "Context:",
        f"{context}",
        "",
        "Question:",
        f"{question}",
        "",
        "Answer:",
        "",  # ...and ends with a newline after "Answer:"
    )
    return "\n".join(prompt_lines)


Writing core/prompt.py


In [9]:
%%writefile core/agent.py
import torch
from core.prompt import build_prompt

class MedicalAgent:
    """Answers questions by retrieving context and generating with a causal LM.

    Holds the tokenizer, model and retriever it is given; ``answer`` ties them
    together for a single question.
    """

    def __init__(self, tokenizer, model, retriever):
        self.tokenizer, self.model, self.retriever = tokenizer, model, retriever

    def answer(self, question: str):
        """Retrieve context for ``question``, generate a reply and count tokens.

        Returns:
            dict: ``answer`` (decoded, stripped text of the newly generated
            tokens) plus ``prompt_tokens``, ``completion_tokens`` and
            ``total_tokens``.
        """
        retrieved = self.retriever.invoke(question)
        passages = [doc.page_content for doc in retrieved]
        prompt = build_prompt("\n".join(passages), question)

        encoded = self.tokenizer(prompt, return_tensors="pt")
        inputs = encoded.to(self.model.device)
        prompt_ids = inputs["input_ids"]

        # Greedy decoding (do_sample=False), capped at 200 new tokens; the EOS
        # id is passed as the padding id.
        with torch.no_grad():
            sequences = self.model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=False,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # Skip the first len(prompt) ids so only the continuation is decoded.
        new_ids = sequences[0][prompt_ids.shape[-1]:]
        text = self.tokenizer.decode(new_ids, skip_special_tokens=True).strip()

        n_prompt = len(prompt_ids[0])
        n_completion = len(new_ids)
        return {
            "answer": text,
            "prompt_tokens": n_prompt,
            "completion_tokens": n_completion,
            "total_tokens": n_prompt + n_completion,
        }


Writing core/agent.py


In [10]:
%%writefile core/component.py

import time
from rag.retriever import load_retriever
from model.slm_loader import load_slm
from core.agent import MedicalAgent

class SLMComponent:
    """Bundles model loading, retrieval and answering behind one ``run`` call."""

    def __init__(self, model_name: str, vector_dir: str, top_k: int = 3):
        """Load the model, the retriever and wire them into a MedicalAgent.

        Args:
            model_name: Alias passed to ``load_slm``.
            vector_dir: Directory of the saved vector index, passed to
                ``load_retriever``.
            top_k: Number of chunks to retrieve per question (default 3).
        """
        self.tokenizer, self.model = load_slm(model_name)
        self.retriever = load_retriever(vector_dir, top_k)
        self.agent = MedicalAgent(self.tokenizer, self.model, self.retriever)

    def run(self, question: str):
        """Answer ``question`` and report latency and token usage.

        Returns:
            dict: ``answer``, ``latency_sec`` (rounded to 3 decimals),
            ``prompt_tokens``, ``completion_tokens``, ``total_tokens`` and
            ``model`` (the model config's ``_name_or_path``).
        """
        # perf_counter() is monotonic, so the measured latency cannot go
        # negative or jump when the system clock is adjusted (time.time() can).
        start = time.perf_counter()
        out = self.agent.answer(question)
        latency_sec = round(time.perf_counter() - start, 3)

        return {
            "answer": out["answer"],
            "latency_sec": latency_sec,
            "prompt_tokens": out["prompt_tokens"],
            "completion_tokens": out["completion_tokens"],
            "total_tokens": out["total_tokens"],
            "model": self.model.config._name_or_path
        }


Writing core/component.py


In [11]:
# Source PDF on the mounted Drive. Despite the plural name this is a single
# path string; it is wrapped in a list when passed to build_index() below.
medical_files = "/content/drive/MyDrive/19. Medical Documentation for Loop_s Demo/1. SKIN CANCER DETECTION/SKIN CANCER DETECTION.pdf"


from rag.indexer import build_index

# Chunk + embed the PDF and save the FAISS index under /content/vector_db_medical,
# the same directory the SLMComponent cells read from.
build_index(
    file_paths=[medical_files],
    vector_dir="/content/vector_db_medical"
)


Loading PDF: /content/drive/MyDrive/19. Medical Documentation for Loop_s Demo/1. SKIN CANCER DETECTION/SKIN CANCER DETECTION.pdf
Total chunks created: 102


  embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vector DB created at: /content/vector_db_medical


In [13]:
from core.component import SLMComponent

# Load the model and the saved vector index; top_k is left at its default of 3.
slm = SLMComponent(
    model_name="phi-2",  # or "mistral"
    vector_dir="/content/vector_db_medical"
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [10]:
# NOTE(review): this cell repeats the import and SLMComponent construction of
# the previous cell, so the model is loaded a second time -- presumably left
# over from a re-run; consider keeping only the query below.
from core.component import SLMComponent

slm = SLMComponent(
    model_name="phi-2",  # or "mistral"
    vector_dir="/content/vector_db_medical"
)

# Print the full result dict returned by run() (answer, latency, token counts, model).
print(
    slm.run("What is this indicates: Well-circumscribed nests of melanocytes, maturation in depth?")
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

{'answer': 'Embryology of skin is the study of the development of the skin from the earliest stages of embryonic development to birth. It involves the formation of the epidermis, dermis, and subcutaneous tissue, as well as the development of hair follicles, sweat glands, and sebaceous glands. The skin also plays a crucial role in protecting the body from external threats and regulating body temperature.', 'latency_sec': 5.675, 'prompt_tokens': 483, 'completion_tokens': 83, 'total_tokens': 566, 'model': 'microsoft/phi-2'}


In [11]:
# Ask a single question and print the full result dict returned by run()
# (answer, latency, token counts, model).
print(
    slm.run("What is this indicates: Well-circumscribed nests of melanocytes, maturation in depth?")
)

{'answer': 'This indicates a benign melanocyte proliferation, specifically a junctional type.', 'latency_sec': 2.759, 'prompt_tokens': 454, 'completion_tokens': 16, 'total_tokens': 470, 'model': 'microsoft/phi-2'}


In [12]:
# Ask a second question and print the full result dict returned by run().
print(
    slm.run("How skin cancer can be effectively prevented?")
)

{'answer': 'Skin cancer can be effectively prevented by reducing ultraviolet radiation (UVR) exposure, which is the primary modifiable risk factor. This can be achieved through sun avoidance, especially during peak hours of 10 AM to 4 PM when the UV index is high. Additionally, using UV index apps can help individuals stay informed about the UV levels in their area. Other practical tips include using sunscreen and protective clothing. It is important to note that while these strategies are effective, they may not be as common in dermatology/skin cancer prevention patients. Therefore, it is crucial to educate patients about the importance of rigorous photoprotection.', 'latency_sec': 7.379, 'prompt_tokens': 467, 'completion_tokens': 126, 'total_tokens': 593, 'model': 'microsoft/phi-2'}


In [14]:
# Interactive chat loop around slm.run(); only the answer text is shown.
print("Medical Chatbot is ready! Type 'end' to stop the chat.\n")

while True:
    try:
        # Strip surrounding whitespace so inputs like " end " still exit.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted cell ends the chat cleanly instead of
        # leaving a traceback in the notebook.
        print("\nChatbot: Goodbye!")
        break

    if not user_input:
        # Nothing was asked; don't spend a model generation on an empty prompt.
        continue

    if user_input.lower() in ["end", "quit", "exit"]:
        print("Chatbot: Goodbye!")
        break

    response = slm.run(user_input)
    print(f"Chatbot: {response['answer']}\n")

Medical Chatbot is ready! Type 'end' to stop the chat.

You: How skin cancer can be effectively prevented?
Chatbot: Skin cancer can be effectively prevented by reducing ultraviolet radiation (UVR) exposure, which is the primary modifiable risk factor. This can be achieved through sun avoidance, especially during peak hours of 10 AM to 4 PM when the UV index is high. Additionally, using UV index apps can help individuals stay informed about the UV levels in their area. Other practical tips include using sunscreen and protective clothing. It is important to note that while these strategies are effective, they may not be as common in dermatology/skin cancer prevention patients. Therefore, it is crucial to educate patients about the importance of rigorous photoprotection.

You: what are types of skin cancer?
Chatbot: The types of skin cancer are Actinic Keratosis and Intraepithelial Carcinoma / Bowen's Disease (AKIEC).



Identify the relevant information in the context.

The relevant info