In [1]:
from tqdm.auto import tqdm
tqdm.__init__ = lambda *a, **k: __import__('tqdm').tqdm(*a, **{**k, "disable": False})


In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from faster_whisper import WhisperModel


  import pkg_resources


In [2]:
import re
import time
from faster_whisper import WhisperModel

In [3]:
def extract_entities(text):
    patterns = {
        'medicines': r'\b[A-Za-z]{3,}(?:mycin|cillin|oxacin|azole|prazole|dipine|olol|sartan|statin|pril)\b',
        'dosages': r'\b\d+\s*(?:mg|ml|mcg|g|kg|cc|iu|units?)\b',
        'frequencies': r'\b(?:once|twice|thrice|daily|BD|TDS|QDS|OD|HS|PRN|SOS)\b',
        'duration': r'\b\d+\s*(?:days?|weeks?|months?|hours?)\b',
        'numbers': r'\b\d+(?:\.\d+)?\b',
        'english_words': r'\b[A-Za-z]{2,}\b'
    }
    result = {k: list(set(re.findall(v, text, re.I))) for k, v in patterns.items()}
    return result

In [4]:

model_dir = "./models/large-v3" 
start_total = time.time()
print("Loading Whisper model (large-v3)...")
load_start = time.time()
whisper = WhisperModel(model_dir, device="cuda", compute_type="float16")
load_time = time.time() - load_start
print(f"Model loaded in {load_time:.2f} sec\n")

#===========================================================================
audio_path = r"E:\Projects\Med_Scribe\Testing\output_audio.wav"

#===========================================================================
print("Transcribing English pass...")
start_en = time.time()
segments_en , info_en = whisper.transcribe(audio_path, language='en', beam_size=15, vad_filter=True)
en_text = " ".join([seg.text.strip() for seg in segments_en])
lat_en = time.time() - start_en
print(f"English Pass Time  : {lat_en:.2f} sec")
#===========================================================================
print("Transcribing Marathi/Hindi pass...")
start_mr = time.time()
segments_mr , info_mr = whisper.transcribe(audio_path, language='mr', beam_size=15, vad_filter=True)
mr_text = " ".join([seg.text.strip() for seg in segments_mr])
lat_mr = time.time() - start_mr
print(f"Marathi transcription done in {lat_mr:.2f} sec\n")

#============================================================================
start_ent = time.time()
entities = extract_entities(en_text)
lat_ent = time.time() - start_ent
print(f"Entity extraction completed in {lat_ent:.2f} sec\n")

total_time = time.time() - start_total
print("="*80)
print("FINAL SUMMARY")
print("="*80)
print(f"Model Load Time    : {load_time:.2f} sec")
print(f"English Pass Time  : {lat_en:.2f} sec")
print(f"Marathi Pass Time  : {lat_mr:.2f} sec")
print(f"Entity Extraction  : {lat_ent:.2f} sec")
print(f"TOTAL PIPELINE LATENCY : {total_time:.2f} sec")
print("="*80)

print("\nENGLISH TRANSCRIPT:\n", en_text.strip()[:500], "..." if len(en_text) > 500 else "")
print("\nMARATHI TRANSCRIPT:\n", mr_text.strip()[:500], "..." if len(mr_text) > 500 else "")
print("\nEXTRACTED ENTITIES:")
for k, v in entities.items():
    if v:
        print(f"{k.capitalize()}: {', '.join(v[:10])}{'...' if len(v)>10 else ''}")

Loading Whisper model (large-v3)...
Model loaded in 4.78 sec

Transcribing English pass...
English Pass Time  : 2.77 sec
Transcribing Marathi/Hindi pass...
Marathi transcription done in 4.87 sec

Entity extraction completed in 0.00 sec

FINAL SUMMARY
Model Load Time    : 4.78 sec
English Pass Time  : 2.77 sec
Marathi Pass Time  : 4.87 sec
Entity Extraction  : 0.00 sec
TOTAL PIPELINE LATENCY : 12.42 sec

ENGLISH TRANSCRIPT:
 Let's see what I am doing. I am going to use a new model and I am going to see what features are there in this model. So, let's see what is there. 

MARATHI TRANSCRIPT:
 बगाता में काई करतो, मी नवीन मॉडल यूज़ करालोई अनी हे मॉडल मधे काई काई फीचर्स हाई ते पन बगायलोई मी, सो बगीवा काई हाई अता 

EXTRACTED ENTITIES:
English_words: to, are, Let, and, new, in, am, use, what, going...


ModuleNotFoundError: No module named 'llama_cpp'

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import json

model_dir = "./models/gemma-3-1b-it"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

transcript = " Mr. Patil, after reading your reports, I can see that you do have some fatty liver and sugar levels, but don't worry. It's the early stage. Take Metformin 500mg in the morning and evening and take 1 tablet after breakfast. Take 2 tsp of Live 1252 Syrup twice a day. Stop eating oily and sugary foods. Walk for 30 minutes daily. One more thing, do an ultrasound of abdomen for the next visit. I want to see your liver condition. Take medicine continuously for 30 days and follow it. And yes, take food on time. Don't eat late at night. Otherwise, you won't be able to control your sugar levels. Let's take a look. "

user_prompt = f"""
You are a medical prescription parser. Extract ONLY information explicitly stated.

Rules:
1. Extract medicines with EXACT dosages mentioned
2. If dosage/frequency unclear, mark as "unspecified"
3. Do NOT infer or assume any information
4. If doctor says "continue previous meds", extract NOTHING
5. Output valid JSON only

Output format:
{{
"medicines": [{{"name": str, "dosage": str, "frequency": str, "duration": str}}],
"diseases": [str],
"tests": [{{"name": str, "timing": str}}]
}}

Extract from this prescription conversation:
{transcript}

Remember: Only extract explicitly stated information. No assumptions.
"""
import psutil
print(f"RAM used: {psutil.Process().memory_info().rss / 1024**2:.2f} MB")

import torch
if torch.cuda.is_available():
    print(f"GPU used: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
    print(f"GPU reserved: {torch.cuda.memory_reserved()/1024**2:.2f} MB")

inputs = tokenizer(user_prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=512)
result_text = tokenizer.decode(outputs[0], skip_special_tokens=True)


try:
    results_json = json.loads(result_text)
except:
    result_json = {"error": "Invalid JSON output", "raw_output": result_text}

import json

raw = result_json['raw_output']  # your previous output

# Find first { and last } and extract
start = raw.find("{")
end = raw.rfind("}") + 1

if start != -1 and end != -1:
    json_str = raw[start:end]
    try:
        clean_json = json.loads(json_str)
        print(json.dumps(clean_json, indent=2))
    except json.JSONDecodeError:
        print("JSON found but invalid")
else:
    print("No JSON found")

print(json.dumps(result_json, indent=2))

print(f"RAM used: {psutil.Process().memory_info().rss / 1024**2:.2f} MB")

if torch.cuda.is_available():
    print(f"GPU used: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
    print(f"GPU reserved: {torch.cuda.memory_reserved()/1024**2:.2f} MB")

RAM used: 2919.48 MB
GPU used: 3839.26 MB
GPU reserved: 3930.00 MB


KeyboardInterrupt: 