# Implement a rule-based pattern to extract drug doses from medical text

In [1]:
import pandas as pd

df = pd.read_csv('medical_drug_doses.csv')

In [2]:
DOSE_PATTERN = r'(\d+\.?\d*)\s*(mg|ml|IU|%)'

In [3]:
DRUG_NAME_PATTERN = r'(?:of|Prescribe|Administer|Instill|Take|tablets of)\s+([A-Z]\w+)'

In [4]:
import re

def extract(text):
  dose_match = re.search(DOSE_PATTERN, text, re.IGNORECASE)
  dose = dose_match.group(0) if dose_match else "N/A"

  drug_match = re.search(DRUG_NAME_PATTERN, text)
  drug = drug_match.group(1) if drug_match else "Unknown Drug"

  return drug, dose

In [5]:
df[['Extracted Drug', 'Extracted Dose']] = df['text'].apply(
    lambda x: pd.Series(extract(x))
)

In [6]:
df

Unnamed: 0,text,Extracted Drug,Extracted Dose
0,Patient is advised to take 500 mg of Paracetam...,Paracetamol,500 mg
1,Administer 250 mg of Amoxicillin thrice daily ...,Amoxicillin,250 mg
2,Take 2 tablets of Ibuprofen 200 mg orally ever...,Ibuprofen,200 mg
3,Prescribe Metformin 500 mg once daily after br...,Metformin,500 mg
4,Patient should instill 1 drop of Timolol 0.5% ...,Timolol,0.5%
5,Administer 50 mg of Losartan orally once a day...,Losartan,50 mg
6,Take 5 ml of Cough Syrup every 8 hours to reli...,Cough,5 ml
7,Patient to take 10 mg of Loratadine orally onc...,Loratadine,10 mg
8,Administer 0.25 mg of Digoxin orally once dail...,Digoxin,0.25 mg
9,Prescribe 20 mg of Omeprazole orally every mor...,Omeprazole,20 mg
