In [1]:
import gc
import json
import logging
import os
import subprocess
import sys
import uuid
from datetime import datetime, timedelta
from functools import lru_cache

import cv2
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from paddleocr import PaddleOCR
from tqdm import tqdm
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
import spacy
from gtts import gTTS
from apscheduler.schedulers.background import BackgroundScheduler
from transformers import pipeline
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

# Download required NLTK data (only the resources that are not installed yet).
# Each pair is (lookup path for nltk.data.find, package name for nltk.download).
_NLTK_RESOURCES = (
    ('tokenizers/punkt', 'punkt'),
    ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
    ('chunkers/maxent_ne_chunker', 'maxent_ne_chunker'),
    ('corpora/words', 'words'),
)
for _resource_path, _package_name in _NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package_name)

# Load spaCy model for NLP
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading spaCy model...")
    # Install with the interpreter that runs this kernel (sys.executable);
    # a bare "python" may resolve to a different environment on PATH.
    # check=True surfaces a failed download instead of silently continuing.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Initialize scheduler (shared by ReminderGenerator.schedule_reminders)
scheduler = BackgroundScheduler()
scheduler.start()

# Configure logging: every record goes both to a log file and to the console
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - [%(levelname)8s] %(filename)s:%(lineno)d - %(message)s',
    handlers=[
        logging.FileHandler('prescription_process.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Set device: prefer CUDA when available, otherwise fall back to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logger.info(f"Using device: {device}")

# Initialize NLP components
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

class PrescriptionDataset(Dataset):
    """
    Dataset of prescription images with multi-label medicine-type targets.

    ``image_dir`` must contain a ``resized_images`` directory holding the
    ``.png``/``.jpg``/``.jpeg`` files and a ``labels`` directory holding one
    ``<image stem>.json`` file per image. For every image the target is read
    from ``<cache_dir>/<image stem>.json`` when that file exists, otherwise
    from the label file (keys ``medicine_types`` and ``medicines``).

    Each item is a dict with the keys ``'image'`` and ``'label'`` (a float32
    tensor). Images without any usable label get an all-zero label.
    """

    # Display names of the classes; the position of a name is the index of
    # the corresponding entry in a label vector.
    MEDICINE_TYPES = ("Tablets", "Capsules", "Syrups", "Injections", "Drops", "Creams", "Inhalers")
    NUM_CLASSES = len(MEDICINE_TYPES)

    def __init__(self, image_dir, transform=None, cache_dir="ocr_cache"):
        """
        Args:
            image_dir: Dataset root containing ``resized_images`` and ``labels``.
            transform: Optional callable applied to each PIL image.
            cache_dir: Directory with per-image cached results (created if missing).

        Raises:
            ValueError: If ``resized_images`` or ``labels`` is missing.
        """
        self.image_dir = image_dir
        self.transform = transform
        self.cache_dir = cache_dir

        # Create cache directory if it doesn't exist
        os.makedirs(cache_dir, exist_ok=True)

        # Set up image and label directories
        self.images_dir = os.path.join(image_dir, "resized_images")
        self.labels_dir = os.path.join(image_dir, "labels")

        if not os.path.exists(self.images_dir) or not os.path.exists(self.labels_dir):
            raise ValueError(f"Required directories not found in {image_dir}. Expected 'resized_images' and 'labels' directories.")

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping (and therefore seeded splits) reproducible.
        self.image_files = sorted(
            f for f in os.listdir(self.images_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        # Load cached results
        self.cached_results = self._load_cached_results()

        # Print dataset statistics
        self.print_statistics()

    def _default_label(self):
        """Return a fresh all-zero label vector."""
        return [0] * self.NUM_CLASSES

    def _check_cache_exists(self):
        """
        Return True when every image has a JSON file in the cache directory.
        """
        return all(
            os.path.exists(os.path.join(self.cache_dir, f"{os.path.splitext(img_file)[0]}.json"))
            for img_file in self.image_files
        )

    def _load_cached_results(self):
        """
        Load the per-image results, preferring the cache over the label file.

        Returns:
            dict mapping image file name to its result dict. Images that have
            neither a cache file nor a label file are absent from the mapping;
            images whose file cannot be read get an all-zero default entry.
        """
        cached_results = {}
        for img_file in self.image_files:
            stem = os.path.splitext(img_file)[0]
            cache_path = os.path.join(self.cache_dir, f"{stem}.json")
            label_path = os.path.join(self.labels_dir, f"{stem}.json")

            try:
                if os.path.exists(cache_path):
                    with open(cache_path, 'r', encoding='utf-8') as f:
                        entry = json.load(f)
                    if not isinstance(entry, dict):
                        raise ValueError(f"expected a JSON object, got {type(entry).__name__}")
                    cached_results[img_file] = entry
                elif os.path.exists(label_path):
                    # If no cache exists but label file does, load from label file
                    with open(label_path, 'r', encoding='utf-8') as f:
                        label_data = json.load(f)
                    cached_results[img_file] = {
                        'label': label_data.get('medicine_types', self._default_label()),
                        'medicines': label_data.get('medicines', [])
                    }
            except Exception as e:
                logger.error(f"Error loading data for {img_file}: {str(e)}")
                # Set default values if loading fails
                cached_results[img_file] = {'label': self._default_label(), 'medicines': []}

        return cached_results

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'label': float32 tensor}`` for index ``idx``."""
        img_file = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_file)

        try:
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
        except Exception as e:
            logger.error(f"Error loading image {img_path}: {str(e)}")
            # Return a black image in case of error
            image = torch.zeros(3, 224, 224)

        # A cached entry may exist without a 'label' key; fall back to zeros
        # instead of raising KeyError in the middle of a training run.
        result = self.cached_results.get(img_file, {})
        label = result.get('label', self._default_label())

        return {
            'image': image,
            'label': torch.tensor(label, dtype=torch.float32)
        }

    def print_statistics(self):
        """Log the dataset size and the number of positive samples per class."""
        logger.info("\nDataset Statistics:")
        logger.info(f"Total images: {len(self.image_files)}")
        logger.info(f"Cached results: {len(self.cached_results)}")

        labels = [
            result.get('label', self._default_label())
            for result in self.cached_results.values()
        ]
        if not labels:
            logger.warning("No labels found in the dataset!")
            return

        try:
            labels = np.array(labels)
        except ValueError:
            # Recent NumPy versions refuse to build an array from ragged rows.
            logger.warning("Labels have inconsistent lengths; skipping label distribution")
            return

        if labels.size == 0:
            logger.warning("Empty labels array!")
            return

        if len(labels.shape) < 2:
            logger.warning(f"Invalid labels shape: {labels.shape}")
            return

        if labels.shape[1] < self.NUM_CLASSES:
            # Indexing a missing column below would raise IndexError.
            logger.warning(f"Invalid labels shape: {labels.shape}")
            return

        logger.info("\nLabel Distribution:")
        for i, med_type in enumerate(self.MEDICINE_TYPES):
            positive_count = np.sum(labels[:, i] == 1)
            logger.info(f"{med_type}: {positive_count} positive samples ({positive_count/len(labels)*100:.2f}%)")

class PrescriptionModel(nn.Module):
    """
    Multi-label medicine classifier built on a pre-trained ResNet-50.

    The stock ``fc`` layer of the backbone is replaced by a two-layer head
    (Linear -> ReLU -> Dropout -> Linear -> Sigmoid), so the forward pass
    yields one value in [0, 1] per class.

    Args:
        num_classes: Number of output units of the classification head.
    """
    def __init__(self, num_classes=7):
        super().__init__()

        # Pre-trained ResNet-50 serves as the feature extractor
        backbone = models.resnet50(pretrained=True)

        # Width of the features that feed the original classifier
        in_features = backbone.fc.in_features

        # Replacement head; the trailing sigmoid gives independent per-class
        # scores as needed for multi-label classification
        head_layers = [
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
            nn.Sigmoid(),
        ]
        backbone.fc = nn.Sequential(*head_layers)

        self.resnet = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and the classification head."""
        scores = self.resnet(x)
        return scores

class PrescriptionTrainer:
    """
    Training loop for the prescription model.

    Uses binary cross-entropy on the model outputs, Adam, and a
    ReduceLROnPlateau schedule driven by the validation loss. Batches are
    expected to be dicts with the keys ``'image'`` and ``'label'``.

    Args:
        model: The ``nn.Module`` to train; it is moved to ``device``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        device: Device the model and the batches are moved to.
        learning_rate: Initial learning rate for Adam.
        checkpoint_path: File the best model's ``state_dict`` is saved to.
    """
    def __init__(self, model, train_loader, val_loader, device, learning_rate=1e-4,
                 checkpoint_path='best_prescription_model.pth'):
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.device = device
        self.checkpoint_path = checkpoint_path

        # Move model to device
        self.model = model.to(device)

        # Define loss function and optimizer
        self.criterion = nn.BCELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        # Learning rate scheduler. The `verbose` flag is deprecated in recent
        # PyTorch releases, so learning-rate changes are logged in train().
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='min', factor=0.1, patience=3
        )

    def _run_epoch(self, loader, training, desc):
        """
        Run one pass over ``loader``.

        Args:
            loader: Iterable of batches.
            training: When True the weights are updated, otherwise the model
                is put in eval mode and gradients are disabled.
            desc: Label of the progress bar.

        Returns:
            Tuple ``(average loss per batch, weighted F1 at threshold 0.5)``.
        """
        self.model.train(training)
        total_loss = 0
        all_preds = []
        all_labels = []

        with torch.set_grad_enabled(training):
            for batch in tqdm(loader, desc=desc):
                images = batch['image'].to(self.device)
                labels = batch['label'].to(self.device)

                if training:
                    self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)

                if training:
                    # Backward pass
                    loss.backward()
                    self.optimizer.step()

                # Track metrics
                total_loss += loss.item()
                all_preds.extend(outputs.detach().cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Calculate metrics
        avg_loss = total_loss / len(loader)
        f1 = f1_score(all_labels, np.array(all_preds) > 0.5, average='weighted')

        return avg_loss, f1

    def train_epoch(self):
        """Train for one epoch; returns ``(average loss, weighted F1)``."""
        return self._run_epoch(self.train_loader, training=True, desc="Training")

    def validate(self):
        """Evaluate on the validation set; returns ``(average loss, weighted F1)``."""
        return self._run_epoch(self.val_loader, training=False, desc="Validation")

    def train(self, num_epochs=10, patience=5):
        """
        Train with early stopping on the validation loss.

        Args:
            num_epochs: Maximum number of epochs.
            patience: Number of consecutive epochs without a new best
                validation loss after which training stops.

        The ``state_dict`` of the best model so far is written to
        ``self.checkpoint_path`` whenever the validation loss improves.
        """
        best_val_loss = float('inf')
        patience_counter = 0

        for epoch in range(num_epochs):
            logger.info(f"Epoch {epoch+1}/{num_epochs}")

            # Training phase
            train_loss, train_f1 = self.train_epoch()

            # Validation phase
            val_loss, val_f1 = self.validate()

            # Update learning rate and report a reduction, if any
            previous_lr = self.optimizer.param_groups[0]['lr']
            self.scheduler.step(val_loss)
            current_lr = self.optimizer.param_groups[0]['lr']
            if current_lr < previous_lr:
                logger.info(f"Learning rate reduced from {previous_lr:.2e} to {current_lr:.2e}")

            # Log metrics
            logger.info(f"Train Loss: {train_loss:.4f}, Train F1: {train_f1:.4f}")
            logger.info(f"Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}")

            # Save best model
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                torch.save(self.model.state_dict(), self.checkpoint_path)
                logger.info("Model saved!")
            else:
                patience_counter += 1

            # Early stopping
            if patience_counter >= patience:
                logger.info(f"Early stopping triggered after {epoch+1} epochs")
                break

class NLPTextExtractor:
    """
    Class for extracting medicine information from prescription text using NLP techniques

    The text is processed line by line. A line that starts with one of the
    keywords in ``medicine_types`` opens a new medicine entry; that line and
    the following ones are then scanned for dosage, frequency and duration.
    """
    def __init__(self):
        # Keywords that mark the beginning of a medicine line, grouped by form
        self.medicine_types = {
            'tablet': ['tab', 'tablet', 'pill'],
            'capsule': ['cap', 'capsule'],
            'syrup': ['syrup', 'suspension', 'syr'],
            'injection': ['inj', 'injection'],
            'drops': ['drops', 'eye drops'],
            'cream': ['cream', 'ointment'],
            'inhaler': ['inhaler', 'spray']
        }

        # NOTE: the three lists below are not referenced by any method of
        # this class; they are kept because other code may read them.

        # Common dosage units
        self.dosage_units = ['mg', 'g', 'ml', 'mcg', 'tablets', 'tablet', 'capsules', 'capsule']

        # Common frequency patterns
        self.frequency_patterns = [
            'once a day', 'twice a day', 'three times a day', 'four times a day',
            '1x', '2x', '3x', '4x', '1 time', '2 times', '3 times', '4 times'
        ]

        # Common duration patterns
        self.duration_patterns = [
            'days', 'weeks', 'months', 'day', 'week', 'month'
        ]

    def _keywords(self):
        """Return all medicine keywords as one flat list."""
        return [keyword for group in self.medicine_types.values() for keyword in group]

    def _starts_medicine_line(self, lowered_line):
        """Return True when the lower-cased line begins with a medicine keyword."""
        return any(lowered_line.startswith(keyword.lower()) for keyword in self._keywords())

    def _extract_name(self, line):
        """
        Return the words following the first word that contains a medicine
        keyword, joined by spaces (with ' - ' collapsed to '-'), or "" when
        nothing follows such a word.
        """
        keywords = self._keywords()
        parts = line.split()
        for index, part in enumerate(parts):
            if any(keyword in part.lower() for keyword in keywords):
                remainder = parts[index + 1:]
                if remainder:
                    return ' '.join(remainder).replace(' - ', '-').strip()
        return ""

    @staticmethod
    def _first_quantity(doc, unit_keywords):
        """
        Find the first integer token that is directly followed by a token
        containing one of ``unit_keywords``.

        Returns:
            ``(value, lower-cased following token text)`` or ``None``.
        """
        for token in doc:
            if not token.like_num:
                continue
            try:
                value = int(token.text)
            except ValueError:
                # Number-like tokens that are not plain integers are skipped
                continue
            if token.i + 1 < len(doc):
                following = doc[token.i + 1].text.lower()
                if any(keyword in following for keyword in unit_keywords):
                    return value, following
        return None

    def extract_medicine_data(self, text):
        """
        Extract medicine information from text using spaCy NLP

        Args:
            text: Prescription text with one statement per line.

        Returns:
            List of dicts with the keys ``name``, ``dosage``, ``frequency``
            and ``duration`` (each a string, "" when not found). Entries
            without a name are dropped.
        """
        medicine_data = []
        current_medicine = None

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            lowered = line.lower()

            if self._starts_medicine_line(lowered):
                # Close the previous entry before starting a new one
                if current_medicine and current_medicine["name"]:
                    medicine_data.append(current_medicine)

                current_medicine = {
                    "name": self._extract_name(line),
                    "dosage": "",
                    "frequency": "",
                    "duration": ""
                }

            if not current_medicine:
                continue

            has_dosage_hint = "tablet" in lowered or "ml" in lowered
            has_frequency_hint = "times" in lowered or "once" in lowered or "twice" in lowered
            has_duration_hint = "day" in lowered
            if not (has_dosage_hint or has_frequency_hint or has_duration_hint):
                continue

            # Parse the line once and reuse the result for all three fields
            doc = nlp(line)

            if has_dosage_hint:
                match = self._first_quantity(doc, ("tablet", "ml"))
                if match:
                    quantity, unit = match
                    current_medicine["dosage"] = f"{quantity} {unit}"

            if has_frequency_hint:
                match = self._first_quantity(doc, ("times",))
                if match:
                    current_medicine["frequency"] = f"{match[0]} times a day"

                # Check for text-based frequency
                if not current_medicine["frequency"]:
                    if "once" in lowered:
                        current_medicine["frequency"] = "once a day"
                    elif "twice" in lowered:
                        current_medicine["frequency"] = "twice a day"
                    elif "three times" in lowered:
                        current_medicine["frequency"] = "three times a day"

            if has_duration_hint:
                match = self._first_quantity(doc, ("day",))
                if match:
                    current_medicine["duration"] = f"{match[0]} days"

        # Add the last medicine if it exists
        if current_medicine and current_medicine["name"]:
            medicine_data.append(current_medicine)

        return medicine_data

class ReminderGenerator:
    """
    Class for generating and scheduling reminders based on extracted medicine data

    Args:
        user_id: Identifier stored on every generated reminder.
        prescription_id: Optional identifier stored on every reminder.
        config: Optional dict overriding entries of the default configuration.
        reminders_dir: Directory for generated audio files. Defaults to a
            ``reminders`` folder next to this file, or inside the current
            working directory when ``__file__`` is not defined (e.g. when
            the code runs in a notebook).
    """

    # Hours of the day at which reminders fire, keyed by doses per day
    _REMINDER_HOURS = {
        1: (9,),
        2: (9, 21),
        3: (9, 14, 21),
        4: (8, 12, 16, 20),
    }

    def __init__(self, user_id, prescription_id=None, config=None, reminders_dir=None):
        self.user_id = user_id
        self.prescription_id = prescription_id

        if reminders_dir is None:
            try:
                base_dir = os.path.dirname(os.path.abspath(__file__))
            except NameError:
                # __file__ does not exist in notebooks / interactive sessions
                base_dir = os.getcwd()
            reminders_dir = os.path.join(base_dir, "reminders")
        self.reminders_dir = reminders_dir
        os.makedirs(self.reminders_dir, exist_ok=True)

        # Default configuration
        self.config = {
            'default_duration': 7,  # Default duration in days
            'default_method': 'After Food',  # Default method
            'default_status': 'upcoming',  # Default status
            'reminder_prefix': 'It\'s time to take',  # Prefix for reminder text
            'reminder_suffix': '',  # Suffix for reminder text
            'audio_format': 'mp3',  # Audio format for voice reminders
        }

        # Update with user-provided configuration
        if config:
            self.config.update(config)

    def generate_voice_reminder(self, text, reminder_id):
        """
        Generate voice reminder for the given text.

        Returns:
            The file name (relative to ``reminders_dir``) of the saved audio,
            or ``None`` when generation failed.
        """
        file_name = f"reminder_{reminder_id}.{self.config['audio_format']}"
        try:
            output_file = os.path.join(self.reminders_dir, file_name)
            tts = gTTS(text=text, lang='en')
            tts.save(output_file)
            return file_name
        except Exception as e:
            # Best effort: a reminder without audio is still scheduled
            print(f"Error generating audio: {e}")
            return None

    @staticmethod
    def _times_per_day(frequency):
        """Map a lower-cased frequency phrase to doses per day (default 1)."""
        if "twice" in frequency or "2x" in frequency or "2 times" in frequency:
            return 2
        if "three" in frequency or "3x" in frequency or "3 times" in frequency:
            return 3
        if "four" in frequency or "4x" in frequency or "4 times" in frequency:
            return 4
        return 1

    def _duration_days(self, duration):
        """
        Convert a phrase such as "2 weeks" or "5 days" into a number of days.

        Falls back to ``config['default_duration']`` when the phrase names
        neither weeks nor days or does not start with an integer.
        """
        default_days = self.config['default_duration']
        if "week" in duration:
            days_per_unit = 7
        elif "day" in duration:
            days_per_unit = 1
        else:
            return default_days
        try:
            return int(duration.split()[0]) * days_per_unit
        except (ValueError, IndexError):
            return default_days

    def schedule_reminders(self, medicine_data):
        """
        Schedule reminders based on extracted medicine data

        Args:
            medicine_data: Iterable of dicts with the keys ``name``,
                ``dosage``, ``frequency``, ``duration`` and optionally
                ``method``.

        Returns:
            List of reminder dicts, one per scheduled job. Only times that
            are still in the future and before the end of the treatment
            period are scheduled.
        """
        scheduled_reminders = []

        for medicine in medicine_data:
            # Extract medicine information (missing or None values become "")
            name = (medicine.get("name") or "").strip()
            dosage = (medicine.get("dosage") or "").strip()
            frequency = (medicine.get("frequency") or "").lower().strip()
            duration = (medicine.get("duration") or "").lower().strip()
            method = medicine.get("method", self.config['default_method'])

            # Reminder times for the first day
            times_per_day = self._times_per_day(frequency)
            today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
            reminder_times = [
                today.replace(hour=hour, minute=0)
                for hour in self._REMINDER_HOURS[times_per_day]
            ]

            # Treatment period
            duration_days = self._duration_days(duration)
            start_time = datetime.now()
            end_time = start_time + timedelta(days=duration_days)

            # The spoken text is the same for every reminder of this medicine.
            # Without a dosage, leave out the "<dosage> of" part rather than
            # producing "take  of <name>".
            if dosage:
                reminder_text = f"{self.config['reminder_prefix']} {dosage} of {name}"
            else:
                reminder_text = f"{self.config['reminder_prefix']} {name}"
            if self.config['reminder_suffix']:
                reminder_text += f" {self.config['reminder_suffix']}"

            # Schedule reminders
            for day in range(duration_days):
                for base_time in reminder_times:
                    reminder_time = base_time + timedelta(days=day)
                    if not (datetime.now() < reminder_time < end_time):
                        continue

                    reminder_id = str(uuid.uuid4())

                    # Generate voice reminder
                    audio_file = self.generate_voice_reminder(reminder_text, reminder_id)

                    # Create reminder object
                    reminder = {
                        'id': reminder_id,
                        'user_id': self.user_id,
                        'medicine': name,
                        'dosage': dosage,
                        'frequency': frequency,
                        'duration': duration,
                        'method': method,
                        'reminder_time': reminder_time,
                        'status': self.config['default_status'],
                        'audio_file': audio_file,
                        'prescription_id': self.prescription_id
                    }

                    # Schedule job; the id is passed as an argument so no
                    # closure over the loop variables is needed
                    scheduler.add_job(
                        func=self.trigger_reminder,
                        args=[reminder_id],
                        trigger="date",
                        run_date=reminder_time,
                        id=reminder_id
                    )

                    scheduled_reminders.append(reminder)

        return scheduled_reminders

    def trigger_reminder(self, reminder_id):
        """
        Trigger reminder when time is hit

        Prints an alert and returns ``reminder_id``.
        """
        print(f"REMINDER ALERT: Reminder {reminder_id} triggered")
        return reminder_id

def create_dataset_structure(base_dir="dataset"):
    """
    Create the dataset directory structure for prescription images and labels

    Creates ``base_dir`` with one sub-directory per medicine type, a
    ``resized_images`` directory, a ``labels`` directory, and a sample label
    file ``labels/sample_label.json``. Existing directories are left as
    they are; the sample label file is overwritten.

    Args:
        base_dir: Root directory of the dataset.

    Returns:
        ``base_dir``.
    """
    # Subdirectories for different medicine types
    medicine_types = [
        "tablets",
        "capsules",
        "syrups",
        "injections",
        "drops",
        "creams",
        "inhalers"
    ]

    # "resized_images" and "labels" are the two directories that
    # PrescriptionDataset requires inside the dataset root.
    labels_dir = os.path.join(base_dir, "labels")
    required_dirs = [os.path.join(base_dir, med_type) for med_type in medicine_types]
    required_dirs.append(os.path.join(base_dir, "resized_images"))
    required_dirs.append(labels_dir)

    # makedirs also creates base_dir itself
    os.makedirs(base_dir, exist_ok=True)
    for directory in required_dirs:
        os.makedirs(directory, exist_ok=True)

    # Create a sample label file
    sample_label = {
        "image_id": "sample_image",
        "medicine_types": [1, 0, 0, 0, 0, 0, 0],  # Example: tablet
        "medicines": [
            {
                "name": "Sample Medicine",
                "dosage": "1 tablet",
                "frequency": "twice a day",
                "duration": "7 days"
            }
        ]
    }

    with open(os.path.join(labels_dir, "sample_label.json"), "w", encoding="utf-8") as f:
        json.dump(sample_label, f, indent=4)

    print(f"Dataset structure created at: {os.path.abspath(base_dir)}")
    print("Please place your prescription images in the appropriate subdirectories.")
    print("For example, tablet prescriptions should go in the 'tablets' directory.")
    print("Label files should be placed in the 'labels' directory.")
    print("Training reads images from the 'resized_images' directory.")

    return base_dir

def _train_model(dataset_path):
    """
    Validate the dataset directory, build the data loaders and train the model.

    Every failure is reported to the user and the function returns normally,
    so the caller's menu loop keeps running.

    Args:
        dataset_path: Dataset root; images are looked up in its
            ``resized_images`` sub-directory.
    """
    print("\n=== Model Training ===")

    if not os.path.exists(dataset_path):
        print("Error: Dataset directory does not exist!")
        return

    # os.listdir raises if the sub-directory is missing (e.g. right after the
    # "Create dataset structure" option, which does not create it), so check first.
    images_dir = os.path.join(dataset_path, "resized_images")
    if not os.path.isdir(images_dir):
        print(f"Error: Image directory does not exist: {images_dir}")
        return

    # Check if directory is empty
    image_files = [f for f in os.listdir(images_dir)
                   if f.endswith(('.png', '.jpg', '.jpeg'))]
    if not image_files:
        print("Error: No image files found in the dataset directory!")
        return

    # Define transformations
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    try:
        # Create dataset
        dataset = PrescriptionDataset(
            image_dir=dataset_path,
            transform=transform,
            cache_dir="ocr_cache"
        )

        # Check if dataset is empty
        if len(dataset) == 0:
            print("Error: Dataset is empty!")
            return

        # Split dataset 80/20 into training and validation subsets
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

        # Initialize model and trainer
        model = PrescriptionModel()
        trainer = PrescriptionTrainer(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device
        )

        # Train model
        trainer.train(num_epochs=10, patience=3)

        print("Model training completed!")

    except Exception as e:
        logger.error(f"Error during model training: {str(e)}")
        print(f"Error: {str(e)}")


def _process_prescription(ocr, text_extractor):
    """
    Prompt for an image path, OCR it, extract medicine data and optionally
    schedule reminders.

    Errors raised while processing are logged and printed; the function then
    returns normally so the caller's menu loop keeps running.

    Args:
        ocr: OCR engine exposing ``ocr(image_path, cls=True)``.
        text_extractor: Object exposing ``extract_medicine_data(text)``.
    """
    # Get prescription image path from user
    image_path = input("\nEnter the path to your prescription image: ")

    if not os.path.exists(image_path):
        print("Error: File does not exist!")
        return

    try:
        # Process the image with OCR
        logger.info(f"Processing image: {image_path}")
        result = ocr.ocr(image_path, cls=True)

        if not result or not result[0]:
            print("Error: Could not extract text from the image!")
            return

        # Extract text from OCR result (the text is read from line[1][0] of each entry)
        extracted_text = "\n".join([line[1][0] for line in result[0]])
        print("\nExtracted Text:")
        print("---------------")
        print(extracted_text)

        # Extract medicine data using NLP
        logger.info("Extracting medicine data...")
        extracted_medicine_data = text_extractor.extract_medicine_data(extracted_text)

        if not extracted_medicine_data:
            print("\nNo medicine information found in the prescription!")
            return

        print("\nExtracted Medicine Data:")
        print("----------------------")
        for idx, medicine in enumerate(extracted_medicine_data, 1):
            print(f"\nMedicine {idx}:")
            print(f"Name: {medicine.get('name', 'N/A')}")
            print(f"Dosage: {medicine.get('dosage', 'N/A')}")
            print(f"Frequency: {medicine.get('frequency', 'N/A')}")
            print(f"Duration: {medicine.get('duration', 'N/A')}")

        # Ask user if they want to schedule reminders
        schedule_choice = input("\nWould you like to schedule reminders for these medicines? (y/n): ")

        # Tolerate stray whitespace around the answer
        if schedule_choice.strip().lower() == 'y':
            # Get user ID (in a real application, this would come from authentication)
            user_id = input("Enter your user ID: ")

            # Initialize reminder generator
            logger.info("Initializing reminder generator...")
            reminder_generator = ReminderGenerator(user_id=user_id)

            # Schedule reminders
            logger.info("Scheduling reminders...")
            scheduled_reminders = reminder_generator.schedule_reminders(extracted_medicine_data)

            print(f"\nSuccessfully scheduled {len(scheduled_reminders)} reminders!")
            print("\nReminder Schedule:")
            print("-----------------")
            for reminder in scheduled_reminders:
                print(f"\nMedicine: {reminder['medicine']}")
                print(f"Dosage: {reminder['dosage']}")
                print(f"Time: {reminder['reminder_time'].strftime('%Y-%m-%d %H:%M')}")
                print(f"Status: {reminder['status']}")

    except Exception as e:
        logger.error(f"Error processing prescription: {str(e)}")
        print(f"Error: {str(e)}")


def main(dataset_path="D:/Coding/Machine_Learning/Projects/VAMD/dataset/train"):
    """
    Run the interactive prescription-processing menu.

    Seeds torch and numpy, initializes the OCR engine and the NLP text
    extractor, then loops over a text menu (process image / train model /
    create dataset structure / exit) until the user chooses to exit.

    Args:
        dataset_path: Dataset root used by the "Train model" option. The
            default preserves the previously hard-coded location. Choosing
            "Create dataset structure" replaces it with the directory
            returned by ``create_dataset_structure()`` for the rest of the
            session.

    Raises:
        Exception: Any unexpected error outside the per-option handlers is
            logged and re-raised.
    """
    try:
        # Set seeds for reproducibility
        torch.manual_seed(42)
        np.random.seed(42)

        # GPU setup and memory management
        if torch.cuda.is_available():
            # The cuDNN autotuner (benchmark=True) may select different
            # algorithms from run to run, which works against the
            # reproducibility requested above, so it is disabled.
            torch.backends.cudnn.benchmark = False
            torch.backends.cudnn.deterministic = True
            torch.cuda.empty_cache()
            logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
        else:
            logger.info("Using CPU")

        # Initialize OCR
        logger.info("Initializing OCR...")
        ocr = PaddleOCR(use_angle_cls=True, lang='en')

        # Initialize NLP components
        logger.info("Initializing NLP components...")
        text_extractor = NLPTextExtractor()

        while True:
            print("\n=== Prescription Processing System ===")
            print("1. Process prescription image")
            print("2. Train model")
            print("3. Create dataset structure")
            print("4. Exit")

            # Tolerate stray whitespace around the menu selection
            choice = input("\nEnter your choice (1-4): ").strip()

            if choice == "4":
                print("Exiting...")
                break

            elif choice == "3":
                # Create dataset structure
                print("\n=== Creating Dataset Structure ===")
                dataset_path = create_dataset_structure()
                print(f"Dataset structure created at: {dataset_path}")

            elif choice == "2":
                _train_model(dataset_path)

            elif choice == "1":
                _process_prescription(ocr, text_extractor)

            else:
                print("Invalid choice! Please try again.")

    except Exception as e:
        logger.error(f"Error in main: {str(e)}")
        raise

    finally:
        # Final cleanup: release cached GPU memory on every exit path,
        # including errors and user interrupts.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# Entry point: start the interactive menu only when this code runs as the
# main module (which is also the case when the cell is executed in a notebook
# kernel), not when it is imported from another module.
if __name__ == "__main__":
    main()


Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0
Device set to use cuda:0


[2025/04/08 20:38:08] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\Harsh/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\Harsh/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320',

Processing batch (50 images):  60%|██████    | 30/50 [00:25<00:16,  1.20it/s]


KeyboardInterrupt: 