# 🏥 Healthcare Chatbot - Getting Started

Welcome to the Healthcare Chatbot project! This notebook will guide you through:
1. Setting up the environment
2. Understanding the project structure
3. Testing basic functionality
4. Running your first demo
5. Choosing your next steps

Let's get started! 🚀

## 📋 Step 1: Environment Setup

First, let's check if we have all the required packages installed.

In [None]:
# Check Python version and install requirements
import sys
print(f"Python version: {sys.version}")

# Install required packages if not already installed
import subprocess
import os

def install_requirements(project_root='/workspace', requirements_file='requirements.txt'):
    """Install the project's required packages with pip.

    Runs ``pip install -r <requirements_file>`` through the interpreter that
    is executing this code (``sys.executable``). pip is started with
    ``project_root`` as its working directory via ``cwd=`` rather than by
    calling ``os.chdir``, so the working directory of the notebook kernel is
    left untouched for every later cell.

    Args:
        project_root: Directory pip is run from; a relative
            ``requirements_file`` is resolved against it.
        requirements_file: Requirements file passed to ``pip install -r``.

    Returns:
        None. The outcome is reported by printing a success or error message;
        failures are never raised to the caller.
    """
    command = [sys.executable, '-m', 'pip', 'install', '-r', requirements_file]

    try:
        result = subprocess.run(command, cwd=project_root,
                                capture_output=True, text=True)
    except Exception as e:
        # Best-effort notebook helper: report the problem (for example a
        # missing project_root) instead of interrupting the notebook.
        print(f"❌ Error: {e}")
        return

    if result.returncode == 0:
        print("✅ Requirements installed successfully!")
    else:
        print(f"❌ Error installing requirements: {result.stderr}")

# Uncomment the line below if you need to install requirements
# install_requirements()

In [None]:
# Test key imports: smoke-test the core third-party packages and report the
# installed versions plus whether a CUDA device is visible to PyTorch.
try:
    # A missing package raises ImportError here and is reported below.
    import torch
    import transformers
    import pandas as pd
    import numpy as np
    print("✅ Core packages imported successfully!")
    print(f"PyTorch version: {torch.__version__}")
    print(f"Transformers version: {transformers.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    # Only query the device name when CUDA is reported as available.
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
except ImportError as e:
    # Point the reader back to the installer helper defined earlier.
    print(f"❌ Import error: {e}")
    print("Please run the install_requirements() function above.")

## 📁 Step 2: Project Structure Overview

Let's explore the project structure and understand what each component does.

In [None]:
# Display project structure
import os
from pathlib import Path

def show_project_structure(path, prefix="", max_depth=3, current_depth=0):
    """Print the contents of a directory as an indented tree.

    Entries whose name starts with a dot are skipped; the rest are listed
    in sorted order, directories and files interleaved. Directories are
    descended into recursively until ``current_depth`` exceeds
    ``max_depth``.

    Args:
        path: Directory to list (string or ``Path``).
        prefix: Text printed before each entry; carries the vertical guide
            lines inherited from parent levels.
        max_depth: Deepest level whose entries are still printed.
        current_depth: Level of ``path`` itself; callers normally leave
            this at 0.
    """
    if current_depth > max_depth:
        return

    root = Path(path)
    visible = sorted(entry for entry in root.iterdir()
                     if not entry.name.startswith('.'))
    last_index = len(visible) - 1

    for index, entry in enumerate(visible):
        at_end = index == last_index
        branch = "└── " if at_end else "├── "

        if not entry.is_dir():
            # Pick an icon by file type: notebooks, Python sources, anything else.
            if entry.suffix == ".ipynb":
                icon = "📓"
            elif entry.suffix == ".py":
                icon = "🐍"
            else:
                icon = "📄"
            print(f"{prefix}{branch}{icon} {entry.name}")
            continue

        print(f"{prefix}{branch}📁 {entry.name}/")
        # Children of the last entry need no vertical guide line.
        child_prefix = prefix + ("    " if at_end else "│   ")
        show_project_structure(entry, child_prefix, max_depth, current_depth + 1)

print("🏗️ Healthcare Chatbot Project Structure:")
print("="*50)
# Report a missing or unreadable project root with a ❌ message, like the
# other cells in this notebook, instead of ending the cell with a traceback.
try:
    show_project_structure("/workspace")
except OSError as e:
    print(f"❌ Error displaying project structure: {e}")

## 🧪 Step 3: Test Basic Functionality

Let's test the core components to make sure everything is working.

In [None]:
# Add src to Python path. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry each time.
import sys

src_dir = '/workspace/src'
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Test data preprocessing
print("🧪 Testing Data Preprocessing...")
try:
    from data_preprocessing import HealthcareDataPreprocessor

    # Initialize preprocessor
    preprocessor = HealthcareDataPreprocessor()
    print("✅ Data preprocessor initialized successfully!")

    # Test text cleaning on a sample with trailing whitespace
    sample_text = "What are the symptoms of diabetes?   "
    cleaned_text = preprocessor.clean_text(sample_text)
    print("📝 Text cleaning test:")
    print(f"   Original: '{sample_text}'")
    print(f"   Cleaned:  '{cleaned_text}'")

except Exception as e:
    # Smoke test: report any failure (missing module, runtime error) and
    # let the notebook continue.
    print(f"❌ Error testing data preprocessing: {e}")

In [None]:
# Test Kaggle data loader
import os  # imported here so the cell does not depend on an earlier cell

print("🧪 Testing Kaggle Data Loader...")

test_path = '/workspace/test_medical_data.csv'
# Tracks whether this cell attempted to create test_path, so cleanup never
# removes a file the cell did not write.
test_file_written = False
try:
    from kaggle_data_loader import KaggleMedicalDataLoader

    # Create a small test dataset
    import pandas as pd
    test_data = pd.DataFrame({
        'question': [
            'What are diabetes symptoms?',
            'How to prevent heart disease?',
            'What causes headaches?'
        ],
        'answer': [
            'Diabetes symptoms include frequent urination, excessive thirst, and fatigue.',
            'Prevent heart disease through regular exercise, healthy diet, and avoiding smoking.',
            'Headaches can be caused by stress, dehydration, or underlying medical conditions.'
        ],
        'category': ['endocrine', 'cardiovascular', 'neurology']
    })

    # Save test dataset (flag is set first so a partial write is cleaned up too)
    test_file_written = True
    test_data.to_csv(test_path, index=False)

    # Test loader
    loader = KaggleMedicalDataLoader(test_path)
    processed_data = loader.process_data()

    print("✅ Kaggle loader test successful!")
    print(f"📊 Processed {len(processed_data)} samples")

    # Show sample
    if processed_data:
        sample = processed_data[0]
        print("📝 Sample data:")
        print(f"   Q: {sample['question']}")
        print(f"   A: {sample['answer'][:50]}...")

except Exception as e:
    print(f"❌ Error testing Kaggle loader: {e}")
finally:
    # Clean up even when the loader fails, so no stray CSV is left behind.
    if test_file_written and os.path.exists(test_path):
        os.remove(test_path)

## 🎮 Step 4: Run Interactive Demo

Let's run a simple demo to see how the system works.

In [None]:
# Load the sample healthcare dataset
import json


def average_word_count(texts):
    """Return the mean number of whitespace-separated words per string.

    Args:
        texts: Sequence of strings.

    Returns:
        The average word count as a float, or 0.0 when ``texts`` is empty
        (avoids a ZeroDivisionError on an empty dataset).
    """
    if not texts:
        return 0.0
    return sum(len(text.split()) for text in texts) / len(texts)


print("📊 Loading Sample Healthcare Dataset...")

try:
    # Load the sample dataset; JSON is read as UTF-8 explicitly so the
    # result does not depend on the platform's default encoding.
    with open('/workspace/data/healthcare_qa_dataset.json', 'r', encoding='utf-8') as f:
        sample_data = json.load(f)

    print(f"✅ Loaded {len(sample_data)} healthcare Q&A pairs")

    # Show some statistics
    questions = [item['question'] for item in sample_data]
    answers = [item['answer'] for item in sample_data]

    avg_q_len = average_word_count(questions)
    avg_a_len = average_word_count(answers)

    print("📈 Dataset Statistics:")
    print(f"   Average question length: {avg_q_len:.1f} words")
    print(f"   Average answer length: {avg_a_len:.1f} words")

    # Show sample Q&A pairs (answers truncated to 100 characters)
    print("\n📝 Sample Q&A Pairs:")
    for i, item in enumerate(sample_data[:3], 1):
        print(f"\n{i}. Q: {item['question']}")
        print(f"   A: {item['answer'][:100]}...")

except Exception as e:
    # Covers a missing file, malformed JSON and unexpected record shapes.
    print(f"❌ Error loading sample dataset: {e}")

## 🎯 Step 5: What's Next?

Now that you've tested the basic functionality, here are your next steps:

In [None]:
# Print the suggested follow-up notebooks as one newline-joined message.
print("\n".join([
    "🎯 NEXT STEPS FOR YOU:",
    "="*50,
    "",
    "1. 📊 Explore Your Data:",
    "   → Open: notebooks/02_Data_Exploration.ipynb",
    "   → Upload your Kaggle dataset and analyze it",
    "",
    "2. 🎓 Train a Model:",
    "   → Open: notebooks/03_Model_Training.ipynb",
    "   → Train your first healthcare chatbot",
    "",
    "3. 📈 Evaluate Performance:",
    "   → Open: notebooks/04_Model_Evaluation.ipynb",
    "   → Test and evaluate your trained model",
    "",
    "4. 🌐 Deploy Your Chatbot:",
    "   → Open: notebooks/05_Deployment.ipynb",
    "   → Launch web interface and share your chatbot",
    "",
    "💡 TIP: Run the notebooks in order for the best experience!",
]))

## ✅ Environment Check Summary

Run this cell to get a final summary of your environment setup:

In [None]:
# Imported here so the summary cell also works when run on its own in a
# fresh kernel.
import importlib.util
import os
import sys


def package_available(name):
    """Return True when the top-level package ``name`` can be imported.

    A package already loaded in this session counts as available; otherwise
    the import system is asked to locate it without importing it.

    Args:
        name: Top-level module name, e.g. ``'numpy'``.

    Returns:
        bool: True if the package is loaded or can be located, else False.
    """
    if name in sys.modules:
        return True
    try:
        return importlib.util.find_spec(name) is not None
    except (ImportError, ValueError):
        # find_spec can raise for malformed names or broken module specs.
        return False


print("🏥 HEALTHCARE CHATBOT ENVIRONMENT CHECK")
print("="*50)

# Check Python and packages (installed, not merely "already imported")
checks = {
    "Python 3.8+": sys.version_info >= (3, 8),
    "PyTorch": package_available('torch'),
    "Transformers": package_available('transformers'),
    "Pandas": package_available('pandas'),
    "NumPy": package_available('numpy'),
}

# Check files
file_checks = {
    "Sample Dataset": os.path.exists('/workspace/data/healthcare_qa_dataset.json'),
    "Training Script": os.path.exists('/workspace/train_chatbot.py'),
    "Kaggle Loader": os.path.exists('/workspace/src/kaggle_data_loader.py'),
    "Web Interface": os.path.exists('/workspace/src/web_interface.py'),
}

all_good = True

print("📦 Package Checks:")
for check, status in checks.items():
    icon = "✅" if status else "❌"
    print(f"   {icon} {check}")
    if not status:
        all_good = False

print("\n📁 File Checks:")
for check, status in file_checks.items():
    icon = "✅" if status else "❌"
    print(f"   {icon} {check}")
    if not status:
        all_good = False

print("\n🔧 Hardware:")
if checks["PyTorch"]:
    try:
        # Import locally: an earlier cell may not have bound the name
        # `torch` even when the package is installed or already loaded.
        import torch
    except (ImportError, OSError) as e:
        print(f"   ⚠️ Could not import PyTorch for the CUDA check: {e}")
    else:
        cuda_available = torch.cuda.is_available()
        print(f"   {'✅' if cuda_available else '⚠️'} CUDA: {'Available' if cuda_available else 'Not available (CPU only)'}")
        if cuda_available:
            print(f"   🎮 GPU: {torch.cuda.get_device_name(0)}")

print("\n" + "="*50)
if all_good:
    print("🎉 EVERYTHING LOOKS GOOD! You're ready to proceed.")
    print("📖 Next: Open notebooks/02_Data_Exploration.ipynb")
else:
    print("⚠️  Some issues detected. Please resolve them before proceeding.")
    print("💡 Try running: pip install -r requirements.txt")