# Environment Setup

Run this notebook first to set up your environment for the RAG benchmarking framework. This will:
1. Install all required dependencies
2. Configure AWS access
3. Test imports and utilities
4. Create necessary directories

## 1. Install Dependencies

Run this cell to install all required packages. This only needs to be done once per environment.

In [None]:
import sys
import subprocess
import pkg_resources

def install_requirements(requirements_file: str):
    """Install packages from a requirements file that are not yet installed.

    Each non-empty, non-comment line of the file is treated as one
    requirement. A requirement counts as already installed when a
    distribution with the same (normalised) project name is present in the
    current environment; the version specifier is not checked. Everything
    else is handed to ``pip install`` in a single call.

    Args:
        requirements_file: Path to a pip-style requirements file.

    Returns:
        None. Progress and errors are reported via ``print``.

    Raises:
        OSError: If the requirements file cannot be opened.
    """
    # Local imports keep this block self-contained within the notebook cell.
    import re
    from importlib import metadata

    def canonical(name):
        # Project names compare case-insensitively, with runs of "-", "_"
        # and "." treated as equivalent (e.g. "PyYAML" vs "pyyaml").
        return re.sub(r'[-_.]+', '-', name).lower()

    requirements = []
    with open(requirements_file, encoding='utf-8') as f:
        for raw_line in f:
            # Drop whole-line comments (even indented ones) and trailing
            # " # ..." comments; a "#" without leading whitespace (e.g. a
            # URL fragment) is left untouched.
            line = re.sub(r'(^|\s+)#.*$', '', raw_line).strip()
            if line:
                requirements.append(line)

    installed = set()
    for dist in metadata.distributions():
        dist_name = dist.metadata['Name']
        if dist_name:
            installed.add(canonical(dist_name))

    # The leading run of name characters is the project name; whatever
    # follows (extras, version specifiers, markers) is ignored for the check.
    name_pattern = re.compile(r'[A-Za-z0-9][A-Za-z0-9._-]*')
    missing = []
    for requirement in requirements:
        match = name_pattern.match(requirement)
        # Lines without a recognisable name (e.g. pip options) go to pip as-is.
        if match is None or canonical(match.group()) not in installed:
            missing.append(requirement)

    if not missing:
        print("✅ All required packages are already installed!")
        return

    print("📦 Installing packages...")
    try:
        # Capture output to suppress verbose pip messages
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '--quiet'] + missing,
            capture_output=True,
            text=True
        )
    except Exception as e:
        print(f"❌ Installation failed: {str(e)}")
        return

    if result.returncode == 0:
        print("✅ Installation complete!")
        return

    print("❌ Installation failed with errors:")
    stderr_lines = [line.strip() for line in result.stderr.split('\n') if line.strip()]
    # Prefer pip's explicit error lines, but never fail silently: fall back
    # to the whole stderr when none of the lines is tagged "ERROR:".
    error_lines = [line for line in stderr_lines if 'ERROR:' in line] or stderr_lines
    for line in error_lines:
        print(f"  {line}")

# Install whatever is listed in requirements.txt (path is relative to the
# current working directory) and not yet present in this environment.
install_requirements('requirements.txt')

## 2. Configure AWS Access

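Check AWS access and provide setup instructions based on the environment. Note that the S3 permission check creates a small temporary test bucket and object in your account and then tries to delete them again.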

In [None]:
import boto3
from botocore.exceptions import ClientError
import json
import os

def is_running_in_sagemaker():
    """Return True when the SageMaker resource-metadata file is present.

    Detection is purely file-based: the function reports whether
    ``/opt/ml/metadata/resource-metadata.json`` exists on the local
    filesystem.
    """
    metadata_file = '/opt/ml/metadata/resource-metadata.json'
    return os.path.exists(metadata_file)

def test_s3_permissions():
    """Test S3 permissions by attempting actual operations.

    Runs ListBuckets, then a CreateBucket / PutObject / GetObject /
    DeleteObject / DeleteBucket sequence against a throw-away bucket,
    printing one status line per operation. The sequence stops at the first
    failing operation, so later operations in the sequence are not probed.

    Returns:
        list[str]: Names of the operations that were denied. If an
        unexpected (non-``ClientError``) exception occurs, all six operations
        are reported as missing.
    """
    # Local import keeps this block self-contained within the notebook cell.
    import uuid

    missing_permissions = []
    s3 = boto3.client('s3')
    # Random suffix: bucket names are global, and the name must consist of
    # lowercase letters, digits and hyphens only.
    test_bucket = "test-permissions-bucket-" + uuid.uuid4().hex[:12]
    test_key = "test-file.txt"
    test_data = b"test data"
    # Tracks whether a bucket we created still exists and needs cleanup.
    bucket_created = False

    try:
        # Test ListBuckets
        try:
            s3.list_buckets()
            print("✅ S3:ListBuckets - Success")
        except ClientError as e:
            missing_permissions.append('ListBuckets')
            print(f"❌ S3:ListBuckets - {e.response['Error']['Message']}")

        # Test CreateBucket, PutObject, GetObject, DeleteObject, DeleteBucket
        try:
            create_args = {'Bucket': test_bucket}
            region = s3.meta.region_name
            # Outside us-east-1, S3 requires the target region to be passed
            # explicitly as the bucket's LocationConstraint.
            if region and region != 'us-east-1':
                create_args['CreateBucketConfiguration'] = {'LocationConstraint': region}
            s3.create_bucket(**create_args)
            bucket_created = True
            print("✅ S3:CreateBucket - Success")

            s3.put_object(Bucket=test_bucket, Key=test_key, Body=test_data)
            print("✅ S3:PutObject - Success")

            s3.get_object(Bucket=test_bucket, Key=test_key)
            print("✅ S3:GetObject - Success")

            s3.delete_object(Bucket=test_bucket, Key=test_key)
            print("✅ S3:DeleteObject - Success")

            s3.delete_bucket(Bucket=test_bucket)
            bucket_created = False
            print("✅ S3:DeleteBucket - Success")

        except ClientError as e:
            error_code = e.response['Error']['Code']
            if 'AccessDenied' in error_code:
                current_operation = e.operation_name
                missing_permissions.append(current_operation)
                print(f"❌ S3:{current_operation} - {e.response['Error']['Message']}")
            else:
                print(f"Warning: Non-permission error during S3 tests: {str(e)}")
    except Exception as e:
        print(f"❌ Error during S3 tests: {str(e)}")
        for operation in ['ListBuckets', 'CreateBucket', 'PutObject', 'GetObject', 'DeleteObject', 'DeleteBucket']:
            # Avoid listing an operation twice if it was already recorded.
            if operation not in missing_permissions:
                missing_permissions.append(operation)
    finally:
        # Best-effort cleanup so a failure part-way through the sequence
        # does not leave the test bucket behind.
        if bucket_created:
            try:
                s3.delete_object(Bucket=test_bucket, Key=test_key)
                s3.delete_bucket(Bucket=test_bucket)
            except Exception as cleanup_error:
                print(f"Warning: Could not remove test bucket {test_bucket}; "
                      f"please delete it manually ({str(cleanup_error)})")

    return missing_permissions

def test_bedrock_permissions():
    """Check that a Bedrock runtime client can be constructed.

    No Bedrock API is invoked (an actual model invocation would cost
    money), so this only confirms that ``boto3`` can build a
    ``bedrock-runtime`` client; it does not prove that the caller is
    allowed to invoke models.

    Returns:
        list[str]: Empty on success, ``['InvokeModel']`` if building the
        client raised any exception.
    """
    try:
        boto3.client('bedrock-runtime')
    except Exception as client_error:
        print(f"❌ Error accessing Bedrock: {str(client_error)}")
        return ['InvokeModel']

    print("✅ Bedrock client creation successful")
    return []

def get_required_managed_policies():
    """Return the AWS managed policies recommended in the setup instructions.

    Returns:
        list[dict]: One dict per policy with the keys ``name``, ``arn`` and
        ``description`` (in that order). Every ARN is the policy name under
        the ``arn:aws:iam::aws:policy/`` prefix.
    """
    policy_specs = (
        ('AmazonBedrockFullAccess', 'Provides access to Amazon Bedrock services'),
        ('AmazonOpenSearchServiceFullAccess', 'Provides access to Amazon OpenSearch Service'),
        ('NeptuneFullAccess', 'Provides access to Amazon Neptune'),
        ('AmazonS3FullAccess', 'Provides access to Amazon S3'),
    )
    return [
        {
            'name': policy_name,
            'arn': f'arn:aws:iam::aws:policy/{policy_name}',
            'description': purpose,
        }
        for policy_name, purpose in policy_specs
    ]

def get_custom_policy_json():
    """Get a custom IAM policy document covering the services used here.

    The policy is the "Option 2" alternative to the AWS managed policies
    and contains one Allow statement each for Bedrock, OpenSearch, Neptune
    and S3, all scoped to ``"Resource": "*"``.

    Returns:
        dict: A JSON-serialisable IAM policy document (version 2012-10-17).
    """
    return {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "bedrock:InvokeModel",
                    "bedrock:ListFoundationModels"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                # OpenSearch Service domain actions live under the "es"
                # prefix (what AmazonOpenSearchServiceFullAccess grants);
                # "opensearch:*" alone does not cover them. The original
                # "opensearch:*" entry is kept so nothing is taken away.
                "Action": [
                    "es:*",
                    "opensearch:*"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": "neptune-db:*",
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": "s3:*",
                "Resource": "*"
            }
        ]
    }

def print_iam_setup_instructions(role_name=None, missing_permissions=None):
    """Print detailed IAM setup instructions.

    Prints both setup options (the managed policies from
    ``get_required_managed_policies`` and the custom policy from
    ``get_custom_policy_json``) followed by numbered console steps.

    Args:
        role_name: Optional name of the IAM role to update. When given, it
            is shown at the top and a "search for and select" step is added.
        missing_permissions: Optional JSON-serialisable collection of the
            permissions found missing; printed as indented JSON when truthy.

    Returns:
        None. All output goes to stdout.
    """
    print("\n📋 IAM Setup Instructions\n")

    if role_name:
        print(f"Role to update: {role_name}\n")

    if missing_permissions:
        print("Missing Permissions:")
        print(json.dumps(missing_permissions, indent=2))
        print()

    print("Option 1: Use AWS Managed Policies (Recommended)")
    print("----------------------------------------")
    print("Attach these AWS managed policies to your role:")
    for policy in get_required_managed_policies():
        print(f"\n• {policy['name']}")
        print(f"  ARN: {policy['arn']}")
        print(f"  Purpose: {policy['description']}")

    print("\nOption 2: Use Custom Policy")
    print("----------------------------------------")
    print("Create a new inline policy with this JSON:")
    print(json.dumps(get_custom_policy_json(), indent=2))

    print("\n📝 Steps to update the IAM role:")
    # Number the steps dynamically so the list stays consecutive (1, 2)
    # when the role-specific step is omitted, instead of jumping 1 -> 3.
    steps = ["Go to AWS Console > IAM > Roles"]
    if role_name:
        steps.append(f"Search for and select: {role_name}")
    steps.append("Choose either option:")
    for number, step in enumerate(steps, start=1):
        print(f"{number}. {step}")
    print("   - Click 'Attach policies' and search for the managed policies listed above")
    print("   - Or click 'Add inline policy', choose JSON, and paste the custom policy")

def _role_name_from_arn(arn):
    """Extract the role (or user) name from an identity ARN."""
    # Assumed-role ARNs look like
    # arn:aws:sts::<account>:assumed-role/<role-name>/<session-name>,
    # so the role name is the middle segment, not the last one.
    segments = arn.split('/')
    if segments[0].endswith(':assumed-role') and len(segments) >= 3:
        return segments[1]
    return segments[-1]


def check_aws_access():
    """Verify AWS access and provide appropriate setup instructions.

    Looks up the caller identity via STS, prints it, runs the S3 and
    Bedrock permission checks and prints IAM setup instructions when
    running in SageMaker or when permissions are missing. If anything
    raises, credential and IAM setup guidance is printed instead.

    Returns:
        bool: True when the caller identity could be resolved and the
        checks ran without raising (even if some permissions are missing);
        False when an exception occurred.
    """
    try:
        # Try to get caller identity
        sts = boto3.client('sts')
        identity = sts.get_caller_identity()

        print("✅ AWS access configured successfully!")
        print(f"Account: {identity['Account']}")
        print(f"Identity ARN: {identity['Arn']}")

        print("\n=== Testing Required Permissions ===\n")

        # Test specific service permissions
        missing_s3 = test_s3_permissions()
        missing_bedrock = test_bedrock_permissions()

        missing_permissions = {}
        if missing_s3:
            missing_permissions['s3'] = missing_s3
        if missing_bedrock:
            missing_permissions['bedrock-runtime'] = missing_bedrock

        # If in SageMaker, show IAM role instructions with missing permissions
        if is_running_in_sagemaker():
            print("\n🔄 Running in SageMaker environment")
            role_name = _role_name_from_arn(identity['Arn'])
            print_iam_setup_instructions(role_name, missing_permissions if missing_permissions else None)
        elif missing_permissions:
            print_iam_setup_instructions(missing_permissions=missing_permissions)

        return True

    except Exception as e:
        # Deliberately broad: any failure here is reported to the notebook
        # user together with setup guidance rather than as a traceback.
        print("❌ AWS access not configured!")
        print("Error:", str(e))

        if is_running_in_sagemaker():
            print("\n🔄 SageMaker environment detected but IAM role may need permissions.")
            print_iam_setup_instructions()
        else:
            print("\nTo configure AWS access, choose one of these methods:")
            print("\n1. AWS CLI (Recommended):")
            print("   Run: aws configure")
            print("   Enter your:")
            print("   - AWS Access Key ID")
            print("   - AWS Secret Access Key")
            print("   - Default region (e.g., us-west-2)")
            print("\n2. Environment Variables:")
            print("   export AWS_ACCESS_KEY_ID=your_access_key")
            print("   export AWS_SECRET_ACCESS_KEY=your_secret_key")
            print("   export AWS_DEFAULT_REGION=your_region")
            print("\n3. Credentials File:")
            print("   Create ~/.aws/credentials with:")
            print("   [default]")
            print("   aws_access_key_id = your_access_key")
            print("   aws_secret_access_key = your_secret_key")

            print("\nOnce credentials are configured, you'll also need to set up IAM permissions:")
            print_iam_setup_instructions()

        return False

# Run the AWS access and permission checks; returns True/False.
check_aws_access()

## 3. Test Imports

Verify that all utilities can be imported correctly.

In [None]:
def test_imports():
    """Try importing every module the notebooks rely on.

    Imports are attempted in three groups (core utilities, data science
    packages, RAGAs), printing a success line after each group. The first
    failing import aborts the remaining ones.

    Returns:
        bool: True if every import succeeded, False if any raised.
    """
    try:
        # Group 1: helper modules (presumably project-local - verify they
        # are on the import path of the notebook's working directory).
        import utils_setup
        from utils import RAGMetricsEvaluator, BenchmarkVisualizer, notebook_to_module
        print("✅ Core utilities imported successfully!")

        # Group 2: data science stack.
        import pandas as pd
        import numpy as np
        import matplotlib.pyplot as plt
        import seaborn as sns
        print("✅ Data science packages imported successfully!")

        # Group 3: RAGAs evaluation framework and the metrics used.
        from ragas import evaluate
        from ragas.metrics import response_relevancy, context_precision, context_recall, faithfulness
        print("✅ RAGAs evaluation framework imported successfully!")
    except Exception as import_error:
        print("❌ Error importing packages:")
        print(str(import_error))
        return False
    else:
        return True

# Run the import smoke test; returns True/False.
test_imports()

## 4. Create Directories

Set up necessary directories for results and data.

In [None]:
import os
from pathlib import Path

def setup_directories():
    """Ensure the project's working directories exist.

    Each path is relative to the current working directory. Missing
    directories are created together with their parents; for every path a
    line is printed saying whether it was created or already existed.
    """
    required_dirs = (
        'results',
        'datasets/rag_evaluation/labeled/data',
        'datasets/rag_evaluation/unlabeled/data',
        'rag_implementations/baseline_rag',
        'rag_implementations/graph_rag/graph_store',
    )

    for dir_name in required_dirs:
        target = Path(dir_name)
        if target.exists():
            print(f"✓ Directory exists: {dir_name}")
            continue
        target.mkdir(parents=True)
        print(f"✅ Created directory: {dir_name}")

# Create the result/dataset/implementation directories under the current
# working directory.
setup_directories()

## Setup Complete!

If all cells above show successful completion (✅), your environment is ready to use!

You can now proceed to:
1. Open notebooks in evaluation_pipelines/templates/
2. Create your own RAG implementations
3. Run evaluations and comparisons

All dependencies are installed in your current Python environment and will be available to every notebook in this project that runs on the same kernel/environment.