In [6]:
!pip install -r requirements.txt



In [2]:
%load_ext autoreload
%autoreload 2

import sys, os
module_path = "."
sys.path.append(os.path.abspath(module_path))

In [None]:
import boto3
import uuid
import botocore
import time
from dotenv import load_dotenv, set_key


DEV = True # True일 경우 1-AZ without standby로 생성, False일 경우 3-AZ with standby. 워크샵 목적일 때는 지나친 과금/리소스 방지를 위해 True로 설정하는 것을 권장
VERSION = "2.11" # OpenSearch Version (예: 2.7 / 2.9 / 2.11)

load_dotenv()

opensearch_user_id = os.environ.get("OPENSEARCH_USER") if os.environ.get("OPENSEARCH_USER") is not None else 'raguser' ## 다른 이름으로 수정해도 됩니다
opensearch_user_password = os.environ.get("OPENSEARCH_PASSWORD") if os.environ.get("OPENSEARCH_PASSWORD") is not None else 'MarsEarth1!' ## 다른 값으로 수정해도 됩니다

region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity()["Account"]
opensearch = boto3.client('opensearch', region)
rand_str = uuid.uuid4().hex[:8]

domain_name = f'rag-hol-kr-fsi-regulation'

cluster_config_prod = {
    'InstanceCount': 3,
    'InstanceType': 'r6g.large.search',
    'ZoneAwarenessEnabled': True,
    'DedicatedMasterEnabled': True,
    'MultiAZWithStandbyEnabled': True,
    'DedicatedMasterType': 'r6g.large.search',
    'DedicatedMasterCount': 3
}

cluster_config_dev = {
    'InstanceCount': 1,
    'InstanceType': 'r6g.large.search',
    'ZoneAwarenessEnabled': False,
    'DedicatedMasterEnabled': False,
}


ebs_options = {
    'EBSEnabled': True,
    'VolumeType': 'gp3',
    'VolumeSize': 100,
}

advanced_security_options = {
    'Enabled': True,
    'InternalUserDatabaseEnabled': True,
    'MasterUserOptions': {
        'MasterUserName': opensearch_user_id,
        'MasterUserPassword': opensearch_user_password
    }
}

ap = f'{{\"Version\":\"2012-10-17\",\"Statement\":[{{\"Effect\":\"Allow\",\"Principal\":{{\"AWS\":\"*\"}},\"Action\":\"es:*\",\"Resource\":\"arn:aws:es:{region}:{account_id}:domain\/{domain_name}\/*\"}}]}}'

if DEV:
    cluster_config = cluster_config_dev
else:
    cluster_config = cluster_config_prod


try:
    # 도메인 상태 확인
    response = opensearch.describe_domain(DomainName=domain_name)
    print(f"Domain '{domain_name}' already exists.")
except opensearch.exceptions.ResourceNotFoundException:
    # 도메인이 존재하지 않는 경우 새로운 도메인 생성
    print(f"Domain '{domain_name}' does not exist. Creating a new domain...")

response = opensearch.create_domain(
    DomainName=domain_name,
    EngineVersion=f'OpenSearch_{VERSION}',
    ClusterConfig=cluster_config,
    AccessPolicies=ap,
    EBSOptions=ebs_options,
    AdvancedSecurityOptions=advanced_security_options,
    NodeToNodeEncryptionOptions={'Enabled': True},
    EncryptionAtRestOptions={'Enabled': True},
    DomainEndpointOptions={'EnforceHTTPS': True}
)

In [6]:
%%time
def wait_for_domain_creation(domain_name):
    try:
        response = opensearch.describe_domain(
            DomainName=domain_name
        )
        # Every 60 seconds, check whether the domain is processing.
        while 'Endpoint' not in response['DomainStatus']:
            print('Creating domain...')
            time.sleep(60)
            response = opensearch.describe_domain(
                DomainName=domain_name)

        # Once we exit the loop, the domain is ready for ingestion.
        endpoint = response['DomainStatus']['Endpoint']
        print('Domain endpoint ready to receive data: ' + endpoint)
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ResourceNotFoundException':
            print('Domain not found.')
        else:
            raise error

wait_for_domain_creation(domain_name)

Creating domain...


KeyboardInterrupt: 

In [7]:
response = opensearch.describe_domain(DomainName=domain_name)
opensearch_domain_endpoint = f"https://{response['DomainStatus']['Endpoint']}"

print(opensearch_domain_endpoint) 

# set to .env
set_key(".env", "OPENSEARCH_HOST", opensearch_domain_endpoint)
set_key(".env", "OPENSEARCH_USER", opensearch_user_id)
set_key(".env", "OPENSEARCH_PASSWORD", opensearch_user_password)
set_key(".env", "OPENSEARCH_REGION", region)
set_key(".env", "OPENSEARCH_INDEX", "kr-fsi-regulation")


KeyError: 'Endpoint'

In [16]:
nori_pkg_id = {}
nori_pkg_id['us-east-1'] = {
    '2.3': 'G196105221',
    '2.5': 'G240285063',
    '2.7': 'G16029449', 
    '2.9': 'G60209291',
    '2.11': 'G181660338'
}

nori_pkg_id['us-west-2'] = {
    '2.3': 'G94047474',
    '2.5': 'G138227316',
    '2.7': 'G182407158', 
    '2.9': 'G226587000',
    '2.11': 'G79602591'
}

pkg_response = opensearch.associate_package(
    PackageID=nori_pkg_id[region][VERSION], # nori plugin
    DomainName=domain_name
)

In [17]:
%%time
def wait_for_associate_package(domain_name, max_results=1):

    response = opensearch.list_packages_for_domain(
        DomainName=domain_name,
        MaxResults=1
    )
    # Every 60 seconds, check whether the domain is processing.
    while response['DomainPackageDetailsList'][0]['DomainPackageStatus'] == "ASSOCIATING":
        print('Associating packages...')
        time.sleep(60)
        response = opensearch.list_packages_for_domain(
            DomainName=domain_name,
            MaxResults=1
        )

    #endpoint = response['DomainStatus']['Endpoint']
    print('Associated!')

wait_for_associate_package(domain_name)

Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associated!
CPU times: user 523 ms, sys: 31.2 ms, total: 554 ms
Wall time: 30min 3s


In [18]:
! pip list | grep langchain
! pip list | grep opensearch

langchain                     0.3.1
langchain-core                0.3.7
langchain-text-splitters      0.3.0
opensearch-dsl                2.1.0
opensearch-py                 2.7.1


In [5]:
from opensearchpy import OpenSearch, RequestsHttpConnection
http_auth = (opensearch_user_id, opensearch_user_password)
os_client = OpenSearch(
                hosts=[
                    {'host': opensearch_domain_endpoint.replace("https://", ""),
                     'port': 443
                    }
                ],
                http_auth=http_auth, # Master username, Master password,
                use_ssl=True,
                verify_certs=True,
                connection_class=RequestsHttpConnection
            )

res_str = os_client.cat.plugins()

if 'opensearch-analysis-nori' in res_str:
    print('opensearch-nori plugin이 사용가능합니다.')
else:
    print('opensearch-nori plugin 연결이 진행되지 않았습니다.')

NameError: name 'opensearch_domain_endpoint' is not defined

In [1]:
%store opensearch_user_id opensearch_user_password domain_name opensearch_domain_endpoint

UsageError: Unknown variable 'opensearch_user_id'
