## ChatBotv1 Object Storage Management (Create OrgID, Add Model/Corpus)

In [1]:
!pip install boto3

Defaulting to user installation because normal site-packages is not writeable


In [1]:
import getpass
import os
import re

import boto3
from botocore.exceptions import ClientError

In [2]:
def connect_to_s3(access_key, secret_key, endpoint_url, region=None):
    """
    Build a boto3 client for an S3-compatible object store.

    Args:
        access_key (str): Access key ID for the storage account
        secret_key (str): Secret key paired with ``access_key``
        endpoint_url (str): Base URL of the S3-compatible endpoint
        region (str): Region name; any falsy value is passed on as None

    Returns:
        boto3.client: Configured S3 client, or None if building it raised
    """
    try:
        # Signature version 4 plus path-style addressing, which some
        # S3-compatible providers need.
        client_config = boto3.session.Config(
            signature_version='s3v4',
            s3={'addressing_style': 'path'},
        )
        return boto3.session.Session().client(
            's3',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            endpoint_url=endpoint_url,
            region_name=region or None,
            config=client_config,
        )
    except Exception as e:
        # Any failure is reported on stdout and signalled to the caller as None.
        print(f"Error connecting to S3: {e}")
        return None

In [3]:
def get_next_org_id(s3_client, bucket_name, base_folder="org"):
    """
    Determine the next organization ID by listing existing folders.

    Every page of the listing is read, so the result stays correct when the
    base folder holds more sub-folders than a single ``list_objects_v2``
    response returns.

    Note: the ID is only computed here, not reserved. Two runs started at the
    same time can be handed the same ID.

    Args:
        s3_client: S3 client
        bucket_name (str): Name of the bucket
        base_folder (str): Base folder path

    Returns:
        str: Next organization ID as a zero-padded 5-digit string ("00001"
        when no numeric folder exists yet), or None if the listing failed
        or every 5-digit ID is already taken.
    """
    try:
        # Ensure base_folder doesn't start with '/' for S3 compatibility
        if base_folder.startswith('/'):
            base_folder = base_folder[1:]

        # Add trailing '/' if not present
        if not base_folder.endswith('/'):
            base_folder += '/'

        request = {
            'Bucket': bucket_name,
            'Prefix': base_folder,
            'Delimiter': '/',
        }

        # Collect organization IDs from the common prefixes of every page.
        org_ids = []
        while True:
            response = s3_client.list_objects_v2(**request)
            for entry in response.get('CommonPrefixes', []):
                # Last path component of e.g. "org/00003/" is "00003".
                folder_name = entry['Prefix'].rstrip('/').split('/')[-1]
                # Only folders named with exactly 5 digits count as org IDs.
                if re.fullmatch(r'\d{5}', folder_name):
                    org_ids.append(int(folder_name))

            if not response.get('IsTruncated'):
                break
            # More results remain: ask for the page after this one.
            request['ContinuationToken'] = response['NextContinuationToken']

        # With no existing folders this yields 1, i.e. "00001".
        next_id = max(org_ids, default=0) + 1

        # A 6-digit ID would not match the 5-digit pattern on later runs and
        # would therefore be handed out again, so refuse instead.
        if next_id > 99999:
            print("Error getting next organization ID: all 5-digit IDs are in use")
            return None

        return f"{next_id:05d}"  # Format as 5-digit number with leading zeros

    except ClientError as e:
        print(f"Error getting next organization ID: {e}")
        return None

In [4]:
def upload_org_files(s3_client, bucket_name, org_id, model_file_path, corpus_file_path, base_folder="org"):
    """
    Upload organization files to the S3 bucket.

    The model is stored as ``<base_folder>/<org_id>/<org_id>_model.pt`` and
    the corpus as ``<base_folder>/<org_id>/<org_id>_corpus.json``. The model
    is uploaded first; if the corpus upload then fails, the model object that
    was already uploaded is left in place.

    Args:
        s3_client: S3 client
        bucket_name (str): Name of the bucket
        org_id (str): Organization ID
        model_file_path (str): Path to the model file
        corpus_file_path (str): Path to the corpus file
        base_folder (str): Base folder path

    Returns:
        bool: True when both uploads succeeded, False when a local file could
        not be read or the storage service rejected an upload.
    """
    # Ensure base_folder doesn't start with '/' for S3 compatibility
    if base_folder.startswith('/'):
        base_folder = base_folder[1:]

    # Add trailing '/' if not present
    if not base_folder.endswith('/'):
        base_folder += '/'

    org_prefix = f"{base_folder}{org_id}/{org_id}"
    uploads = (
        ("model", model_file_path, f"{org_prefix}_model.pt"),
        ("corpus", corpus_file_path, f"{org_prefix}_corpus.json"),
    )

    try:
        for label, local_path, key in uploads:
            s3_client.upload_file(local_path, bucket_name, key)
            print(f"Uploaded {label} file to {key}")
        return True

    except OSError as e:
        # Missing or unreadable local file.
        print(f"Error reading local file: {e}")
        return False

    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # upload_file reports transfer failures as S3UploadFailedError rather
        # than ClientError, so both must be caught to honour the bool contract.
        print(f"Error uploading files: {e}")
        return False

## Process

In [5]:
def main():
    """
    Interactively create the next organization folder and upload its files.

    Prompts for the connection details, bucket, base folder and the two local
    file paths, checks that both files exist and that the bucket is reachable,
    asks ``get_next_org_id`` for the next ID and uploads the files with
    ``upload_org_files``. Progress and errors are printed; the function
    returns None in every case.
    """
    # Configuration
    access_key = input("Enter your access key: ").strip()
    # Read the secret without echo so it does not end up in the saved cell output.
    secret_key = getpass.getpass("Enter your secret key: ").strip()
    endpoint_url = input("Enter the S3 endpoint URL: ").strip()
    region = input("Enter the region (if applicable, leave empty otherwise): ").strip()
    bucket_name = input("Enter the bucket name: ").strip()
    base_folder = input("Enter the base folder path (default: 'org'): ").strip() or "org"

    # Get paths to the files to upload
    model_file_path = input("Enter the path to the model file: ").strip()
    corpus_file_path = input("Enter the path to the corpus file: ").strip()

    # Validate inputs
    if not all([access_key, secret_key, endpoint_url, bucket_name, model_file_path, corpus_file_path]):
        print("All inputs are required except the base folder and region.")
        return

    # Check that both paths point at regular files (a directory is not uploadable)
    if not os.path.isfile(model_file_path):
        print(f"Model file not found: {model_file_path}")
        return

    if not os.path.isfile(corpus_file_path):
        print(f"Corpus file not found: {corpus_file_path}")
        return

    # Connect to S3
    s3_client = connect_to_s3(access_key, secret_key, endpoint_url, region)
    if not s3_client:
        return

    # Test connection first
    try:
        print("Testing connection to bucket...")
        s3_client.head_bucket(Bucket=bucket_name)
        print(f"Connection successful to bucket '{bucket_name}'")
    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        if error_code == '404':
            print(f"Error: Bucket '{bucket_name}' does not exist")
        elif error_code == '403':
            print(f"Error: Access denied to bucket '{bucket_name}'")
        else:
            print(f"Error accessing bucket: {e}")
        return

    # Get the next organization ID
    org_id = get_next_org_id(s3_client, bucket_name, base_folder)
    if not org_id:
        return

    print(f"Creating organization folder with ID: {org_id}")

    # Upload the files
    success = upload_org_files(
        s3_client,
        bucket_name,
        org_id,
        model_file_path,
        corpus_file_path,
        base_folder
    )

    if success:
        print(f"Successfully created organization {org_id} and uploaded files.")
    else:
        print("Failed to create organization and upload files.")


Enter your access key:  <redacted>
Enter your secret key:  <redacted>
Enter the S3 endpoint URL:  https://objectstore.e2enetworks.net
Enter the region (if applicable, leave empty otherwise):  
Enter the bucket name:  chatbotv1
Enter the base folder path (default: 'org'):  
Enter the path to the model file:  ./EgressBuff/00001_model.pt
Enter the path to the corpus file:  ./EgressBuff/00001_corpus.json

Testing connection to bucket...
Connection successful to bucket 'chatbotv1'
Creating organization folder with ID: 00003
Uploaded model file to org/00003/00003_model.pt
Uploaded corpus file to org/00003/00003_corpus.json
Successfully created organization 00003 and uploaded files.


In [6]:
# Run the interactive workflow: prompts for connection details and file paths,
# then uploads the files under the next free organization ID.
main()

Enter your access key:  <redacted>
Enter your secret key:  <redacted>
Enter the S3 endpoint URL:  https://objectstore.e2enetworks.net
Enter the region (if applicable, leave empty otherwise):  
Enter the bucket name:  chatbotv1
Enter the base folder path (default: 'org'):  
Enter the path to the model file:  ./EgressBuff/school_m1.pt
Enter the path to the corpus file:  ./EgressBuff/SchCorpus.json


Testing connection to bucket...
Connection successful to bucket 'chatbotv1'
Creating organization folder with ID: 00004
Uploaded model file to org/00004/00004_model.pt
Uploaded corpus file to org/00004/00004_corpus.json
Successfully created organization 00004 and uploaded files.
