## Initialize Dependency and Configs

In [None]:
# Install pip requirements.txt
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [1]:
# Init Config and Dependency

from yaml import safe_load
from types import SimpleNamespace
import requests as re
import pandas as pd
import os
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.backends import default_backend
import base64
import json
from tqdm import tqdm
import re as regex
from dotenv import load_dotenv

def substitute_env_variables(yaml_content):
    """Substitute environment variables in the YAML content."""
    # Regular expression to match ${VAR} or $VAR
    pattern = regex.compile(r'\${(.*?)}|\$(\w+)')
    
    def replace(match):
        # Get the environment variable name from the match
        env_var = match.group(1) or match.group(2)
        # Return the value of the environment variable, or the original text if not found
        res = os.environ.get(env_var, match.group(0))
        return res
    
    # Replace environment variable placeholders with actual values
    return pattern.sub(replace, yaml_content)

def map_to_namespace(mapping):
    """
    Convert a mapping (like a dictionary or map object) into a nested namespace.
    """
    if isinstance(mapping, dict):  # If the object is a dictionary
        return SimpleNamespace(**{key: map_to_namespace(value) for key, value in mapping.items()})
    elif isinstance(mapping, (list, tuple)):  # For lists or tuples, apply recursively
        return [map_to_namespace(item) for item in mapping]
    else:  # Base case: return as is
        return mapping

def load_config(path):
    """Load and parse a YAML config file with environment variable substitution."""
    # Load environment variables from a .env file if available
    load_dotenv()

    with open(path, 'r') as file:
        # Read the file content
        yaml_content = file.read()
        # Substitute environment variables in the content
        yaml_content = substitute_env_variables(yaml_content)
        # Now parse the YAML content after substitution
        return safe_load(yaml_content)


def generate_ssh_key_pair(key_name="id_rsa", key_size=2048):
    """
    Generate an RSA SSH key pair and save them as files.

    Args:
        key_name (str): The base name of the key files (default is 'id_rsa').
        key_size (int): The size of the RSA key in bits (default is 2048).
    """
    # Generate the private key
    private_key = rsa.generate_private_key(
        public_exponent=65537,
        key_size=key_size,
        backend=default_backend()
    )

    # Serialize and save the private key
    private_key_path = f"{key_name}"
    with open(private_key_path, "wb") as priv_file:
        priv_file.write(
            private_key.private_bytes(
                encoding=serialization.Encoding.PEM,
                format=serialization.PrivateFormat.TraditionalOpenSSL,
                encryption_algorithm=serialization.NoEncryption()
            )
        )
    print(f"Private key saved to: {private_key_path}")

    # Generate the public key
    public_key = private_key.public_key()

    # Serialize and save the public key in OpenSSH format
    public_key_path = f"{key_name}.pub"
    with open(public_key_path, "wb") as pub_file:
        pub_file.write(
            public_key.public_bytes(
                encoding=serialization.Encoding.OpenSSH,
                format=serialization.PublicFormat.OpenSSH
            )
        )

    fix_private_key_permissions(public_key_path)
    print(f"Public key saved to: {public_key_path}")
    return public_key_path

def build_write_file_cloud(file, permissions = "0755", encoding = "b64", path="/home/ubuntu", ):
    target_path = os.path.join(path, os.path.basename(file))
    
    # Read the content of the bash file
    with open(file, "r") as file:
        content = file.read()
        
    encoded_content = base64.b64encode(content.encode('utf-8')).decode('utf-8')
    # Add to write_files
    return {
        "path": target_path,
        "content": encoded_content,
        "permissions": permissions,  # Ensuring the script is executable
        "encoding": encoding
    }

def cloud_init_writer(path="/home/ubuntu", files=[], bash_files=[], environments={}):
    write_files = []
    runcmd = []
    
    # Write files section
    for file in files:
        write_files.append(build_write_file_cloud(file, path=path))
    
    # Environment variables section
    for key, value in environments.items():
        runcmd.append(f"export {key}='{value}'")
    
    # Bash files section
    for bash_file in bash_files:
        # Get the filename from the full file path
        filename = os.path.basename(bash_file)
        target_path = os.path.join(path, filename)
        log_file = f"/var/log/{filename}.log"  # Log file to store output
        
        # Add to runcmd
        runcmd.extend([
            f"echo 'Running script: {filename}' >> /var/log/cloud-init.log",
            f"chown ubuntu:ubuntu {target_path}",
            f"chmod +x {target_path}",
            # Run the script and capture output (both stdout and stderr) to log file
            f"{target_path} >> {log_file} 2>&1",  # Redirect both stdout and stderr
            f"echo 'Script {filename} execution complete' >> {log_file}"
        ])
    
    return {
        "write_files": write_files,
        "runcmd": runcmd
    }

def cloud_init_generator(files, runcmd, path="/home/ubuntu", environments = {}):
    if not isinstance(files, list):
        folder_path = files
        files = [os.path.join(files, f) for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]

    return json.dumps(cloud_init_writer(path, files, runcmd, environments=environments))

config = map_to_namespace(load_config("config.yaml"))
print("Config Loaded:", config)

idch_header = {
    "apikey": config.idch.token
}

def idch_get(path):
    url = os.path.join(config.idch.host, path.format(location=config.cluster.location))
    print('GET ' + url)
    return pd.DataFrame(re.get(url, headers=idch_header).json())

def idch_post(path, data):
    url = os.path.join(config.idch.host, path.format(location=config.cluster.location))
    print('POST ' + url)
    res = re.post(url, headers=idch_header, data=data).json()
    return pd.DataFrame(res)
    
def idch_delete(path, data=None):
    url = os.path.join(config.idch.host, path.format(location=config.cluster.location))
    print('DELETE ' + url)
    print(re.delete(url, headers=idch_header, data=data).json())

def idch_delete_instance(uuid, ip_address):
    if (uuid):
        print("Deleting Container:", uuid)
        idch_delete("v1/{location}/user-resource/vm", data={
            "uuid": uuid,
        })
    
    if (ip_address):
        print("Deleting IP:", ip_address)
        idch_delete("v1/{location}/network/ip_addresses/" + ip_address)


def idch_get_instances():
    # Get the data from the API or external service
    vm_list = idch_get("v1/{location}/user-resource/vm/list")
    ip_addresses = idch_get("v1/{location}/network/ip_addresses")

    if (len(ip_addresses) == 0):
        print(vm_list)
        return vm_list
    
    ip_addresses = ip_addresses.rename(columns={'address': 'public_ipv4', "uuid": "network_uuid"})
    
    if (len(vm_list) == 0):
        return ip_addresses
    

    # Step 1: Perform the join on uuid and assigned_to
    merged_df = pd.merge(vm_list, ip_addresses, left_on='uuid', right_on='assigned_to', how='inner')

    # Step 2: Filter rows where the 'name' column starts with config.cluster.name
    filtered_df = merged_df[merged_df['name'].str.startswith(config.cluster.name)]

    # Step 3: Create a new DataFrame to ensure we aren't working on a slice of the original DataFrame
    result = filtered_df[['uuid','network_uuid','name', 'private_ipv4', 'status', 'public_ipv4']].copy()

    # Step 4: Add the 'command' column using .loc to avoid SettingWithCopyWarning
    result.loc[:, 'command'] = result.apply(
        lambda row: f"ssh -i {config.cluster.keypair.private} {config.cluster.username}@{row['public_ipv4']}",
        axis=1
    )

    return result

def convert_to_dict(key_value_list):
    result = {}
    for item in key_value_list:
        key, value = item.split("=", 1)  # Split at the first '=' character
        result[key] = value
    return result

def idch_build_node(node_config, resource_config, role="master", environments = {}):
    with open(config.cluster.keypair.public, "r") as file:
        public_key = file.read()
        
    if (node_config.cloud_init.environments):
        environments.update(convert_to_dict(node_config.cloud_init.environments))

    environments["CLOUD_INIT_WORKDIR"] = node_config.cloud_init.path
    environments["NODE_ROLE"] = role
    environments["NODE_USER"] = config.cluster.username

    data = {
        "name": config.cluster.name + "_master",
        "os_name": node_config.os_name,
        "os_version": node_config.os_version,
        "disks": int(resource_config.storage),
        "vcpu": int(resource_config.cpu),
        "ram": int(resource_config.memory) * (2 ** 10),
        "username": config.cluster.username,
        "password": config.cluster.password,
        "public_key": public_key,
        "cloud_init": cloud_init_generator(node_config.cloud_init.files, node_config.cloud_init.runcmd, node_config.cloud_init.path, environments=environments)
    }

    print(data)

    return idch_post("v1/{location}/user-resource/vm", data)

# Assuming you have your instances DataFrame ready
def idch_healthcheck_instance():
    instances = idch_get_instances()  # Assume this returns a DataFrame with instance details
    health_set = {}
    count = 0

    # Initialize tqdm progress bar for the loop
    with tqdm(total=len(instances), desc="Checking health", unit="instance") as pbar:
        while len(health_set.keys()) != len(instances):
            count += 1
            for i, row in instances.iterrows():
                try:
                    re.get("http://" + row.get('public_ipv4') + ":8000/health", timeout=1)
                    health_set[row.get('uuid')] = True
                    pbar.update(1)
                except Exception as _:
                    # Log to tqdm (not interrupting the progress bar)
                    pbar.set_postfix({"Error": f"Failed {row.get('uuid')}", "Iter": count}, refresh=True)
            

def fix_private_key_permissions(private_key_path):
    """
    This function ensures that the private key file has the correct permissions (600).
    This is typically required for SSH private keys to ensure that only the owner can read/write it.
    """
    # Check if the file exists
    if not os.path.exists(private_key_path):
        raise FileNotFoundError(f"The specified private key file does not exist: {private_key_path}")
    
    # Check the current file permissions
    current_permissions = oct(os.stat(private_key_path).st_mode)[-3:]
    
    # Set the correct permissions (600)
    if current_permissions != '600':
        print(f"Fixing permissions for {private_key_path}. Current permissions: {current_permissions}")
        os.chmod(private_key_path, 0o600)  # Set permission to 600 (read/write for owner only)
        print(f"Permissions fixed to 600 for {private_key_path}")
    else:
        print(f"Permissions for {private_key_path} are already correctly set to 600.")

private_key_path = config.cluster.keypair.private

if private_key_path:
    try:
        # Attempt to fix the permissions if the key exists
        fix_private_key_permissions(private_key_path)
    except FileNotFoundError as e:
        print("Private key pair not found, generating one...")
        
        # Extract the directory and file name from the private key path
        key_dir = os.path.dirname(private_key_path)
        key_name = os.path.basename(private_key_path)
        
        # Create the directory if it doesn't exist
        os.makedirs(key_dir, exist_ok=True)
        
        # Generate SSH key pair
        generate_ssh_key_pair(private_key_path)


Config Loaded: namespace(idch=namespace(host='https://api.idcloudhost.com/', token='VzCQ8K67gjjgaYA01XHppnDsBQUMw8a2', access_name='konfersi-mpi'), cluster=namespace(name='konfersi_mpi', location='sgp01', network_uuid='0af9b107-d1d4-4c3d-84fc-71299e5c5c32', username='konfersiadmin', password='voNrec-wokkac-3sezky', keypair=namespace(public='keys/id_rsa.pub', private='keys/id_rsa')), master=namespace(os_name='ubuntu', os_version='20.04-lts', cloud_init=namespace(path='/home/ubuntu', files='assets', runcmd=['0-init-server.sh'], environments=['TEST_ENV_MASTER=this env was came from master config']), init_resources=namespace(cpu=2, memory=2, storage=150), resources=namespace(cpu=16, memory=8, storage=200)), worker=namespace(nodes=5, os_name='ubuntu', os_version='20.04-lts', cloud_init=namespace(path='/home/ubuntu', files='assets', runcmd=['0-init-server.sh', 'TEST_ENV_WORKER=this env was came from worker config'], environments=None), resources=namespace(cpu=16, memory=8, storage=20)))
Perm

In [2]:
# Get List of Locations
idch_get("v1/config/locations")

GET https://api.idcloudhost.com/v1/config/locations


Unnamed: 0,display_name,is_preferred,is_default,is_published,description,country_code,order_nr,create_resource_disabled,slug
0,SouthJKT-a,True,True,True,SouthJKT-a (jkt01),idn,1,False,jkt01
1,NorthJKT-a,False,False,True,NorthJKT-a (jkt02),idn,99,True,jkt02
2,WestJKT-a,False,False,True,WestJKT-a (jkt03),idn,0,False,jkt03
3,Singapore,False,False,True,Singapore (sgp01),sgp,3,False,sgp01


In [4]:
# Get Available Network ID
idch_get("v1/{location}/network/networks")

GET https://api.idcloudhost.com/v1/sgp01/network/networks


Unnamed: 0,vlan_id,subnet,name,created_at,updated_at,uuid,type,is_default,vm_uuids,resources_count,subnet_ipv6
0,14534,10.57.254.0/24,My Network,2024-02-16 13:22:21,2024-02-16 13:22:21,114f613b-d3ca-4e47-b8da-e19de0872961,private,True,[],0,
1,16280,10.64.215.0/24,swarm-network-sg,2024-11-13 10:00:08,2024-11-13 10:00:08,0af9b107-d1d4-4c3d-84fc-71299e5c5c32,private,False,[],0,


In [5]:
# Get Available OS Catalogue
plain_oses = idch_get("v1/config/vm_images/plain_os")[['os_name', 'versions']].explode('versions')
plain_oses['os_version'] = plain_oses['versions'].apply(lambda x: x['os_version'])
del plain_oses['versions']
plain_oses

GET https://api.idcloudhost.com/v1/config/vm_images/plain_os


Unnamed: 0,os_name,os_version
0,almalinux,9.x
0,almalinux,8.x
1,bsd,freebsd_12.2
2,centos,9.x
3,cloudlinux,8.4
3,cloudlinux,7.9
4,debian,11
4,debian,12
5,fedora,32
5,fedora,34


## Initialize Master Node

In [2]:
# Create Master Node
idch_build_node(config.master, config.master.init_resources)

{'name': 'konfersi_mpi_master', 'os_name': 'ubuntu', 'os_version': '20.04-lts', 'disks': 150, 'vcpu': 2, 'ram': 2048, 'username': 'konfersiadmin', 'password': 'voNrec-wokkac-3sezky', 'public_key': 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDYaO5kuqFmPkOo+w5Q3KBZdC06CoqAQEnEnCsgqhDR6Nrzc+Df+YEm0gCjNJLis5pJ1m2U798OR88CgjEwlann2AqYN+IMl6dnvOFdRBnGpVZAbvMpN4hTIXK7w5/CCiDIElEffxEoo82CEEkceAXd27t4L/3X9IsBy/1SIMDkpJBbDMrbvnhohNQD55QErGIByOBPL0FuomTvlfBkXGFi7JTuCF88hvOEvNG92MWN1lyd0L55oHbdKetbR8uvXL21D9YEgcu1RT7ouc/DF5RPwSRbIEqQSf5EVg+mYtiUxfYoULv4MlB3He2Su8dcimz3Nhuk62A46C0WF+0Np42v', 'cloud_init': '{"write_files": [{"path": "/home/ubuntu/8-mpich-agent-stack.yaml", "content": "dmVyc2lvbjogJzMnCgpzZXJ2aWNlczoKICBtcGlfbWFzdGVyOgogICAgaW1hZ2U6IGFsZmlhbmlzbmFuMjYva29uZmVyc2ktbXBpOmxhdGVzdAogICAgZGVwbG95OgogICAgICByZXBsaWNhczogMQogICAgICByZXNvdXJjZXM6CiAgICAgICAgbGltaXRzOgogICAgICAgICAgICBjcHVzOiAnMicKICAgICAgICAgICAgbWVtb3J5OiAyNTAwTQogICAgICBwbGFjZW1lbnQ6CiAgICAgICAgY29uc3RyYWludHM6CiAgICAgICAgICAtI

Unnamed: 0,backup,billing_account,created_at,description,hostname,mac,memory,name,os_name,os_version,private_ipv4,status,storage,updated_at,user_id,username,uuid,vcpu
0,False,1200219732,2024-12-07 02:44:37,,konfersimpimaster,52:54:00:f1:1d:f5,2048,konfersi_mpi_master,ubuntu,20.04-lts,10.57.254.228,running,"{'created_at': '2024-12-07 02:44:40', 'name': ...",2024-12-07 02:46:23,25542,konfersiadmin,ec5a8279-7606-4739-84bc-2a5aa27846dc,2


In [40]:
# Get SSH Command
idch_get_instances()

GET https://api.idcloudhost.com/v1/sgp01/user-resource/vm/list
GET https://api.idcloudhost.com/v1/sgp01/network/ip_addresses


Unnamed: 0,uuid,network_uuid,name,private_ipv4,status,public_ipv4,command
0,547aa71b-dd01-46fa-b174-aba37e0c794f,c26b9e6a-e895-402e-bcb8-1b41416d8d4e,konfersi_mpi_master,10.57.254.14,running,103.187.147.40,ssh -i keys/id_rsa konfersiadmin@103.187.147.40


In [41]:
# Healthcheck
idch_healthcheck_instance()

GET https://api.idcloudhost.com/v1/sgp01/user-resource/vm/list
GET https://api.idcloudhost.com/v1/sgp01/network/ip_addresses


Checking health:   0%|          | 0/1 [00:03<?, ?instance/s, Error=Failed 547aa71b-dd01-46fa-b174-aba37e0c794f, Iter=3]


KeyboardInterrupt: 

## Cluster Management

In [None]:
idch_build_node(config.worker, config.worker.resource, environments={
    "MASTER_NODE_IP": "192.168.0.1"
})

{'name': 'konfersi_mpi_worker0', 'os_name': 'ubuntu', 'os_version': '20.04-lts', 'disks': 20, 'vcpu': 16, 'ram': 8192, 'username': 'konfersi', 'password': 'voNrec-wokkac-3sezky', 'public_key': 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCYcgJtzDqd3AnQvNlYwWVZ8upcQ0ZaLZ4upmEz9tneKU4ppGMu1WK7zCYoVUvZTMxknNG1xe6vKkf6owamEo4LgxKhIlgp7qQWvmWsDQyS1aRl5D9lqZGZa1HhcOLSrhx8avcm+Jcq23FqYhuW97Z827IpS+X2Yxk7Wg0r9y3E6Jub06ZxefM1vIVYyFSqmVAOBE0xIJ22oVcqKIdBP5P0mAnqMJnCuWayFk/ZYB9Wt1+rvz5hiLKL+19PP31Z4o0z/mwIA8p0lG0JNooHca2ULFWWD8Dp/9hFv8uQvIxqAxyy8rcfxbJwe6hsF9ZiKldwL1CjYEBXP2c3xccgG5Qp', 'cloud_init': '{"write_files": [{"path": "/home/ubuntu/3-portainer-agent-stack.yml", "content": "dmVyc2lvbjogJzMuMicKCnNlcnZpY2VzOgogIGFnZW50OgogICAgaW1hZ2U6IHBvcnRhaW5lci9hZ2VudDoyLjIxLjQKICAgIHZvbHVtZXM6CiAgICAgIC0gL3Zhci9ydW4vZG9ja2VyLnNvY2s6L3Zhci9ydW4vZG9ja2VyLnNvY2sKICAgICAgLSAvdmFyL2xpYi9kb2NrZXIvdm9sdW1lczovdmFyL2xpYi9kb2NrZXIvdm9sdW1lcwogICAgbmV0d29ya3M6CiAgICAgIC0gYWdlbnRfbmV0d29yawogICAgZGVwbG95OgogICAgICBtb2

KeyboardInterrupt: 

In [13]:
# Health Checker and Links
idch_healthcheck_instance()

GET https://api.idcloudhost.com/v1/sgp01/user-resource/vm/list
GET https://api.idcloudhost.com/v1/sgp01/network/ip_addresses


Checking health: 100%|██████████| 1/1 [00:01<00:00,  1.00s/instance]


In [None]:
# Stop Cluster

In [27]:
# Delete Cluster
for index, row in idch_get_instances().iterrows():
    idch_delete_instance(row.get('uuid'), row.get('public_ipv4'))

GET https://api.idcloudhost.com/v1/sgp01/user-resource/vm/list
GET https://api.idcloudhost.com/v1/sgp01/network/ip_addresses
Deleting Container: d978456e-c822-4eda-aad1-8e6a540293f8
DELETE https://api.idcloudhost.com/v1/sgp01/user-resource/vm
{'success': True}
Deleting IP: 103.187.147.40
DELETE https://api.idcloudhost.com/v1/sgp01/network/ip_addresses/103.187.147.40
True
