# Create an empty Ubuntu VM on Azure

Install HAGrid

```
pip install hagrid
```

__NOTE__: In case there are issues with a VM, we should provision a few extra. Here we use `--node_count 12` for a session of 10 users.

Run hagrid launch with these arguments:

```
hagrid launch to azure --image_name=domain_0.7.0 --jupyter --ansible_extras="install=false,aa_demo=true" --node_count 12
```

- Use a new unique resource group for this session like: aa-test-1
- Choose the location where your demo participants will be located, e.g. `eastus`, `westus` etc
- Choose an 8 core machine like `Standard_D8s_v3`
- Set the username to `azureuser`
- Choose `password` as the authentication type, then answer `n` when asked whether to auto-generate the password
- Set an easy-to-remember password of at least 12 characters, like: `Adastrademo2022`
- Whatever you enter for Repo and Branch will be ignored

![ip_address](img/hagrid_bare_vm.png)

After it is finished you should see this message

![ip_address](img/hagrid_bare_vm_output.png)

Now run this to get JSON containing the information for all the VMs:

```
cat ~/.hagrid/host_ips.json
```

In [10]:
import os
import json

# Location of the host-IP JSON file; change the literal below if yours lives
# elsewhere. A leading "~" is expanded to the current user's home directory.
HOST_IP_PATH = os.path.expanduser("~/.hagrid/host_ips.json")

# Parse the JSON document straight from the open file handle.
with open(HOST_IP_PATH) as fp:
    host_ips = json.load(fp)

In [11]:
# Display the parsed records to confirm the file loaded as expected.
# NOTE: the output includes each VM's password and Jupyter token, so clear
# this cell's output before sharing the notebook.
host_ips

{'host_ips': [{'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.245.2.237',
   'jupyter_token': 'rce4r1712ejdzz2tfkak1unfp69pi0dy9jdlfxrnkqykmtos'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.245.3.8',
   'jupyter_token': 'scs9u4o58e5pp8jelzi7fo2txdmzim25kp3hn9a24p0oemc2'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.245.2.223',
   'jupyter_token': '66ce29qudc52226d90gwxz8mw1hsag73wha2cn40n89slq64'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.245.2.236',
   'jupyter_token': 'akdufkewvcwa0jtujrmxj3mhujy12uww5oqtxd4y9i6xhnsh'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.245.3.9',
   'jupyter_token': 'rfutvu3owuveszu8t4wyiuyvk4eerya1t9ca21zdfdza8lo5'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.245.2.221',
   'jupyter_token': 's82w54ldz41o2392656hcg3mbynwn2nbzs9j

In [12]:
# Update TOTAL_PARTICIPANTS for each session.
# Use the number of people attending, not the number of machines provisioned
# (e.g. 10, not 12). output_user_details() rejects a value larger than the VM
# count and labels every host beyond this count as a spare session.
TOTAL_PARTICIPANTS: int = 10

In [13]:
# optionally add names or emails here which will be printed below to help keep track of assignment
participants = [
    "Teo",
    "Ruchi",
    "Kyoko",
    "Ivy",
    "Shubham",
    "Irina",
    "Laura",
    "Ionesio",
    "Ronnie",
    "Rasswanth",
]

# The list may be shorter than TOTAL_PARTICIPANTS (names are optional) but
# never longer. An explicit raise is used instead of `assert` so the check
# still runs when Python is started with -O.
if len(participants) > TOTAL_PARTICIPANTS:
    raise ValueError(
        f"participants has {len(participants)} entries but TOTAL_PARTICIPANTS is "
        f"{TOTAL_PARTICIPANTS}; the list must not be longer than TOTAL_PARTICIPANTS"
    )
print("Total participants:", len(participants))

Total participants: 10


If you need to re-partition the MedNIST dataset and create new data subsets, switch to [prepare MedNIST dataset notebook](02-prepare-datasets-MedNIST.ipynb).

In [14]:
import requests


DATASET_INFO_FILEPATH = "https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/dataset.json"

def get_dataset_urls(timeout: float = 30) -> list[str]:
    """Fetch the dataset index and return the full URL of every data subset.

    Downloads the JSON document at ``DATASET_INFO_FILEPATH`` and appends each
    of its values to the subsets base URL.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        One URL per value in the index, in the index's iteration order.

    Raises:
        requests.HTTPError: If the index request returns an error status.
        requests.RequestException: On connection problems or timeout.
    """
    subsets_base_url = "https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/subsets/"

    response = requests.get(DATASET_INFO_FILEPATH, timeout=timeout)
    # Fail with the real HTTP error instead of a confusing JSON decode error
    # when the index cannot be fetched.
    response.raise_for_status()

    # assumes the index is a JSON object whose values are subset file names
    # -- TODO confirm against the dataset repository
    data_subset_info = response.json()
    return [subsets_base_url + dataset_name for dataset_name in data_subset_info.values()]

In [15]:
def check_ip_port(host_ip: str, port: int, timeout: float = 2) -> bool:
    """Return True if a TCP connection to ``host_ip:port`` can be opened.

    This is a best-effort probe: any failure to connect (refused, timed out,
    unresolvable host, invalid address or port) yields False rather than an
    exception.

    Args:
        host_ip: IPv4 address or host name to probe.
        port: TCP port number to probe.
        timeout: Seconds to wait for the connection attempt.

    Returns:
        True when the connection succeeds, otherwise False.
    """
    import socket

    try:
        # The context manager closes the socket even if connect_ex raises,
        # which the previous explicit close() call did not guarantee.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(timeout)
            # connect_ex returns 0 on success and an errno value otherwise
            return sock.connect_ex((host_ip, port)) == 0
    except (OSError, OverflowError, TypeError, ValueError):
        # OSError: resolution/socket errors; the rest: malformed host or port.
        return False

In [16]:
def get_icon(status: bool) -> str:
    """Return a check-mark emoji for a truthy status and a cross otherwise."""
    if status:
        return "✅"
    return "❌"

In [7]:
def check_hosts_ready(host_ips: dict) -> None:
    """Print a readiness report for every host listed under ``host_ips["host_ips"]``.

    For each host, ports 80, 8888 and 22 are probed in that order and one
    status line is printed per probe. A node is reported ready when port 80
    is closed while Jupyter (8888) and SSH (22) are reachable.
    """
    divider = "-----------------------"
    for entry in host_ips["host_ips"]:
        print(divider)
        address = entry["ip_address"]

        # An open port 80 means the containers are already running,
        # so the icon is derived from the negated probe result.
        web_open = check_ip_port(host_ip=address, port=80)
        print(f"{get_icon(not web_open)} Containers Off {address}:80")

        # Jupyter must be reachable.
        jupyter_open = check_ip_port(host_ip=address, port=8888)
        print(f"{get_icon(jupyter_open)} Jupyter Up {address}:8888")

        # SSH must be reachable.
        ssh_open = check_ip_port(host_ip=address, port=22)
        print(f"{get_icon(ssh_open)} SSH Up {address}:22")

        print()
        node_ready = all((not web_open, jupyter_open, ssh_open))
        print(f"{get_icon(node_ready)} Node {address} Ready!")
        print(divider)
        print()

In [31]:
# Probe ports 80, 8888 and 22 on every VM and print a readiness report.
# Each probe waits on the network, so this cell can take a while with many hosts.
check_hosts_ready(host_ips)

-----------------------
✅ Containers Off 20.232.50.67:80
❌ Jupyter Up 20.232.50.67:8888


KeyboardInterrupt: 

In [17]:
def output_user_details(host_ips: dict, participants: "list[str] | None" = None) -> None:
    """Print the session details to send to each participant.

    For every host under ``host_ips["host_ips"]`` this prints the VM
    credentials, an assigned dataset URL and the Jupyter start link. Hosts
    beyond ``TOTAL_PARTICIPANTS`` are labelled as spare sessions.

    Args:
        host_ips: Mapping with a ``"host_ips"`` list whose entries provide
            ``username``, ``password``, ``ip_address`` and ``jupyter_token``.
        participants: Optional names printed as a greeting for the first
            ``len(participants)`` hosts. Defaults to no names.

    Raises:
        Exception: If ``TOTAL_PARTICIPANTS`` exceeds the number of VMs, or if
            no dataset URLs are available to assign.
    """
    # Avoid a shared mutable default argument.
    if participants is None:
        participants = []

    notebook_path = "adastra/data-owners/data-owners-presentation.ipynb"
    print("===============================")
    print("Ad Astra Demo 1")
    print("===============================")
    print()
    print("Send to each participant")
    print()

    hosts = host_ips["host_ips"]
    if TOTAL_PARTICIPANTS > len(hosts):
        raise Exception(
            f"TOTAL_PARTICIPANTS: {TOTAL_PARTICIPANTS} is greater than VM count: {len(hosts)}"
        )

    dataset_urls = get_dataset_urls()
    num_of_urls = len(dataset_urls)
    if hosts and num_of_urls == 0:
        # Without this guard the modulo below fails with an opaque ZeroDivisionError.
        raise Exception("No dataset URLs were returned, cannot assign datasets to hosts")

    for partition, host in enumerate(hosts, start=1):
        if partition <= len(participants):
            print(f"Hi {participants[partition - 1]},")
        if partition <= TOTAL_PARTICIPANTS:
            print("These are your Session Details:")
        else:
            print("Spare Session Details:")
        print("-------------------------------")
        print(f"VM Username: {host['username']}")
        print(f"VM Password: {host['password']}")
        print(f"VM IP Address: {host['ip_address']}")
        # NOTE(review): partition is 1-based, so the first host receives
        # dataset_urls[1] and index 0 goes to the host whose position is a
        # multiple of the URL count. Kept unchanged so existing assignments
        # do not shift; confirm whether (partition - 1) was intended.
        print(f"📎 MY_DATASET_URL:\n{dataset_urls[partition % num_of_urls]}")

        print()
        print("👉🏽 Start Here:")
        print(
            f"http://{host['ip_address']}:8888/lab/tree/notebooks/{notebook_path}"
            f"?token={host['jupyter_token']}"
        )
        print()

In [18]:
# Print one message per VM (credentials, dataset URL and Jupyter link) to
# copy and send to each participant.
output_user_details(host_ips, participants)

Ad Astra Demo 1

Send to each participant

Hi Teo,
These are your Session Details:
-------------------------------
VM Username: azureuser
VM Password: Adastrademo2022
VM IP Address: 20.245.2.237
📎 MY_DATASET_URL:
https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/subsets/TissueMNIST-1ffc51892957453e908919645da3ea90.pkl

👉🏽 Start Here:
http://20.245.2.237:8888/lab/tree/notebooks/adastra/data-owners/data-owners-presentation.ipynb?token=rce4r1712ejdzz2tfkak1unfp69pi0dy9jdlfxrnkqykmtos

Hi Ruchi,
These are your Session Details:
-------------------------------
VM Username: azureuser
VM Password: Adastrademo2022
VM IP Address: 20.245.3.8
📎 MY_DATASET_URL:
https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/subsets/TissueMNIST-551b7b7265d84bfd8dea36476a7e77f6.pkl

👉🏽 Start Here:
http://20.245.3.8:8888/lab/tree/notebooks/adastra/data-owners/data-owners-presentation.ipynb?token=scs9u4o58e5pp8jelzi7fo2txdmzim25kp3hn9a24p0oemc2

Hi Kyoko,
These are your Sessio