# Create an empty Ubuntu VM on Azure

Install HAGrid

```
pip install hagrid
```

__NOTE__: Just in case there are some issues with a VM we should provision are few extra, here we use node_count 12 for a session of 10 users

Run hagrid launch with these arguments:

```
hagrid launch to azure --image_name=domain_0.7.0 --jupyter --ansible_extras="install=false,aa_demo=true" --node_count 12
```

- Use a new unique resource group for this session like: aa-test-1
- Choose the location where your demo participants will be located, e.g. `eastus`, `westus` etc
- Choose an 8 core machine like `Standard_D8s_v3`
- Set the username to `azureuser`
- Choose password and then `n` to auto-generate password
- Set an easy to remember 12 character password like: `Adastrademo2022`
- What ever you enter into Repo and Branch will be ignored

![ip_address](img/hagrid_bare_vm.png)

After it is finished you should see this message

![ip_address](img/hagrid_bare_vm_output.png)

Now run this to get JSON containing the information for all the VMs:

```
cat ~/.hagrid/host_ips.json
```

In [1]:
import os
import json

# paste the path to host ip json here
HOST_IP_PATH = "~/.hagrid/host_ips.json"
HOST_IP_PATH = os.path.expanduser(HOST_IP_PATH)

with open(HOST_IP_PATH) as fp:
    host_ips = json.loads(fp.read())

In [2]:
host_ips

{'host_ips': [{'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '52.188.79.206',
   'jupyter_token': 'nhvntkwi4l3zam5o1xv45c2h9alf6r39vmmvg9irxph9hkzl'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '52.188.79.139',
   'jupyter_token': '08shavto27rswkurfges38v7er0barwal0ioq4i3519l8ant'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '52.188.79.75',
   'jupyter_token': 'zolffc9rtfvl1djxmt7d27l3f79flfo2xll3ykduy8f1kqcu'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.185.73.219',
   'jupyter_token': '838kdzh4jdqf1hk3eayynmexuid5r0zbnzds5m0jci13usrw'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.185.73.214',
   'jupyter_token': 'n56nx4snq0o7hhq91r0gp7xdonzmjy4aiarvcenmzjsw8n3l'},
  {'username': 'azureuser',
   'password': 'Adastrademo2022',
   'ip_address': '20.185.74.201',
   'jupyter_token': '63bdws7s6kf7191wevdrgtka1t8

In [3]:
# update TOTAL_PARTICIPANTS
# use the total participants not the total machines, e.g. 10 not 12
# as this is used to calculate the data split assignment
TOTAL_PARTICIPANTS = 10

In [4]:
# add names and emails so we can send out the links with details via email

In [36]:
participants = {
    "Madhava Jay": "madhava@openmined.org",
    # "Shubham Gupta": "shubham@openmined.org",
}

In [6]:
# optionally add names or emails here which will be printed below to help keep track of assignment

assert len(participants) <= TOTAL_PARTICIPANTS, "TOTAL_PARTICIPANTS should be less than or equal to the length of participant list"
print("Total participants:", len(participants))

Total participants: 2


If you need to re-partition the MedNIST dataset and create new data subsets, switch to [prepare MedNIST dataset notebook](02-prepare-datasets-MedNIST.ipynb).

In [7]:
import requests
datasets =  ["MedNIST","TissueMNIST"]
curr_dataset_name = datasets[1]
DATASET_INFO_FILEPATH = f"https://raw.githubusercontent.com/OpenMined/datasets/main/{curr_dataset_name}/dataset.json"

def get_dataset_urls():
    
    data_subset_urls = []
    
    response = requests.get(DATASET_INFO_FILEPATH)
    data_subset_info = response.json()
    DATASET_REPO_URL = f"https://raw.githubusercontent.com/OpenMined/datasets/main/{curr_dataset_name}/subsets/"
    
    for dataset_name in data_subset_info.values():
        url = DATASET_REPO_URL + dataset_name
        data_subset_urls.append(url)
        
    return data_subset_urls

In [8]:
def check_ip_port(host_ip: str, port: int) -> bool:
    import socket
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(2)
        result = sock.connect_ex((host_ip, port))
        sock.close()
        if result == 0:
            return True
    except Exception:
        pass
    return False

In [9]:
def get_icon(status: bool) -> str:
    return "✅" if status else "❌"

In [10]:
def check_hosts_ready(host_ips: dict) -> None:
    for host in host_ips["host_ips"]:
        print("-----------------------")
        host_ip = host["ip_address"]
        # make sure the containers are not running
        http_up = check_ip_port(host_ip=host_ip, port=80)
        print(f"{get_icon(not http_up)} Containers Off {host_ip}:80")
        
        # make sure jupyter notebooks is up
        jupyter_up = check_ip_port(host_ip=host_ip, port=8888)
        print(f"{get_icon(jupyter_up)} Jupyter Up {host_ip}:8888")
        
        # make sure SSH is up
        ssh_up = check_ip_port(host_ip=host_ip, port=22)
        print(f"{get_icon(ssh_up)} SSH Up {host_ip}:22")

        print()
        all_status = (not http_up) and jupyter_up and ssh_up
        print(f"{get_icon(all_status)} Node {host_ip} Ready!")
        print("-----------------------")
        print()

In [11]:
check_hosts_ready(host_ips)

-----------------------
❌ Containers Off 52.188.79.206:80
❌ Jupyter Up 52.188.79.206:8888
❌ SSH Up 52.188.79.206:22

❌ Node 52.188.79.206 Ready!
-----------------------

-----------------------


KeyboardInterrupt: 

In [12]:
def output_user_details(host_ips: dict, participants: dict[str, str] = {}) -> None:
    notebook_path = "adastra/data-owners/data-owners-presentation.ipynb"
    print("===============================")
    print("Ad Astra Demo 1")
    print("===============================")
    print()
    print("Send to each participant")
    print()
    if TOTAL_PARTICIPANTS > len(host_ips["host_ips"]):
        raise Exception(
            f"TOTAL_PARTICIPANTS: {TOTAL_PARTICIPANTS} is less than VM count: {len(host_ips['host_ips'])}"
        )
    partition = 0
    dataset_urls = get_dataset_urls()
    num_of_urls = len(dataset_urls)
    for host in host_ips["host_ips"]:
        partition += 1
        if partition <= len(participants):
            print(f"Hi {list(participants.keys())[partition - 1]},")
        if partition <= TOTAL_PARTICIPANTS:
            print("These are your Session Details:")
        else:
            print("Spare Session Details:")
        print("-------------------------------")
        # print(f"VM Username: {host['username']}")
        # print(f"VM Password: {host['password']}")
        # print(f"VM IP Address: {host['ip_address']}")
        print(f"📎 MY_DATASET_URL:\n{dataset_urls[partition%num_of_urls]}")

        print()
        print(f"👉🏽 Start Here:")
        print(
            f"http://{host['ip_address']}:8888/lab/tree/notebooks/{notebook_path}"
            f"?token={host['jupyter_token']}"
        )
        print()

In [13]:
output_user_details(host_ips, participants)

Ad Astra Demo 1

Send to each participant

Hi Madhava Jay,
These are your Session Details:
-------------------------------
📎 MY_DATASET_URL:
https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/subsets/TissueMNIST-1ffc51892957453e908919645da3ea90.pkl

👉🏽 Start Here:
http://52.188.79.206:8888/lab/tree/notebooks/adastra/data-owners/data-owners-presentation.ipynb?token=nhvntkwi4l3zam5o1xv45c2h9alf6r39vmmvg9irxph9hkzl

Hi Shubham Gupta,
These are your Session Details:
-------------------------------
📎 MY_DATASET_URL:
https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/subsets/TissueMNIST-551b7b7265d84bfd8dea36476a7e77f6.pkl

👉🏽 Start Here:
http://52.188.79.139:8888/lab/tree/notebooks/adastra/data-owners/data-owners-presentation.ipynb?token=08shavto27rswkurfges38v7er0barwal0ioq4i3519l8ant

These are your Session Details:
-------------------------------
📎 MY_DATASET_URL:
https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/subsets/TissueMNI

In [54]:
addresses_sent_to = []

In [25]:
addresses_sent_to

[]

In [26]:
!pip install sendgrid

Collecting sendgrid
  Downloading sendgrid-6.9.7-py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.1/101.1 KB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting starkbank-ecdsa>=2.0.1
  Downloading starkbank-ecdsa-2.0.3.tar.gz (12 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting python-http-client>=3.2.1
  Downloading python_http_client-3.3.7-py3-none-any.whl (8.4 kB)
Building wheels for collected packages: starkbank-ecdsa
  Building wheel for starkbank-ecdsa (setup.py) ... [?25ldone
[?25h  Created wheel for starkbank-ecdsa: filename=starkbank_ecdsa-2.0.3-py3-none-any.whl size=14229 sha256=276862cfb5e6476b682e9bf9cb07204e2e46edb9d3f215269aa95e4644e2bf29
  Stored in directory: /Users/madhavajay/Library/Caches/pip/wheels/e3/bb/37/b77a2a1b31257de6f06fe38f590bf2396ec3477a65cdae06a8
Successfully built starkbank-ecdsa
Installing collected packages: starkbank-ecdsa, python-http-client, sendgrid
Successfully installed pyt

In [44]:
# from_address='research@openmined.org'
from_address='madhava@openmined.org' # test

In [45]:
SENDGRID_API_KEY = ""

In [50]:
template = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html
  data-editor-version="2"
  class="sg-campaigns"
  xmlns="http://www.w3.org/1999/xhtml"
>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=1"
    />
    <style>
      body {
        font-family: "Chivo", sans-serif;
      }
    </style>
</head>
<table width="100%">
<tr>
<td align="center">
<img src="http://cdn.mcauto-images-production.sendgrid.net/919ca6f839ef4a13/8ac55789-f733-4ce7-929c-3a9483e9ffeb/600x257.png" width="400px" style="text-align:center;" />
</td>
</tr>
</table>
<body>
"""

close_template = """
</body>
</html>
"""

In [51]:
def send_email(person, email, host, notebook_url, dataset_url) -> bool:
    print("send email to", person, email, host)
    # using SendGrid's Python Library
    # https://github.com/sendgrid/sendgrid-python
    import os
    from sendgrid import SendGridAPIClient
    from sendgrid.helpers.mail import Mail

    message = Mail(
        from_email=from_address,
        to_emails=email,
        subject="OpenMined: Medical Federated Learning Program - Session 1",
        html_content=f"""
        {template}
        <p>Hi {person},</p>
        
        <p>These are your session details for the Medical Federated Learning Program - Session 1:</p>
        <p>👇🏽 Click this link to start your interactive Jupyter Notebook<br />
        {notebook_url}</p>
        <p>📎 Copy and Paste this later during the demo:<br /><br />
        MY_DATASET_URL="{dataset_url}"</p>

        <br />
        Regards,<br />
        The OpenMined Research Team<br />
        openmined.org
        {close_template}
        """)
    try:
        sg = SendGridAPIClient(SENDGRID_API_KEY)
        response = sg.send(message)
        if response.status_code == 202:
            return True
        else:
            print(response.status_code)
            print(response.body)
            print(response.headers)
            return False
    except Exception as e:
        print(e.message)
        return False
    return True

In [55]:
def send_emails(host_ips: dict, participants: dict[str, str] = {}) -> None:
    notebook_path = "adastra/data-owners/data-owners-presentation.ipynb"
    print("===============================")
    print("Ad Astra Demo 1")
    print("===============================")
    print()
    print("Send to each participant")
    print()
    if TOTAL_PARTICIPANTS > len(host_ips["host_ips"]):
        raise Exception(
            f"TOTAL_PARTICIPANTS: {TOTAL_PARTICIPANTS} is less than VM count: {len(host_ips['host_ips'])}"
        )
    partition = 0

    dataset_urls = get_dataset_urls()
    num_of_urls = len(dataset_urls)
    for host in host_ips["host_ips"]:
        try:
            partition += 1
            notebook_url = f"http://{host['ip_address']}:8888/lab/tree/notebooks/{notebook_path}?token={host['jupyter_token']}"
            dataset_url = dataset_urls[partition%num_of_urls]
            if partition <= len(participants):
                person_name = list(participants.keys())[partition - 1]
                print(f"Hi {person_name},")
                person_email = participants[person_name]
                if person_email not in addresses_sent_to:
                    if send_email(person_name, person_email, host, notebook_url, dataset_url):
                        addresses_sent_to.append(person_email)
                else:
                    print(f"Already emailed: {person_name} {person_email}")
            if partition <= TOTAL_PARTICIPANTS:
                print("These are your Session Details:")
            else:
                print("Spare Session Details:")
            print("-------------------------------")
            # print(f"VM Username: {host['username']}")
            # print(f"VM Password: {host['password']}")
            # print(f"VM IP Address: {host['ip_address']}")
            print(f"📎 MY_DATASET_URL: \n{dataset_url}")

            print()
            print(f"👉🏽 Start Here:")
            print(notebook_url)
            print()
        except Exception as e:
            print(f"Failed to send details for partition: {partition}", e)

In [56]:
send_emails(host_ips, participants)

Ad Astra Demo 1

Send to each participant

Hi Madhava Jay,
send email to Madhava Jay madhava@openmined.org {'username': 'azureuser', 'password': 'Adastrademo2022', 'ip_address': '52.188.79.206', 'jupyter_token': 'nhvntkwi4l3zam5o1xv45c2h9alf6r39vmmvg9irxph9hkzl'}
These are your Session Details:
-------------------------------
📎 MY_DATASET_URL: 
https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/subsets/TissueMNIST-1ffc51892957453e908919645da3ea90.pkl

👉🏽 Start Here:
http://52.188.79.206:8888/lab/tree/notebooks/adastra/data-owners/data-owners-presentation.ipynb?token=nhvntkwi4l3zam5o1xv45c2h9alf6r39vmmvg9irxph9hkzl

These are your Session Details:
-------------------------------
📎 MY_DATASET_URL: 
https://raw.githubusercontent.com/OpenMined/datasets/main/TissueMNIST/subsets/TissueMNIST-551b7b7265d84bfd8dea36476a7e77f6.pkl

👉🏽 Start Here:
http://52.188.79.139:8888/lab/tree/notebooks/adastra/data-owners/data-owners-presentation.ipynb?token=08shavto27rswkurfges38v7er0bar