In [1]:
from sys import path
from time import sleep
from fabric import Connection
import yaml
import re

#adding prognos tools
path.insert(0, "/home/jose-luis/Envs/watexr/")
from watexr_tools.encrypt import decryptString
from watexr_tools import gce_api as gce

##  Getting files to instantiated shared volume

This is a workaround that  needs to be done because google cloud does not allow for ReadWriteMany on persistent disks. The downside is that it makes disk io rather slow.

In [2]:
with Connection('localhost') as c:
    c.local('wget https://raw.githubusercontent.com/kubernetes/examples/master/staging/volumes/nfs/provisioner/nfs-server-gce-pv.yaml -O nfs-server-gce-pv.yaml',hide='stderr')
    c.local('wget https://raw.githubusercontent.com/kubernetes/examples/master/staging/volumes/nfs/nfs-server-rc.yaml -O nfs-server-rc.yaml',hide='stderr')
    c.local('wget https://raw.githubusercontent.com/kubernetes/examples/master/staging/volumes/nfs/nfs-server-service.yaml -O nfs-server-service.yaml',hide='stderr')
    c.local('wget https://raw.githubusercontent.com/kubernetes/examples/master/staging/volumes/nfs/nfs-pv.yaml -O nfs-pv.yaml',hide='stderr')
    c.local('wget https://raw.githubusercontent.com/kubernetes/examples/master/staging/volumes/nfs/nfs-pvc.yaml -O nfs-pvc.yaml',hide='stderr')

#Setting up the desired storage size
with open('nfs-server-gce-pv.yaml', 'r') as f:
    dummy =  yaml.safe_load(f)
dummy['spec']['resources']['requests']['storage'] =  '400Gi'
with open('nfs-server-gce-pv.yaml', 'w') as f:
    f.write(yaml.dump(dummy))

# Setting up a Kubernetes cluster

The [Kubernetes engine API](https://cloud.google.com/kubernetes-engine/docs/reference/rest/) will be used to instantiate the cluster.

The possible options for cluster instantiation can be fed to the REST api by passing a [cluster object](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1beta1/projects.zones.clusters)

The for the moment we will limit ourselves to setting the machine type, the number of nodes and the location of the cluster.

## Cluster settings

In [3]:
#Set up machine type available in the cluster

core_node = {
  "machineType": "n1-standard-4",
  "diskSizeGb": 50, 
  "imageType": "COS",
  "diskType": "pd-standard",
  "labels" : {"hub.jupyter.org/node-purpose" : "core" }
}

default_user_node = {
  "machineType": "n1-standard-4",
  "diskSizeGb": 50,
  "imageType": "COS",
  "diskType": "pd-standard",
  "labels" : {"hub.jupyter.org/node-purpose" : "user" },
  "taints" : [ {"key" : "hub.jupyter.org_dedicated" ,
               "value" : "user", 
               "effect" : "NO_SCHEDULE"}
             ]
}

high_cpu_user_node = {
  "machineType": "n1-standard-32",
  "diskSizeGb": 50,
  "imageType": "COS",
  "diskType": "pd-standard",
  "labels" : {"hub.jupyter.org/node-purpose" : "user",
              "niva-dedicated" : "user-hi-cpu"},
  "taints" : [ 
              {"key" : "hub.jupyter.org_dedicated" ,
               "value" : "user", 
               "effect" : "NO_SCHEDULE"},
              {"key" : "niva-dedicated",
               "value" : "user-hi-cpu",
               "effect" : "NO_SCHEDULE"}
              ]
}

# expensive_node = {
#   "machineType": "n1-standard-64",
#   "diskSizeGb": 100,
#   "imageType": "COS",
#   "diskType": "pd-standard",
#   "labels" : {"hub.jupyter.org/node-purpose" : "user",
#               "niva-dedicated" : "user-hi-mem"},
#   "taints" : [ 
#               {"key" : "hub.jupyter.org_dedicated" ,
#                "value" : "user", 
#                "effect" : "NO_SCHEDULE"},
#               {"key" : "niva-dedicated",
#                "value" : "user-very-expensive",
#                "effect" : "NO_SCHEDULE"}
#               ]
# }

#Setting up pool (which can have several nodes)
node_pool = [
    {
      "name": "default-pool",
      "config": core_node,
      "initialNodeCount": 1  #Needs to be 2 apparently
    },
    {
      "name": "user-default-pool",
      "config": default_user_node,
      "initialNodeCount": 0,
      "autoscaling" : {
          "enabled": True,
          "minNodeCount": 0,
          "maxNodeCount": 2,
          "autoprovisioned": True,
          }

    },
    {
        "name": "user-high-cpu-pool",
      "config": high_cpu_user_node,
      "initialNodeCount": 0,
          "autoscaling" : {
          "enabled": True,
          "minNodeCount": 0,
          "maxNodeCount": 2,
          "autoprovisioned": True,
          }
        
    }
]

#Setting up the Kubernetes cluster
cluster_object = {
  "name": "chemical-fate-jhub",
  "description": "A cluster to run chemical models for the non-sis edc",
  "nodePools": node_pool, 
  "location": "europe-north1-b",
  "autoscaling" : {"enableNodeAutoprovisioning": True,
                   "resourceLimits" : [
                       {
                          "resourceType": 'cpu',
                          "minimum": '0',
                          "maximum": '64'
                       },
                       {
                          "resourceType": 'memory',
                          "minimum": '0',
                          "maximum": '125'
                       }                      
                   ]
                   },
}

## Instantiating Kubernetes cluster

This is done using the [REST API](https://cloud.google.com/kubernetes-engine/docs/reference/rest). 

In [4]:
#Initializing the custom gke_api

#Paths to keys. Getting keys has not been automated but can be done in the cloud console
master_key = "/home/jose-luis/Envs/gce_framework/code/keys/nivacatchment.json"
storage_key = "/home/jose-luis/Envs/gce_framework/code/keys/framework-storage.json"

#General properties
properties = {
    "project" : "nivacatchment",
    "zone" : "europe-north1-b",
    "cluster" : cluster_object['name']
}

cloud = gce.gce_api(master_key,storage_key,properties)

#Actually instantiating the cluster
cloud.post("kubeCreate",json={"cluster": cluster_object})

{'name': 'operation-1598534070777-17fbdb45',
 'zone': 'europe-north1-b',
 'operationType': 'CREATE_CLUSTER',
 'status': 'RUNNING',
 'selfLink': 'https://container.googleapis.com/v1beta1/projects/808260220155/zones/europe-north1-b/operations/operation-1598534070777-17fbdb45',
 'targetLink': 'https://container.googleapis.com/v1beta1/projects/808260220155/zones/europe-north1-b/clusters/chemical-fate-jhub',
 'startTime': '2020-08-27T13:14:30.777377899Z'}

In [5]:
#Waiting until the cluster is up and running
response = cloud.get('kubeCreate')['clusters'][0]['status']
display(response)
while response != 'RUNNING':
    sleep(10)
    response = cloud.get('kubeCreate')['clusters'][0]['status']
    display(response) 
    
    
#Function that checks that a pool has been created
def waitForPool():
    #Wait until the cluster is ready
    response = [i['status'] for i in cloud.get('kubeAddPool')['nodePools'] ]
    display(response)
    while (not(all([i == 'RUNNING' for i in response]))):
        display(response)
        sleep(10)
        response = [i['status'] for i in cloud.get('kubeAddPool')['nodePools'] ]

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'PROVISIONING'

'RUNNING'

## Getting credentials for cluster

In [6]:
with Connection('localhost') as c:
    c.local('gcloud container clusters get-credentials {name} --zone {location}'.format(**cluster_object),replace_env=False)

Fetching cluster endpoint and auth data.
kubeconfig entry generated for chemical-fate-jhub.


## Creating namespace

In [None]:
#Check if namespace jhub exist
# with Connection('localhost') as c:
#     bla = c.local('kubectl get namespace',replace_env=False)
    
# expr = re.compile('^jhub\s+')
# bla = bla.stdout.strip().split('\n')
# namespaceExists=any([expr.match(i) != None for i in bla])

In [9]:
with Connection('localhost') as c:
    randHex = c.local('openssl rand -hex  32',hide='out')
    randHex = randHex.stdout.strip()

config = {'proxy': {'secretToken': randHex} }

config.update({'singleuser' : {'default': {'name':'jupyter/datascience-notebook','tag': '41d6b29cd15c'}} })
config['singleuser'].update({'defaultUrl' : "/lab"})

display(config)
    
with open('config.yaml','w') as f:
    f.write(yaml.dump(config,default_flow_style=False))


with Connection('localhost') as c:
    c.local('kubectl create namespace chemhub',replace_env=False)
    #Using helm to install jupyterhub on the kubernetes cluster
    c.local('helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/',replace_env=False)
    c.local('helm repo update', replace_env=False)  
    #First install
    c.local('helm upgrade --install chemhub jupyterhub/jupyterhub --namespace chemhub --version=0.9.0 --values config.yaml ',replace_env=False)

{'proxy': {'secretToken': '9bab3ab5b179a253e2325cb088cbb0325b0d178102192482a5589ef0e0adc398'},
 'singleuser': {'default': {'name': 'jupyter/datascience-notebook',
   'tag': '41d6b29cd15c'},
  'defaultUrl': '/lab'}}

Error from server (AlreadyExists): namespaces "chemhub" already exists


UnexpectedExit: Encountered a bad command exit code!

Command: 'kubectl create namespace chemhub'

Exit code: 1

Stdout: already printed

Stderr: already printed



In [10]:
## Setting up machine types to be instantiated

profileList = [{'display_name': 'Standard',
                        'description': 'At least: two processors, 7.5GB of RAM ',
                        'default': True,
                        'kubespawner_override': {'cpu_limit': 4,
                         'cpu_guarantee': 1,
                         'mem_limit': '15G',
                         'mem_guarantee': '1G',
                         'start_timeout': 900
                                                }
               },
               {'display_name': 'Your own personal machine',
                'description': '4 processors, 15GB of RAM',
                'kubespawner_override': {'cpu_limit': 4,
                                         'cpu_guarantee': 4,
                                         'mem_limit': '15G',
                                         'mem_guarantee': '15G',
                                         'start_timeout': 900,
                                         'tolerations' : [{'effect': 'NoSchedule',
                                                            'key': 'hub.jupyter.org_dedicated',
                                                            'operator': 'Equal',
                                                            'value': 'user'},
                                                         ]
                                         }
               },
               {'display_name': 'High CPU (new node; typically 5-10 mins startup time)',
                 'description': 'Access to 16 CPUs, 12 GB RAM, no GPU. For CPU-heavy processing. Expensive!',
                 'kubespawner_override': {'cpu_limit': 8,
                  'cpu_guarantee': 32,
                  'mem_limit': '60G',
                  'mem_guarantee': '7.5G',
                  'start_timeout': 900,
                  'tolerations': [{'effect': 'NoSchedule',
                    'key': 'hub.jupyter.org_dedicated',
                    'operator': 'Equal',
                    'value': 'user'},
                   {'effect': 'NoSchedule',
                    'key': 'niva-dedicated',
                    'operator': 'Equal',
                    'value': 'user-hi-cpu'}]}
               }
               ]


config['singleuser'].update({'profileList' : profileList})

display(config)
    
with open('config.yaml','w') as f:
    f.write(yaml.dump(config))
    
with Connection('localhost') as c:
    #Update
    c.local('helm upgrade chemhub jupyterhub/jupyterhub --namespace=chemhub --version=0.9.0 --values config.yaml ',replace_env=False)

{'proxy': {'secretToken': '9bab3ab5b179a253e2325cb088cbb0325b0d178102192482a5589ef0e0adc398'},
 'singleuser': {'default': {'name': 'jupyter/datascience-notebook',
   'tag': '41d6b29cd15c'},
  'defaultUrl': '/lab',
  'profileList': [{'display_name': 'Standard',
    'description': 'At least: two processors, 7.5GB of RAM ',
    'default': True,
    'kubespawner_override': {'cpu_limit': 4,
     'cpu_guarantee': 1,
     'mem_limit': '15G',
     'mem_guarantee': '1G',
     'start_timeout': 900}},
   {'display_name': 'Your own personal machine',
    'description': '4 processors, 15GB of RAM',
    'kubespawner_override': {'cpu_limit': 4,
     'cpu_guarantee': 4,
     'mem_limit': '15G',
     'mem_guarantee': '15G',
     'start_timeout': 900,
     'tolerations': [{'effect': 'NoSchedule',
       'key': 'hub.jupyter.org_dedicated',
       'operator': 'Equal',
       'value': 'user'}]}},
   {'display_name': 'High CPU (new node; typically 5-10 mins startup time)',
    'description': 'Access to 16 C

Release "chemhub" has been upgraded. Happy Helming!
NAME: chemhub
LAST DEPLOYED: Thu Aug 27 15:23:00 2020
NAMESPACE: chemhub
STATUS: deployed
REVISION: 2
TEST SUITE: None
NOTES:
Thank you for installing JupyterHub!

Your release is named chemhub and installed into the namespace chemhub.

You can find if the hub and proxy is ready by doing:

 kubectl --namespace=chemhub get pod

and watching for both those pods to be in status 'Running'.

You can find the public IP of the JupyterHub by doing:

 kubectl --namespace=chemhub get svc proxy-public

It might take a few minutes for it to appear!

Note that this is still an alpha release! If you have questions, feel free to
  1. Read the guide at https://z2jh.jupyter.org
  2. Chat with us at https://gitter.im/jupyterhub/jupyterhub
  3. File issues at https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues


## Getting ip

In [11]:
with Connection('localhost') as c:
    ip = c.local('kubectl --namespace=chemhub get svc proxy-public',replace_env=False)

ip = ip.stdout.split('\n')
ip_dict = dict()
for i,j in zip(ip[0].split(),ip[1].split()):
    ip_dict.update({i:j})
    
display(ip_dict)

NAME           TYPE           CLUSTER-IP      EXTERNAL-IP      PORT(S)                      AGE
proxy-public   LoadBalancer   10.27.245.249   35.228.115.160   443:32170/TCP,80:30548/TCP   2m39s


{'NAME': 'proxy-public',
 'TYPE': 'LoadBalancer',
 'CLUSTER-IP': '10.27.245.249',
 'EXTERNAL-IP': '35.228.115.160',
 'PORT(S)': '443:32170/TCP,80:30548/TCP',
 'AGE': '2m39s'}

## Limiting access

In [12]:
hub = {'extraConfig': {'jupyterlab': "c.Spawner.cmd = ['jupyter-labhub']"} ,
      'scheduling': {'userScheduler': {'enabled': True},
                          'podPriority': {'enabled': True},
                          'userPlaceholder': {'enabled': True, 'replicas': 2},
                          'userPods': {'nodeAffinity': {'matchNodePurpose': 'require'}},
                          'corePods': {'nodeAffinity': {'matchNodePurpose': 'require'}}
                        }
      }
auth = {
      'type' : 'github', 
       'admin': {
           'access' : True, 'users' : ['Lecheps',]},
       'whitelist' : {'users' :  ['LeahJB','clayerf']},
       'github' : {
           'clientId' : '42a65eb61a1f218f7a99',
           'clientSecret' : '965f613f25d74f9f1cd592ff5d8c601e59666c73',
           'callbackUrl': "http://{}/hub/oauth_callback".format(ip_dict['EXTERNAL-IP'])
                  }
         }

config.update({'hub': hub})
config.update({'auth': auth})

with open('config.yaml','w') as f:
    f.write(yaml.dump(config))
    
with Connection('localhost') as c:
    #Update
    c.local('helm upgrade chemhub jupyterhub/jupyterhub --namespace=chemhub --version=0.9.0 --values config.yaml ',replace_env=False)       

Release "chemhub" has been upgraded. Happy Helming!
NAME: chemhub
LAST DEPLOYED: Thu Aug 27 15:24:13 2020
NAMESPACE: chemhub
STATUS: deployed
REVISION: 3
TEST SUITE: None
NOTES:
Thank you for installing JupyterHub!

Your release is named chemhub and installed into the namespace chemhub.

You can find if the hub and proxy is ready by doing:

 kubectl --namespace=chemhub get pod

and watching for both those pods to be in status 'Running'.

You can find the public IP of the JupyterHub by doing:

 kubectl --namespace=chemhub get svc proxy-public

It might take a few minutes for it to appear!

Note that this is still an alpha release! If you have questions, feel free to
  1. Read the guide at https://z2jh.jupyter.org
  2. Chat with us at https://gitter.im/jupyterhub/jupyterhub
  3. File issues at https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues


In [None]:
with open('config.yaml','w') as f:
    f.write(yaml.dump(c,default_flow_style=False))


In [13]:
config.update({'singleuser' : {'default': {'name':'jupyter/all-spark-notebook','tag': '67bed9ea4a47'}} })
with Connection('localhost') as c:
    #Update
    c.local('helm upgrade chemhub jupyterhub/jupyterhub --namespace=chemhub --version=0.9.0 --values config.yaml ',replace_env=False)       

Release "chemhub" has been upgraded. Happy Helming!
NAME: chemhub
LAST DEPLOYED: Thu Aug 27 15:48:11 2020
NAMESPACE: chemhub
STATUS: deployed
REVISION: 4
TEST SUITE: None
NOTES:
Thank you for installing JupyterHub!

Your release is named chemhub and installed into the namespace chemhub.

You can find if the hub and proxy is ready by doing:

 kubectl --namespace=chemhub get pod

and watching for both those pods to be in status 'Running'.

You can find the public IP of the JupyterHub by doing:

 kubectl --namespace=chemhub get svc proxy-public

It might take a few minutes for it to appear!

Note that this is still an alpha release! If you have questions, feel free to
  1. Read the guide at https://z2jh.jupyter.org
  2. Chat with us at https://gitter.im/jupyterhub/jupyterhub
  3. File issues at https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues


In [None]:
kubectl --namespace=chemhub get pod