# Central Worker in Oakestra

# Setting up Oakestra

### Basics

```
sudo apt-get install iptables

export CLUSTER_NAME=laptop

export CLUSTER_LOCATION=60.204478,24.962756,3000

export SYSTEM_MANAGER_URL=(Public IP)
```

## Orhestrator

```
sudo -E docker compose -f run-a-cluster/1-DOC.yaml -f run-a-cluster/override-alpha-versions.yaml up
```

## Worker node
```
wget -c https://github.com/oakestra/oakestra/releases/download/alpha-v0.4.300/NodeEngine_$(dpkg --print-architecture).tar.gz && tar -xzf NodeEngine_$(dpkg --print-architecture).tar.gz && chmod +x install.sh && mv NodeEngine NodeEngine_$(dpkg --print-architecture) && ./install.sh $(dpkg --print-architecture)

wget -c https://github.com/oakestra/oakestra-net/releases/download/alpha-v0.4.300/NetManager_$(dpkg --print-architecture).tar.gz && tar -xzf NetManager_$(dpkg --print-architecture).tar.gz && chmod +x install.sh && ./install.sh $(dpkg --print-architecture)
```

## NetManager Config

```
sudo nano /etc/netmanager/netcfg.json

{
  "NodePublicAddress": "(Public IP)",
  "NodePublicPort": 50103,
  "ClusterUrl": "(Public IP)",
  "ClusterMqttPort": "10003"
}
```

## Running Netmanager

```
# Terminal
sudo NetManager -p 6000

# Background
sudo nohup NetManager -p 6000 </dev/null >/tmp/netmanager.log 2>&1 &

sudo kill -9 $(ps -ax | grep NetManager | sed 's/|/ /' | awk '{print $1}')
```

## Running NodeEngine

```
# Terminal
sudo NodeEngine -n 6000 -p 10100 -a (Public IP)

# Background

sudo nohup NodeEngine -n 6000 -p 10100 -a (Public IP) </dev/null >/tmp/nodeengine.log 2>&1 &

sudo kill -9 $(ps -ax | grep NodeEngine | sed 's/|/ /' | awk '{print $1}')
```

### Checking NodeEngine

```
tail -f  /tmp/nodeengine.log
```

### Checking Orhestrator

- http://(Public IP) (Admin-Admin)
- Go to http://(Public IP):10000/api/docs
- Scroll to /api/cluster/active
- Execute and check that print shows correct cluster name and number of workers

## Setting up MinIO, MLflow, Prometheus, Pushgateway and Grafana

### Prometheus

```
{
    "microservices":[
        {
            "microservice_name":"Prometheus",
            "microservice_namespace":"test",
            "virtualization":"docker",
            "description":"",
            "memory":200,
            "vcpus":1,
            "vgpus":0,
            "vtpus":0,
            "bandwidth_in":0,
            "bandwidth_out":0,
            "storage":200,
            "code":"docker.io/prom/prometheus:latest",
            "state":"",
            "port":"9090:9090",
            "added_files":[],
            "constraints":[],
            "connectivity":[],
            "args":[],
            "environment":[]
        }
    ]
}
```

### Grafana

```
{
    "microservices":[
        {
            "microservice_name":"Grafana",
            "microservice_namespace":"test",
            "virtualization":"docker",
            "description":"",
            "memory":200,
            "vcpus":1,
            "vgpus":0,
            "vtpus":0,
            "bandwidth_in":0,
            "bandwidth_out":0,
            "storage":200,
            "code":"docker.io/grafana/grafana:latest",
            "state":"",
            "port":"3000:3000",
            "added_files":[],
            "constraints":[],
            "connectivity":[],
            "args":[],
            "environment":[]
        }
    ]
}
```

In [None]:
/home/sfniila/NAI_2024/Coding/FFD/minio/data

In [None]:
docker.io/minio/minio:RELEASE.2023-02-10T18-48-39Z

## Using MLflow, MinIO, Prometheus, Pushgateway and Grafana

In [17]:
import os
import mlflow
import mlflow.sklearn
#os.environ['MLFLOW_TRACKING_URI'] = 'postgresql+psycopg2://postgres:postgres@localhost:5432/mlflow_db'
#os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://172.17.0.3:9000"
#os.environ['AWS_ACCESS_KEY_ID'] = 'minio'
#os.environ['AWS_SECRET_ACCESS_KEY'] = 'minio123'

In [2]:
experiment_name = "demo_experiment"
#try:
#    mlflow.create_experiment(experiment_name, artifact_location="s3://mlflow")
#except MlflowException as e:
#    print(e)
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='s3://mlflow', creation_time=1710679644015, experiment_id='1', last_update_time=1710679644015, lifecycle_stage='active', name='demo_experiment', tags={}>

In [3]:
mlflow.start_run()
# Log a parameter (key-value pair)
mlflow.log_param("param1", 5)
# Log a metric; metrics can be updated throughout the run
mlflow.log_metric("foo", 1)
mlflow.log_metric("foo", 2)
mlflow.log_metric("foo", 3)
# Log an artifact (output file)
with open("output.txt", "w") as f:
    f.write("Hello world!")
mlflow.log_artifact("output.txt")
mlflow.end_run()

In [None]:
export MLFLOW_TRACKING_URI=postgresql+psycopg2://postgres:postgres@localhost:5432/mlflow_db
export MLFLOW_S3_ENDPOINT_URL=http://172.17.0.3:9000
export AWS_ACCESS_KEY_ID=minio
export AWS_SECRET_ACCESS_KEY=minio123

In [19]:
experiment_name = "demo_experiment"
try:
    mlflow.create_experiment(experiment_name, artifact_location="s3://mlflow")
except MlflowException as e:
    print(e)
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='s3://mlflow', creation_time=1710690277406, experiment_id='1', last_update_time=1710690277406, lifecycle_stage='active', name='demo_experiment', tags={}>

In [20]:
mlflow.start_run()
# Log a parameter (key-value pair)
mlflow.log_param("param1", 5)
# Log a metric; metrics can be updated throughout the run
mlflow.log_metric("foo", 1)
mlflow.log_metric("foo", 2)
mlflow.log_metric("foo", 3)
# Log an artifact (output file)
with open("output.txt", "w") as f:
    f.write("Hello world!")
mlflow.log_artifact("output.txt")
mlflow.end_run()

In [12]:
mlflow.end_run()

In [13]:
mlflow.start_run()
# Log a parameter (key-value pair)
mlflow.log_param("param1", 5)
# Log a metric; metrics can be updated throughout the run
mlflow.log_metric("foo", 1)
mlflow.log_metric("foo", 2)
mlflow.log_metric("foo", 3)
# Log an artifact (output file)
with open("output.txt", "w") as f:
    f.write("Hello world!")
mlflow.log_artifact("output.txt")
mlflow.end_run()

In [1]:
import os
import mlflow
import mlflow.sklearn



In [2]:
mlflow.set_tracking_uri('http://87.92.217.82:5000')

In [3]:
experiment_name = "demo_experiment"
try:
    mlflow.create_experiment(experiment_name, artifact_location="s3://mlflow/mlruns")
except MlflowException as e:
    print(e)
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='s3://mlflow/mlruns', creation_time=1711130388527, experiment_id='1', last_update_time=1711130388527, lifecycle_stage='active', name='demo_experiment', tags={}>

In [4]:
mlflow.start_run()
# Log a parameter (key-value pair)
mlflow.log_param("param1", 5)
# Log a metric; metrics can be updated throughout the run
mlflow.log_metric("foo", 1)
mlflow.log_metric("foo", 2)
mlflow.log_metric("foo", 3)
# Log an artifact (output file)
with open("output.txt", "w") as f:
    f.write("Hello world!")
mlflow.log_artifact("output.txt")
mlflow.end_run()

NoCredentialsError: Unable to locate credentials

In [13]:
mlflow.end_run()

RestException: RESOURCE_DOES_NOT_EXIST: Run with id=d3b24186c5434712bbc4165b469af021 not found

In [None]:
MLFLOW_S3_ENDPOINT_URL=http://10.30.55.56:9000  
MLFLOW_SQL_ALCHEMY_CONN=postgresql+psycopg2://postgres:postgres@10.30.55.55:5432/mlflow_db

In [2]:
from prometheus_client import CollectorRegistry
from prometheus_client import Gauge
from prometheus_client import push_to_gateway

In [5]:
url = 'http://87.92.217.82:9091'

registry = CollectorRegistry()

g = Gauge('name','descripton', registry = registry)
g.set(20)
push_to_gateway(url, job = 'job', registry = registry)