# Data Scientist (DS)

Outline of what the DS will do

1. DS logs into DO1 and DO2's datasites as guest  
2. DS explores the datasets  
3. DS prepares syft_flwr code  
4. DS bootstraps the syft_flwr project  
5. DS runs flwr and syft_flwr simulations (optional)   
6. DS submits jobs to the DOs' datasites  
7. DS starts the FL server code  
8. DS observes the results

## 1. DS logs into DO1 and DO2's datasites as guest


Each DO also has their own datasite where they can log in as an admin, but that is not needed in this workflow.

<img src="../../fl-diabetes-prediction/images/dsLogsInAsGuests.png" width="50%" alt="DS logs into DOs' datasites as guests">

In [None]:
from pathlib import Path

from syft_rds.orchestra import setup_rds_server

import syft_flwr

In [None]:
# Identities used throughout the notebook: the data scientist (DS) and the two
# data owners (DO1, DO2) whose datasites the DS connects to.
DS = "nutorbitx@gmail.com"
DO1 = "do_1@gmail.com"
DO2 = "do_2@gmail.com"
print("DS email: ", DS)

# Create the DS-side stack rooted at the current directory; it is the object
# used to open a session against each data owner's datasite.
ds_stack = setup_rds_server(
    email=DS,
    key="local_syftbox_network",
    root_dir=Path("."),
)

# One session per data owner, opened in the order DO1 then DO2.
do_client_1, do_client_2 = (
    ds_stack.init_session(host=owner) for owner in (DO1, DO2)
)

In [None]:
# Display the admin flag of the DO1 session. The DS is meant to connect as a
# guest, so this is presumably False — TODO confirm.
do_client_1.is_admin

In [None]:
# Display the admin flag of the DO2 session. The DS is meant to connect as a
# guest, so this is presumably False — TODO confirm.
do_client_2.is_admin

In [None]:
# Display the `datasets` attribute of the DO1 session (step 2: explore datasets).
do_client_1.datasets

In [None]:
# Display the `datasets` attribute of the DO2 session (step 2: explore datasets).
do_client_2.datasets

In [None]:
# Collect the mock-data path of each data owner's dataset, in the order
# [DO1, DO2]. A failed lookup is reported and recorded as None so the list
# always has one entry per data owner.
# NOTE(review): a None entry is passed on unchanged to whatever consumes
# mock_paths later — confirm the consumer tolerates it.
mock_paths = []

try:
    bank_dataset = do_client_1.dataset.get(name="bank_features")
    mock_paths.append(bank_dataset.get_mock_path())
except Exception as e:
    print(f"Error getting bank dataset for client {do_client_1.host}: {e}")
    mock_paths.append(None)

try:
    marketing_dataset = do_client_2.dataset.get(name="marketing_features")
    mock_paths.append(marketing_dataset.get_mock_path())
except Exception as e:
    # Report the host that actually failed (DO2); the message previously named DO1.
    print(f"Error getting marketing dataset for client {do_client_2.host}: {e}")
    mock_paths.append(None)

# Last expression of the cell: display the collected paths.
mock_paths

In [None]:
from pathlib import Path

# Location of the Flower project, relative to the notebook's working directory.
PROJECT_PATH = Path("../vertical-fl")

# Fail early if the notebook is run from an unexpected directory, and show the
# absolute path that was checked so the mismatch is easy to spot.
assert PROJECT_PATH.exists(), (
    f"Project directory not found: {PROJECT_PATH.resolve()}"
)

In [None]:
# Step 4: bootstrap the syft_flwr project with the DS as aggregator and the two
# data owners as datasites. Failures are printed rather than raised so the
# notebook can continue.
try:
    # Drop an existing main.py (presumably left by an earlier bootstrap) so the
    # cell can be re-run. missing_ok=True keeps a first run, where main.py does
    # not exist yet, from raising FileNotFoundError and skipping the bootstrap.
    (PROJECT_PATH / "main.py").unlink(missing_ok=True)
    syft_flwr.bootstrap(PROJECT_PATH, aggregator=DS, datasites=[DO1, DO2])
    print("Bootstrapped project successfully ✅")
except Exception as e:
    print(e)

In [None]:
import os

# Set the mock-data flag in the environment before starting the run.
os.environ.update({"USE_MOCK_DATA": "true"})

# Step 5 (optional): run the project locally, handing over the mock paths.
syft_flwr.run(PROJECT_PATH, mock_paths)

In [None]:
# Remove generated artifacts from the project directory: __pycache__ folders
# and the simulation_logs folder.
# NOTE(review): in a shell without bash's `globstar` option, `**` globs like
# `*`, so only __pycache__ directories exactly one level below PROJECT_PATH
# would match — confirm this covers the project layout.
!rm -rf {PROJECT_PATH / "**/__pycache__"}
!rm -rf {PROJECT_PATH / "simulation_logs"}

In [None]:
# Step 6: submit the project to DO1's datasite, targeting its "bank_features"
# dataset with main.py as the entrypoint.
_do1_job_spec = {
    "name": "Syft Flower Experiment",
    "dataset_name": "bank_features",
    "user_code_path": PROJECT_PATH,
    "entrypoint": "main.py",
}
job = do_client_1.jobs.submit(**_do1_job_spec)
print(job)

In [None]:
# Step 6: submit the project to DO2's datasite, targeting its
# "marketing_features" dataset with main.py as the entrypoint.
job = do_client_2.jobs.submit(
    name="Syft Flower Experiment",
    dataset_name="marketing_features",
    user_code_path=PROJECT_PATH,
    entrypoint="main.py",
)
# Show the submitted job, matching the DO1 submission cell, so the DS gets
# visible confirmation of the submission.
print(job)

In [None]:
import os

# Step 7: configure the environment for the real (non-mock) run, then launch
# the project's main.py.
# Config path taken from the DS stack's client object.
os.environ["SYFTBOX_CLIENT_CONFIG_PATH"] = str(ds_stack.client.config_path)
os.environ["LOGURU_LEVEL"] = "DEBUG"  # verbose loguru logging
os.environ["SYFT_FLWR_MSG_TIMEOUT"] = "60"  # presumably seconds — TODO confirm
os.environ["USE_MOCK_DATA"] = "false"  # mock-data flag off for this run

# Run main.py through uv; `--active` is forwarded on the command line as written.
!uv run {str(PROJECT_PATH / "main.py")} --active