# Bandgap Workflow Example
 This notebook demonstrates how to build and run a bandgap workflow for a material,
 using the twisted MoS2 interface from `specific_examples` as the example.

## Process Overview
### 1. Set up the environment and parameters.
### 2. Log in to get the API token
### 3. Load the target material and save it to the platform.
### 4. Select the application and create the workflow from a standard template.
### 5. Customize the workflow (relaxation, model, k-grids) and save it.
### 6. Create the compute configuration.
### 7. Create a job with material and workflow configuration.
### 8. Submit the job to the server and monitor its status.
### 9. Retrieve the results.
### 10. Display the results.

## 1. Set up the environment and parameters

In [None]:
import sys

# Runtime package installation, executed only when the interpreter reports the
# "emscripten" platform.
# NOTE(review): presumably this is the JupyterLite (in-browser) case, judging by
# the mat3ra.utils.jupyterlite import below — confirm.
if sys.platform == "emscripten":
    import micropip

    # Install the api-examples wheel from its URL; deps=False asks micropip not
    # to pull in the wheel's dependencies here.
    await micropip.install(
        "https://exabyte-io.github.io/api-examples/mat3ra_api_examples-0.1.dev1+gf2d827c28-py3-none-any.whl",
        deps=False)
    # mat3ra-utils must be installed before the import on the next line can succeed.
    await micropip.install("mat3ra-utils")
    from mat3ra.utils.jupyterlite.packages import install_packages

    # presumably installs the package set registered under "api_examples" — TODO confirm
    await install_packages("api_examples")

## 2. Log in to get the API token

In [None]:
# Development environment variables # TODO: Delete before merging
import os

# Connection settings for a locally running API, kept as module-level names.
API_HOST = "localhost"
API_PORT = "3000"
API_SECURE = "false"
API_VERSION = "2018-10-01"

# Publish the settings through the process environment; all values are strings,
# as os.environ requires.
os.environ.update(
    {
        "API_HOST": API_HOST,
        "API_PORT": API_PORT,
        "API_SECURE": API_SECURE,
        "API_VERSION": API_VERSION,
    }
)

In [None]:
import os
import sys
import json
from utils.auth import authenticate

if "data_from_host" in globals():
    apiConfig = data_from_host.get("apiConfig")
    os.environ.update(data_from_host.get("environ", {}))
    os.environ.update(
        dict(
            ACCOUNT_ID=apiConfig.get("accountId"),
            AUTH_TOKEN=apiConfig.get("authToken"),
            ORGANIZATION_ID=apiConfig.get("organizationId", ""),
            CLUSTERS=json.dumps(apiConfig.get("clusters", [])),
        )
    )
else:
    token_data = await authenticate()
    # TODO: use APIClient instead


In [None]:
# TODO: move to mat3ra
from exabyte_api_client import APIClient

# Create the API client; `client.account.id` is read later as the owner id for
# the material, workflow and job created by this notebook.
client = APIClient.authenticate()

## 3. Create material
### 3.1. Load material from local file

In [None]:
from utils.visualize import visualize_materials as visualize
from utils.jupyterlite import load_material_from_folder

# Load the interface material from the local "../uploads" folder and render it.
# NOTE(review): the second argument is presumably the material name to look up — confirm.
material = load_material_from_folder("../uploads", "MoS2(001)-MoS2(001), Interface")
visualize(material)

### 3.2. Save material to the platform

In [None]:
from utils.generic import dict_to_namespace

# The authenticated account's id is used as the owner of everything created below.
MY_USER_ID = client.account.id
print(f"✅ My ID: {MY_USER_ID}")

# NOTE(review): create_material is not imported anywhere in this notebook —
# presumably it should come from utils.api; confirm and add the import.
saved_material_response = create_material(material, MY_USER_ID)
# Wrap the response so its fields can be read with dot notation (e.g. `._id`).
saved_material = dict_to_namespace(saved_material_response)
print(f"✅ Material created: {saved_material._id}")

### 3.3. Get material id

In [None]:
# Show the id of the saved material; the same id is used later to fetch the
# material when the job is created.
print("Material ID:", saved_material._id)

## 5. Create workflow and set its parameters
### 5.1. Get list of applications and select one

In [None]:
from mat3ra.standata.applications import ApplicationStandata
from mat3ra.ade.application import Application

# List of all applications returned by Standata; kept for inspection only —
# nothing else in this notebook reads `apps_list`.
apps_list = ApplicationStandata.list_all()

In [None]:
# Take the first Standata application config matching "espresso" and build an
# Application object from it.
app_config = ApplicationStandata.get_by_name_first_match("espresso")
app = Application(**app_config)
# Bare expression: shown as the cell output when run in a notebook.
app.name

### 5.2. Create workflow from standard workflows and preview it

In [None]:
from mat3ra.standata.workflows import WorkflowStandata
from mat3ra.wode.workflows import Workflow
from utils.visualize import visualize_workflow

# Restrict the standard workflows to the selected application, then take the
# first one matching "band_gap.json" and build a Workflow from that config.
workflow_config = WorkflowStandata.filter_by_application(app.name).get_by_name_first_match("band_gap.json")
workflow = Workflow.create(workflow_config)

# Preview the workflow before any modification.
visualize_workflow(workflow)

### 5.3. Add relaxation subworkflow

In [None]:
from utils.visualize import visualize_workflow

# Modify the workflow in place by adding a relaxation step.
workflow.add_relaxation()
# Relaxation subworkflow is added as the first subworkflow
# (later cells rely on this when indexing workflow.subworkflows[0]).
visualize_workflow(workflow)

### 5.4. Change subworkflow details (Model subtype)

In [None]:
from mat3ra.standata.model_tree import ModelTreeStandata
from mat3ra.mode import Model

swf_0 = workflow.subworkflows[0]  # relaxation subworkflow
swf_1 = workflow.subworkflows[1]  # band structure subworkflow

# Build one model config and assign it to BOTH subworkflows (see the two
# assignments at the end of this cell).
# For preview:
subtypes = ModelTreeStandata.get_subtypes_by_model_type("dft")  # ["gga", "lda"] as enum
# NOTE(review): the enum member `subtypes.LDA` is passed here, while
# get_model_by_parameters below receives `subtypes.LDA.value` — confirm which
# form get_functionals_by_subtype expects.
functionals = ModelTreeStandata.get_functionals_by_subtype("dft", subtypes.LDA)  # ["pz", ...] as enum

model_config = ModelTreeStandata.get_model_by_parameters(
    type="dft",
    subtype=subtypes.LDA.value,
    functional=functionals.PZ.value,
)

# TODO: find actual one
# Placeholder method description attached to the model config by hand.
method_config = {"type": "pseudopotential", "subtype": "us"}
model_config["method"] = method_config

# The same Model instance is shared by both subworkflows.
model = Model.create(model_config)
swf_0.model = model
swf_1.model = model
print(model)


### 5.5. Modify k-grid in subworkflow units
#### 5.5.1. Define k-grid values and create context providers

In [None]:
from mat3ra.wode.context.providers import PointsGridDataProvider

# Values from publication
# Each grid is its own list object, so changing one in place later cannot
# silently change another.
kgrid_relax = [6, 6, 1]
kgrid_scf = [6, 6, 1]
kgrid_nscf = [12, 12, 1]

# In future: helper to workflow.set_context_to_unit_by_name_regex(context_provider, unit_name_regex)
# One provider per calculation type, each built from its own k-grid
# (the relaxation provider uses kgrid_relax, not kgrid_scf).
kgrid_context_provider_relax = PointsGridDataProvider(dimensions=kgrid_relax)
kgrid_context_provider_scf = PointsGridDataProvider(dimensions=kgrid_scf)
kgrid_context_provider_nscf = PointsGridDataProvider(dimensions=kgrid_nscf)

#### 5.5.2. Get new context data from the providers

In [None]:
# Ask each k-grid provider for its context data; the results are added to the
# matching workflow units in the next cell.
new_context_relax = kgrid_context_provider_relax.get_data()
new_context_scf = kgrid_context_provider_scf.get_data()
new_context_nscf = kgrid_context_provider_nscf.get_data()

#### 5.5.3. Modify workflow units with new context

In [None]:
# Get workflow's specific unit that needs to be modified
# Option 1: search by unit name regex within the relaxation subworkflow
relaxation_subworkflow = workflow.subworkflows[0]  # Relaxation is first
unit_to_modify_relax = relaxation_subworkflow.get_unit_by_name(name_regex="relax")
unit_to_modify_relax.add_context(new_context_relax)
# Write the modified unit back at the subworkflow level.
relaxation_subworkflow.set_unit(unit_to_modify_relax)

# Option 2: search by exact unit name within a specific subworkflow (index 1)
unit_to_modify_scf = workflow.subworkflows[1].get_unit_by_name(name="pw_scf")
unit_to_modify_scf.add_context(new_context_scf)
unit_to_modify_nscf = workflow.subworkflows[1].get_unit_by_name(name="pw_nscf")
unit_to_modify_nscf.add_context(new_context_nscf)

# Set the modified unit back to the workflow
# Option 1: direct set by unit object, replacing the existing one
# NOTE(review): the relax unit was already set on its subworkflow above —
# confirm whether this workflow-level call is also required.
workflow.set_unit(unit_to_modify_relax)

# Option 2: set by unit flowchart id and new unit object
workflow.set_unit(unit_flowchart_id=unit_to_modify_scf.flowchart_id, new_unit=unit_to_modify_scf)
workflow.set_unit(unit_flowchart_id=unit_to_modify_nscf.flowchart_id, new_unit=unit_to_modify_nscf)
# Tag the workflow name so the modified copy is distinguishable once saved.
workflow.name = workflow.name + " FROM IDE"
visualize_workflow(workflow)

### 5.6. Save workflow to collection

In [None]:
# Plain-dict form of the workflow, kept available for inspection.
workflow_dict = workflow.to_dict()

# NOTE(review): create_workflow is not imported anywhere in this notebook —
# presumably it should come from utils.api; confirm and add the import.
# A not-enabled alternative went through utils.api._create_endpoints_with_auth()
# and its `workflow.create(workflow_dict, <owner id>)` endpoint.
saved_workflow_response = create_workflow(workflow, MY_USER_ID)

# Wrap the response so its fields can be read with dot notation (e.g. `._id`).
saved_workflow = dict_to_namespace(saved_workflow_response)
print(f"✅ Workflow created: {saved_workflow._id}")

## 6. Create the compute configuration
### 6.1. View available clusters and providers

In [None]:
# The login cell stores the cluster list in the CLUSTERS environment variable
# as a JSON string; it may be unset (other login path) or an empty list.
available_clusters = json.loads(os.getenv("CLUSTERS") or "[]")
first_cluster = available_clusters[0] if available_clusters else None

# Use the first cluster when there is one, otherwise fall back to "cluster-001".
# An entry may be a config dict (its "displayName" is used) or a plain name.
# NOTE(review): confirm "displayName" is the value get_compute(cluster=...) expects.
if isinstance(first_cluster, dict):
    CLUSTER_NAME = first_cluster.get("displayName", "cluster-001")
else:
    CLUSTER_NAME = first_cluster or "cluster-001"

### 6.2. Create compute configuration

In [None]:
from exabyte_api_client.endpoints.jobs import JobEndpoints

from utils.settings import ENDPOINT_ARGS

# Jobs endpoint built from the shared endpoint arguments; it is reused later
# when waiting for the job to finish.
jobs_endpoint = JobEndpoints(*ENDPOINT_ARGS)

# Compute configuration for the selected cluster; passed to create_job below.
compute = jobs_endpoint.get_compute(
    cluster=CLUSTER_NAME
)

## 7. Create the job with material and workflow configuration

In [None]:
from datetime import datetime

# NOTE(review): get_default_project is not imported anywhere in this notebook —
# presumably it should come from utils.api; confirm.
project_id = get_default_project(MY_USER_ID)

# Local time, minute resolution; used only to make the job name recognizable.
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")

JOB_NAME = f"Band Gap {timestamp}"

In [None]:
from utils.generic import display_JSON

# Fetch the saved material back by id; here it is only used to print its id.
# NOTE(review): get_material and create_job are not imported anywhere in this
# notebook — presumably they should come from utils.api; confirm.
material_from_collection = get_material(saved_material._id)

print(f"📦 Material: {material_from_collection['_id']}")
print(f"📦 Workflow: {saved_workflow._id}")
print(f"📦 Project: {project_id}")

# The material is passed as the attribute dict of the saved-material namespace.
job_response = create_job(
    materials=[vars(saved_material)],
    workflow_id=saved_workflow._id,
    project_id=project_id,
    name=JOB_NAME,
    compute=compute,
    owner_id=MY_USER_ID
)

# Convert to namespace for dot notation access
job = dict_to_namespace(job_response)

print("✅ Job created successfully!")
display_JSON(job_response)

## 8. Submit the job and monitor the status

In [None]:
# `job` is read as a single namespace everywhere else in this notebook
# (job._id, job.workflow), so the id is taken from it directly, not from job[0].
# NOTE(review): submit_job is not imported anywhere in this notebook — confirm its source.
submit_job(job._id)

In [None]:
from utils.generic import wait_for_jobs_to_finish

# Wait until the submitted job has finished before reading any results.
# NOTE(review): poll_interval=60 is presumably in seconds — confirm.
wait_for_jobs_to_finish(jobs_endpoint, [job._id], poll_interval=60)

## 9. Retrieve results

In [None]:
import re
from utils.generic import get_property_by_subworkflow_and_unit_indicies
from exabyte_api_client.endpoints.properties import PropertiesEndpoints

property_endpoints = PropertiesEndpoints(*ENDPOINT_ARGS)

results = []
# Properties taken from subworkflow index 0, unit index 0 of the finished job.
final_structure = get_property_by_subworkflow_and_unit_indicies(
    property_endpoints, "final_structure", job, 0, 0
)["data"]
pressure = get_property_by_subworkflow_and_unit_indicies(
    property_endpoints, "pressure", job, 0, 0
)["data"]["value"]

# Use dot notation to access workflow property
# NOTE(review): earlier cells build workflows via Workflow.create(...); confirm
# the direct constructor accepts job.workflow.
unit_flowchart_id = Workflow(job.workflow).get_unit_by_name(name="pw_scf").flowchart_id
band_gap_direct = property_endpoints.get_direct_band_gap(job._id, unit_flowchart_id)
band_gap_indirect = property_endpoints.get_indirect_band_gap(job._id, unit_flowchart_id)

# The twist angle is parsed from the material name ("<number> degrees").
# A name without that pattern yields None instead of raising, and the value is
# stored as a float so that sorting and plotting are numeric.
angle_match = re.search(r"(\d+(?:\.\d+)?) degrees", material.name)
angle_deg = float(angle_match.group(1)) if angle_match else None

results.append(
    {
        # Id of the material saved on the platform (the one the job ran on).
        "material_id": saved_material._id,
        "angle_deg": angle_deg,
        "band_gap_direct": band_gap_direct,
        "band_gap_indirect": band_gap_indirect,
    }
)

## 10. Display results

In [None]:
from matplotlib import pyplot as plt
import pandas as pd

# Keep only rows that have both band gap values, ordered by twist angle.
df = pd.DataFrame(results).dropna(subset=["band_gap_direct", "band_gap_indirect"]).sort_values("angle_deg")
display(df)

# Scatter plot of both band gaps against the twist angle.
plt.figure(figsize=(5, 3.6), dpi=130)
plt.scatter(df["angle_deg"], df["band_gap_direct"], marker=">", label="K-valley bandgap (direct)")
plt.scatter(df["angle_deg"], df["band_gap_indirect"], marker="<", label="Indirect bandgap")
# The degree sign is written as a proper "°".
plt.xlabel(r"$\theta$ (°)")
plt.ylabel("Energy (eV)")
plt.xlim(-2, 62)
plt.legend(frameon=False, loc="best")
plt.tight_layout()
plt.show()