# Bandgap Workflow Example
This notebook demonstrates how to build and run a bandgap workflow for a material.
The example builds and runs the workflow for the twisted MoS2 interface from `specific_examples`.

## Process Overview
### 1. Set up the environment and parameters.
### 2. Log in to get the API token
### 3. Load the target material.
### 4. Import workflow builder and related analyzers.
### 5. Create the workflow and set its parameters.
### 6. Create the compute configuration.
### 7. Create a job with material and workflow configuration.
### 8. Submit the job and monitor its status.
### 9. Retrieve the results.
### 10. Display the results.

## 1. Set up the environment and parameters

In [None]:
import sys

# Package bootstrap: runs only when the interpreter reports the "emscripten"
# platform (presumably Pyodide/JupyterLite in the browser — confirm).
# The bare `await` calls rely on the notebook's top-level await support.
if sys.platform == "emscripten":
    import micropip

    # Install the api-examples wheel from its pinned URL without resolving its dependencies.
    await micropip.install("https://exabyte-io.github.io/api-examples/mat3ra_api_examples-0.1.dev1+gc24498978-py3-none-any.whl", deps=False)
    # mat3ra-utils provides the install_packages helper imported below.
    await micropip.install("mat3ra-utils")
    from mat3ra.utils.jupyterlite.packages import install_packages

    # NOTE(review): "api_examples" presumably names a requirements profile for this notebook — confirm.
    await install_packages("api_examples")

## 2. Log in to get the API token

In [None]:
import json
import os
import sys

# Placeholder credential values; each one is simply the name of the setting.
# NOTE(review): presumably meant to be replaced by the user when running
# outside of the platform — confirm.
ACCOUNT_ID = "ACCOUNT_ID"
AUTH_TOKEN = "AUTH_TOKEN"
ORGANIZATION_ID = "ORGANIZATION_ID"

# TODO: add OIDC
if sys.platform == "emscripten":
    # Only works if launched within the Platform, otherwise requires redirect to Login.
    # `data_from_host` is not defined in this notebook; it is expected to be
    # provided by the hosting environment.
    apiConfig = data_from_host.get("apiConfig")
    # First copy any environment variables forwarded by the host ...
    os.environ.update(data_from_host.get("environ", {}))
    # ... then export the API credentials and the JSON-encoded cluster list.
    os.environ.update(
        {
            "ACCOUNT_ID": apiConfig.get("accountId"),
            "AUTH_TOKEN": apiConfig.get("authToken"),
            "ORGANIZATION_ID": apiConfig.get("organizationId", ""),
            "CLUSTERS": json.dumps(apiConfig.get("clusters", [])),
        }
    )

## 3. Create material
### 3.1. Load material from local file

In [None]:
from utils.visualize import visualize_materials as visualize
from utils.jupyterlite import load_material_from_folder

# Load the interface material from the "../uploads" folder.
# NOTE(review): the second argument presumably selects the file by (partial) name — confirm
# against load_material_from_folder.
material = load_material_from_folder("../uploads", "MoS2(001)-MoS2(001), Interface")
# Render the loaded structure for a visual sanity check.
visualize(material)

### 3.2. Save material to the platform

In [None]:
from utils.settings import ACCOUNT_ID
from utils.generic import dict_to_namespace, display_JSON
from utils.api import create_material

# Use the organization as owner when ORGANIZATION_ID is set and non-empty;
# otherwise fall back to the personal account. `os` comes from the login cell.
OWNER_ID = os.getenv("ORGANIZATION_ID") or ACCOUNT_ID

# The returned data is reused later for its "_id" and when creating the job.
saved_material_data = create_material(material, OWNER_ID)

### 3.3. Get material id

In [None]:
# Show the "_id" field of the data returned when the material was saved.
print("Material ID:", saved_material_data["_id"])

## 5. Create workflow and set its parameters
### 5.1. Get list of applications and select one

In [None]:
from mat3ra.standata.applications import ApplicationStandata
from mat3ra.ade.application import Application

# Get Applications list (with versions, build)
apps_list = ApplicationStandata.list_all()
# NOTE(review): per the original author's note, an entry looks like
# {"name": "espresso", "version": "7.2", "build": "GNU"} — confirm the exact structure.

In [None]:
# Pick the first application configuration matching "espresso".
app_config = ApplicationStandata.get_by_name_first_match("espresso")
# returns name, version, build config
app = Application(**app_config)
# Last expression of the cell: the notebook displays the application name.
app.name

### 5.2. Create workflow from standard workflows and preview it

In [None]:
from mat3ra.standata.workflows import WorkflowStandata
from mat3ra.wode.workflows import Workflow
from utils.visualize import visualize_workflow

# TODO: adjust Standata to search for human readable `name`, or regex, or filename
# Search WF by name and application: restrict to workflows of the selected
# application, then take the first match for "band_gap.json".
workflow_config = WorkflowStandata.filter_by_application(app.name).get_by_name_first_match("band_gap.json")
workflow = Workflow.create(workflow_config)

# View workflow to understand its structure
visualize_workflow(workflow)

### 5.3. Add relaxation subworkflow

In [None]:
from utils.visualize import visualize_workflow

# Modifies `workflow` in place (the return value is not used).
workflow.add_relaxation()
# Relaxation subworkflow is added as the first subworkflow
visualize_workflow(workflow)

### 5.4. Change subworkflow details (Model subtype)

In [None]:
from mat3ra.standata.model_tree import ModelTreeStandata

swf_0 = workflow.subworkflows[0]  # relaxation subworkflow
swf_1 = workflow.subworkflows[1]  # band structure subworkflow

# Build one DFT model (LDA subtype, PZ functional) and assign it to BOTH subworkflows.
# The two lookups below also serve as a preview of the available choices:
subtypes = ModelTreeStandata.get_subtypes_by_model_type("dft")  # ["gga", "lda"] as enum
functionals = ModelTreeStandata.get_functionals_by_subtype("dft", subtypes.LDA)  # ["pz", ...] as enum

model = ModelTreeStandata.get_model_by_parameters(
    type="dft",
    subtype=subtypes.LDA,
    functional=functionals.PZ,
)
# The same model object is shared by the two subworkflows.
swf_0.model = model
swf_1.model = model
# Last expression of the cell: the notebook displays the selected model.
model

### 5.5. Modify k-grid in subworkflow units
#### 5.5.1. Define k-grid values and create context providers

In [None]:
from mat3ra.wode.context.providers import PointsGridDataProvider

# Values from publication
# Each grid is its own list so that editing one cannot silently change another.
kgrid_relax = [6, 6, 1]
kgrid_scf = [6, 6, 1]
kgrid_nscf = [12, 12, 1]

# In future: helper to workflow.set_context_to_unit_by_name_regex(context_provider, unit_name_regex)
# One provider per calculation step, each built from its own grid
# (the relax provider uses kgrid_relax, not kgrid_scf).
kgrid_context_provider_relax = PointsGridDataProvider(dimensions=kgrid_relax)
kgrid_context_provider_scf = PointsGridDataProvider(dimensions=kgrid_scf)
kgrid_context_provider_nscf = PointsGridDataProvider(dimensions=kgrid_nscf)

#### 5.5.2. Get new context data and set it to workflow units

In [None]:
# Ask each k-grid provider for its context data; these are attached to the
# matching workflow units in the next cell.
new_context_relax = kgrid_context_provider_relax.get_data()
new_context_scf = kgrid_context_provider_scf.get_data()
new_context_nscf = kgrid_context_provider_nscf.get_data()

#### 5.5.3. Modify workflow units with new context

In [None]:
# Get workflow's specific unit that needs to be modified
# Option 1: search is done by unit name regex across the entire workflow
unit_to_modify_relax = workflow.get_unit_by_name(name_regex="relax")
unit_to_modify_relax.add_context(new_context_relax)

# Option 2: search is done by unit name within a specific subworkflow
# (index 1 is the subworkflow after the relaxation one that was inserted first).
unit_to_modify_scf = workflow.subworkflows[1].get_unit_by_name(name="pw_scf")
unit_to_modify_scf.add_context(new_context_scf)
unit_to_modify_nscf = workflow.subworkflows[1].get_unit_by_name(name="pw_nscf")
unit_to_modify_nscf.add_context(new_context_nscf)

# Set the modified unit back to the workflow
# Option 1: direct set by unit object, replacing the existing one
workflow.set_unit(unit_to_modify_relax)

# Option 2: set by unit flowchart id and new unit object
workflow.set_unit(unit_flowchart_id=unit_to_modify_scf.flowchart_id, new_unit=unit_to_modify_scf)
workflow.set_unit(unit_flowchart_id=unit_to_modify_nscf.flowchart_id, new_unit=unit_to_modify_nscf)
# Re-render the workflow, then show its dictionary form as the cell output.
visualize_workflow(workflow)
workflow.to_dict()

### 5.6. Save workflow to collection

In [None]:
from utils.settings import ACCOUNT_ID
from utils.api import create_workflow

# Save the workflow under the same owner that is used for the material, the
# project lookup and the job: the organization when ORGANIZATION_ID is set and
# non-empty, otherwise the personal account. Saving under ACCOUNT_ID only would
# put the workflow in a different account than the job that references it.
# `os` comes from the login cell.
OWNER_ID = os.getenv("ORGANIZATION_ID") or ACCOUNT_ID

# The returned data is used later for its "_id" when the job is created.
saved_workflow_data = create_workflow(workflow, OWNER_ID)

## 6. Create the compute configuration
### 6.1. View available clusters and providers

In [None]:
# CLUSTERS is expected to hold a JSON-encoded list of cluster configurations
# (it is exported by the login cell when running inside the platform).
# Treat an unset or empty variable as an empty list instead of letting
# json.loads fail on None.
available_clusters = json.loads(os.getenv("CLUSTERS") or "[]")

# Use the first cluster, or an empty config when none is available.
cluster_config = next(iter(available_clusters), {})
queue_configs = cluster_config.get("queues", [])

# Fall back to a default cluster name when the config has no display name.
CLUSTER_NAME = cluster_config.get("displayName", "cluster-001")

### 6.2. Create compute configuration

In [None]:
from exabyte_api_client.endpoints.jobs import JobEndpoints
from utils.settings import ENDPOINT_ARGS

# Jobs endpoint client built from the shared endpoint arguments.
exabyte_jobs_endpoint = JobEndpoints(*ENDPOINT_ARGS)

# Compute configuration for the selected cluster; all other compute settings
# are left to get_compute's defaults.
compute = exabyte_jobs_endpoint.get_compute(
    cluster=CLUSTER_NAME
)

## 7. Create the job with material and workflow configuration

In [None]:
from datetime import datetime

from utils.settings import ACCOUNT_ID
from utils.api import get_default_project

# Use the organization as owner when ORGANIZATION_ID is set and non-empty;
# otherwise fall back to the personal account.
OWNER_ID = os.getenv("ORGANIZATION_ID") or ACCOUNT_ID

project_id = get_default_project(OWNER_ID)

# Naive local time at minute resolution; it is only used to label the job below.
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")

JOB_NAME = f"Band Gap {timestamp}"

In [None]:
from utils.generic import display_JSON

job_endpoints = JobEndpoints(*ENDPOINT_ARGS)

# Create the job from the saved material and workflow; JOB_NAME is passed as the
# name prefix and the compute configuration comes from the compute cell.
job_response = job_endpoints.create_by_ids(
    materials=[saved_material_data],
    workflow_id=saved_workflow_data["_id"],
    project_id=project_id,
    prefix=JOB_NAME,
    owner_id=OWNER_ID,
    compute=compute
)

# Convert the response to a namespace so fields can be read with dot notation
# (job._id, job.workflow). dict_to_namespace is imported in the material-saving cell.
# NOTE(review): the original comment mentions a "jobs list" — confirm whether
# create_by_ids returns a single job or a list.
job = dict_to_namespace(job_response)

display_JSON(job_response)

## 8. Submit the job and monitor the status

In [None]:
# Submit the job created above, identified by its _id.
job_endpoints.submit(job._id)


In [None]:
from utils.generic import wait_for_jobs_to_finish

# Wait for the submitted job to finish.
# NOTE(review): poll_interval is presumably in seconds — confirm.
wait_for_jobs_to_finish(job_endpoints, [job._id], poll_interval=60)

## 9. Retrieve results

In [None]:
import re
from utils.generic import get_property_by_subworkflow_and_unit_indicies
from exabyte_api_client.endpoints.properties import PropertiesEndpoints

property_endpoints = PropertiesEndpoints(*ENDPOINT_ARGS)

results = []
# Properties taken from subworkflow index 0, unit index 0
# (presumably the relaxation step, which is the first subworkflow — confirm).
final_structure = get_property_by_subworkflow_and_unit_indicies(property_endpoints, "final_structure", job, 0, 0)[
    "data"
]
pressure = get_property_by_subworkflow_and_unit_indicies(property_endpoints, "pressure", job, 0, 0)["data"]["value"]

# Use dot notation to access workflow property
# NOTE(review): the band gaps are looked up on the "pw_scf" unit — confirm this
# is the unit that holds them (rather than "pw_nscf").
unit_flowchart_id = Workflow(job.workflow).get_unit_by_name(name="pw_scf").flowchart_id
band_gap_direct = property_endpoints.get_direct_band_gap(job._id, unit_flowchart_id)
band_gap_indirect = property_endpoints.get_indirect_band_gap(job._id, unit_flowchart_id)

# Extract the twist angle from a material name such as "... 21.79 degrees ...".
# It is stored as a float so that sorting and plotting treat it as a number, and
# as NaN (instead of raising AttributeError) when the name carries no angle.
angle_match = re.search(r"(\d+(?:\.\d+)?) degrees", material.name)
angle_deg = float(angle_match.group(1)) if angle_match else float("nan")

# NOTE(review): material._id is read from the locally loaded material; the id of
# the saved material is saved_material_data["_id"] — confirm which one is intended.
results.append(
    {
        "material_id": material._id,
        "angle_deg": angle_deg,
        "band_gap_direct": band_gap_direct,
        "band_gap_indirect": band_gap_indirect,
    }
)

## 10. Display results

In [None]:
from matplotlib import pyplot as plt
import pandas as pd

# Keep only rows that have both band gaps, make sure the angle is numeric
# (non-numeric values become NaN) and order the rows by angle. Without the
# numeric conversion a string angle would be sorted lexicographically and drawn
# on a categorical axis, which does not fit the numeric x-limits below.
df = (
    pd.DataFrame(results)
    .dropna(subset=["band_gap_direct", "band_gap_indirect"])
    .assign(angle_deg=lambda frame: pd.to_numeric(frame["angle_deg"], errors="coerce"))
    .sort_values("angle_deg")
)
display(df)

# Scatter plot of direct and indirect band gaps versus twist angle.
plt.figure(figsize=(5, 3.6), dpi=130)
plt.scatter(df["angle_deg"], df["band_gap_direct"], marker=">", label="K-valley bandgap (direct)")
plt.scatter(df["angle_deg"], df["band_gap_indirect"], marker="<", label="Indirect bandgap")
# Degree sign repaired: the label previously contained the mis-encoded "Â°".
plt.xlabel(r"$\theta$ (°)")
plt.ylabel("Energy (eV)")
plt.xlim(-2, 62)
plt.legend(frameon=False, loc="best")
plt.tight_layout()
plt.show()