# Import materials from files in multiple formats (CIF, POSCAR, etc.)

This notebook uses the ASE Python package to extract structural information from files in multiple formats (CIF, POSCAR, etc., as supported by ASE). Some formats, like `espresso-in` and `espresso-out`, can be inferred from the file content.

<h2 style="color:green">Usage</h2>

1. Upload files to the `uploads` folder: open (double-click) the folder in the left sidebar, then click "Upload" and select the files to upload, or just drag-and-drop files onto the sidebar.
1. Click "Run" > "Run All Cells" to run all cells
1. If a file's format cannot be detected, correct the file extension so that it matches the file format and try again

## Methodology

The following happens in the script below:

1. Install the required packages
1. The files are read from the `uploads` folder, assuming their extensions represent the format - e.g. `SiO2.poscar`, `Ni.pwi`.
1. Structural information is read from files into ASE Atoms objects.
1. ASE Atoms objects are converted to `poscar` format 
1. `poscar` structures are converted to ESSE
1. The results are visualized and passed to the outside runtime

## 1. Set Parameters

In [None]:
# Folder that is scanned for structure files; upload files to this folder
FOLDER_PATH = "./uploads"

# By default (None), the format of each file is guessed by ASE (from the file extension or content).
# If set to a list of format names, only those formats are tried, in the given order,
# until one of them reads the file successfully.
ENFORCED_FORMATS = None # e.g. ["cif", "espresso-in"]

# If set to True, the file extension is stripped from the resulting material name
# (e.g. "SiO2.poscar" -> "SiO2"); if False, the full file name is used
USE_FILE_NAME_NO_EXTENSION = False

# If set to True, the supported formats and required extensions are always printed below
# (they are printed anyway when some files could not be read)
SHOW_SUPPORTED_FORMATS = False

## 2. Install Packages

In [None]:
import sys
# sys.platform is "emscripten" when Python runs in the browser via WebAssembly
# (presumably a Pyodide-based JupyterLite kernel); other environments skip this step.
if sys.platform == "emscripten":
    import micropip
    # deps=False: install this package alone, without pulling in its dependencies
    await micropip.install("mat3ra-api-examples", deps=False)
    await micropip.install('mat3ra-utils')
    from mat3ra.utils.jupyterlite.packages import install_packages
    # Presumably installs the packages configured for this notebook — confirm against mat3ra-utils
    await install_packages("import_materials_from_files.ipynb")

## 3. Data Processing

### 3.1. Read data from files in `uploads` folder

In [None]:
import os
from pathlib import Path
from ase.io import read

# Read every entry of FOLDER_PATH with ASE, collecting the parsed structures and
# keeping track of which files could and could not be read.
ase_atoms, readable_file_names, unreadable_file_names = [], [], []

# os.listdir() returns entries in arbitrary order; sort them so that the order
# of the imported materials is reproducible between runs.
file_names = sorted(os.listdir(FOLDER_PATH))

# A format of None lets ASE guess the format itself. This does not depend on
# the file, so it is computed once rather than on every iteration.
formats = ENFORCED_FORMATS or [None]

for file_name in file_names:
    file_path = os.path.join(FOLDER_PATH, file_name)
    # Reasons why each attempted format failed, reported only if all of them fail
    failures = []

    for fmt in formats:
        try:
            atoms = read(file_path, format=fmt)
            atoms.info["file_name"] = Path(file_name).stem if USE_FILE_NAME_NO_EXTENSION else file_name
            ase_atoms.append(atoms)
            readable_file_names.append(file_name)
            print("Successfully read:", atoms.info["file_name"], "using format:", fmt)
            break
        except Exception as e:
            # ASE parsers raise many different exception types, so a broad catch
            # is intentional here; the reason is kept instead of being discarded.
            failures.append(f"format {fmt}: {type(e).__name__}: {e}")
            continue
    else:
        # No format succeeded for this file
        unreadable_file_names.append(file_name)
        print("Failed to read:", file_name)
        for failure in failures:
            print("    ", failure)

### 3.2. List the imported data

In [None]:
# Summarize the import: how many files were read, and which ones were not.
print(
    f"Successfully read {len(ase_atoms)} files: {readable_file_names}. ",
    f"Unreadable files: {unreadable_file_names}. ",
    sep="\n",
)

### 3.3. Troubleshoot data

In [None]:
from ase.io.formats import ioformats
import pandas as pd

# Print the table of formats known to ASE, either on request
# (SHOW_SUPPORTED_FORMATS) or to help fix files that could not be read.
if unreadable_file_names or SHOW_SUPPORTED_FORMATS:
    # Report unreadable files only when there are some; otherwise the message
    # would show an empty list when the table was merely requested.
    if unreadable_file_names:
        print(f"Unreadable files found: {unreadable_file_names}. See formats/extensions below.")
    # One row per ASE I/O format: its name, file extensions and description
    data = [[frmt.name, frmt.extensions, frmt.description] for frmt in ioformats.values()]
    dataframe = pd.DataFrame(data, columns=["Format Name", "File Extensions", "Description"])
    print(dataframe.to_markdown())

### 3.4. Convert to ESSE format

In [None]:
import io
from ase import Atoms
from ase.io import write
from express import ExPrESS

def ase_to_poscar(atoms: Atoms):
    """Serialize an ASE Atoms object to a string in the "vasp" (POSCAR) format.

    Args:
        atoms: The structure to serialize.

    Returns:
        The POSCAR content as a string, or None if writing failed
        (the error is printed, not raised).
    """
    # The in-memory buffer is closed automatically when the block is left.
    with io.StringIO() as buffer:
        try:
            write(buffer, atoms, format="vasp")
        except Exception as err:
            print(f"Error converting ASE atoms to POSCAR: {err}")
            return None
        return buffer.getvalue()

def convert_ase_entry_to_esse(ase_entry):
    """Convert an ASE Atoms object to an ESSE material via its POSCAR string.

    The material name is taken from ``ase_entry.info["file_name"]``,
    falling back to "Unknown" when that key is absent.

    Args:
        ase_entry: The ASE Atoms object to convert.

    Returns:
        The ESSE material, or None if any step of the conversion failed
        (the error is printed, not raised).
    """
    try:
        poscar_content = ase_to_poscar(ase_entry)
        if poscar_content is None:
            raise ValueError("Failed to generate POSCAR string")

        # The same keyword arguments go to both the ExPrESS constructor
        # and the property extraction call.
        structure_kwargs = dict(structure_string=poscar_content, structure_format="poscar")
        material = ExPrESS("structure", **structure_kwargs).property("material", **structure_kwargs)
        material["name"] = ase_entry.info.get("file_name", "Unknown")
    except Exception as err:
        print(f"Error processing ASE entry: {err}")
        return None

    return material

# Convert every structure that was read; failed conversions (None) are dropped.
esse_entries = [
    esse
    for esse in (convert_ase_entry_to_esse(atoms_entry) for atoms_entry in ase_atoms)
    if esse is not None
]

### 3.5. Preview the data

In [None]:
from utils.visualize import visualize_materials
from mat3ra.made.material import Material

# Wrap each ESSE entry in a Material object, then render all of them.
materials = list(map(Material, esse_entries))

visualize_materials(materials, viewer="wave")

## 4. Pass data to the outside runtime

In [None]:
from utils.jupyterlite import set_materials

# Pass the imported materials to the outside runtime
set_materials(materials)