## Towards an interoperable perovskite ontology
This notebook demonstrates how to use the code in this project. <br>
The code can be run as a standalone application using the GUI_perovskite_to_json.py script

In [1]:
import os

import pandas as pd

from perovskite_to_json import PerovskiteToJson
from Utilities import filepaths
from Utilities import default_values

### Data for a test perovskite
The data below is what is needed to generate a perovskite composition file

In [2]:
# Categorical information
composition_estimate = "Estimated from precursor solutions"
sample_type = "Polycrystalline film"
dimensionality = "3D"

# Band gap in eV
bandgap = 1.63

# Abbreviations and coefficients for the perovskite ions.
# Within each site the abbreviations and coefficients are paired by position.
# NOTE(review): the A-site coefficients sum to 1.02 rather than 1 - confirm this is intended.
a_ions = ["Cs", "MA", "FA"]
a_coefficients = [0.05, 0.79, 0.18]
b_ions = ["Pb"]
b_coefficients = [1]
x_ions = ["Br", "I"]
x_coefficients = [0.5, 2.5]

# Data for additives, impurities, dopants, and secondary phases.
# In this example the additive is quantified by mass fraction and the impurity
# by concentration; the list that is not used is left empty.
additives_abbreviations = ["RbI"]
additives_concentrations = []
additives_mass_fractions = [0.001]

impurities_abbreviations = ["Fe+2"]
impurities_concentrations = [1e13]
impurities_mass_fractions = []

# The location of the data files with collected data about perovskite ions.
# "local" refers to files stored in this repository.
# Other file paths can be defined in the file filepaths.py found in the Utilities folder.
path_to_reference_data = "local"

# File path where the generated JSON file is saved
save_path = os.path.join(os.getcwd(), "test-4.json")

### Generate a perovskite object based on the test data

In [3]:
# All arguments are keyword arguments with sensible defaults (i.e. empty lists and the current working directory).
# The `save` keyword (not passed here, so its default applies) decides if the file is stored
# directly (True) or if only the composition object should be created (False).
# The reference-data location is taken from the `path_to_reference_data` variable set in the
# test-data cell, so changing it there actually takes effect here.
perovskite = PerovskiteToJson(
    composition_estimate=composition_estimate,
    sample_type=sample_type,
    dimensionality=dimensionality,
    bandgap=bandgap,
    a_ions_abbreviations=a_ions,
    a_coefficients=a_coefficients,
    b_ions_abbreviations=b_ions,
    b_coefficients=b_coefficients,
    x_ions_abbreviations=x_ions,
    x_coefficients=x_coefficients,
    additives_abbreviations=additives_abbreviations,
    additives_concentrations=additives_concentrations,
    additives_mass_fractions=additives_mass_fractions,
    impurities_abbreviations=impurities_abbreviations,
    impurities_concentrations=impurities_concentrations,
    impurities_mass_fractions=impurities_mass_fractions,
    path_to_reference_data=path_to_reference_data,
    save_path=save_path,
)

#### Inspect the generated perovskite object based on the test data

In [4]:
# Names of every attribute stored on the perovskite instance
print(vars(perovskite).keys())

dict_keys(['composition_estimate', 'sample_type', 'dimensionality', 'bandgap', 'additives', 'impurities', 'additives_abbreviations', 'additives_concentrations', 'additives_mass_fractions', 'impurities_abbreviations', 'impurities_concentrations', 'impurities_mass_fractions', 'a_ions_abbreviations', 'a_coefficients', 'b_ions_abbreviations', 'b_coefficients', 'x_ions_abbreviations', 'x_coefficients', 'path_to_reference_data', 'save_path', 'a_ions', 'b_ions', 'x_ions', 'short_form', 'long_form', 'reference_data_a_ions', 'reference_data_b_ions', 'reference_data_x_ions', 'reference_data_additive_and_impurities', 'json'])


In [5]:
# Composition strings available on the object after construction
print(f"Composition. Long form: {perovskite.long_form}")
print(f"Composition. Short form: {perovskite.short_form}")

# Echo the categorical inputs and the band gap as stored on the object
for field in ("composition_estimate", "sample_type", "dimensionality", "bandgap"):
    print(f"{field}: {getattr(perovskite, field)}")

Composition. Long form: Cs0.05FA0.18MA0.79PbBr0.5I2.5
Composition. Short form: CsFAMAPbBrI
composition_estimate: Estimated from precursor solutions
sample_type: Polycrystalline film
dimensionality: 3D
bandgap: 1.63


In [6]:
# Check how the data for the ions, the additives and the impurities have been formatted.
# Each ion is shown as a dict combining the abbreviation and coefficient with the
# identifiers found for it (formula, SMILES, names, CAS numbers).
print(f"a_ions: {perovskite.a_ions}")
print(f"b_ions: {perovskite.b_ions}")
print(f"x_ions: {perovskite.x_ions}")
print(f"Additives: {perovskite.additives}")
# Impurities were supplied in the test data as well, so show them alongside the additives
print(f"Impurities: {perovskite.impurities}")

a_ions: [{'abbreviation': 'Cs', 'coefficient': '0.05', 'molecular_formula': 'Cs+', 'smiles': '[Cs+]', 'common_name': 'Cesium ion', 'iupac_name': 'Cesium(1+)', 'cas_number': '18459-37-5', 'source_compound_smiles': '[Cs]', 'source_compound_iupac_name': 'Cesium', 'source_compound_cas_number': '7440-46-2'}, {'abbreviation': 'FA', 'coefficient': '0.18', 'molecular_formula': 'CH5N2+', 'smiles': 'C(=[NH2+])N', 'common_name': 'Formamidinium', 'iupac_name': 'aminomethylideneazanium', 'source_compound_smiles': 'C(=N)N', 'source_compound_iupac_name': 'methanimidamide', 'source_compound_cas_number': '463-52-5'}, {'abbreviation': 'MA', 'coefficient': '0.79', 'molecular_formula': 'CH6N+', 'smiles': 'C[NH3+]', 'common_name': 'Methylammonium', 'iupac_name': 'methylazanium', 'cas_number': '17000-00-9', 'source_compound_smiles': 'CN', 'source_compound_iupac_name': 'methanamine', 'source_compound_cas_number': '74-89-5'}]
b_ions: [{'abbreviation': 'Pb', 'coefficient': '1', 'molecular_formula': 'Pb+2', 'sm

#### Convert the perovskite object to .json

In [7]:
# The JSON representation is available as the `json` attribute of the perovskite object
json_data = perovskite.json

# Display the JSON text (ions are listed with their identifiers per site)
print(json_data)

{
    "long_form": "Cs0.05FA0.18MA0.79PbBr0.5I2.5",
    "short_form": "CsFAMAPbBrI",
    "composition_estimate": "Estimated from precursor solutions",
    "sample_type": "Polycrystalline film",
    "dimensionality": "3D",
    "band_gap": 1.63,
    "ions_a_site": [
        {
            "abbreviation": "Cs",
            "coefficient": "0.05",
            "molecular_formula": "Cs+",
            "smiles": "[Cs+]",
            "common_name": "Cesium ion",
            "iupac_name": "Cesium(1+)",
            "cas_number": "18459-37-5",
            "source_compound_smiles": "[Cs]",
            "source_compound_iupac_name": "Cesium",
            "source_compound_cas_number": "7440-46-2"
        },
        {
            "abbreviation": "FA",
            "coefficient": "0.18",
            "molecular_formula": "CH5N2+",
            "smiles": "C(=[NH2+])N",
            "common_name": "Formamidinium",
            "iupac_name": "aminomethylideneazanium",
            "source_compound_smiles": "C(=N)N

#### Save data to file

In [8]:
# Save the perovskite data to the file path defined in the test-data cell
perovskite.save_data(file_path=save_path)

### Check the data collected and used for the ions

In [9]:
# Default file paths to the reference data for ions are kept in filepaths.py.
# Two sets of file paths are available:
#   "local"  - a copy distributed with the code
#   "online" - an updated version stored online, to which data for new ions can be added
path_a_ions, path_b_ions, path_x_ions, path_additives = filepaths.paths_to_data(origin="local")

# Load each spreadsheet into its own DataFrame, in the same order as the paths above
(
    reference_data_a_ions,
    reference_data_b_ions,
    reference_data_x_ions,
    reference_data_additives,
) = map(pd.read_excel, (path_a_ions, path_b_ions, path_x_ions, path_additives))

# Show the A-site reference table as the cell output
reference_data_a_ions


Unnamed: 0,ID,Abbreviation,Alternative_abbreviations,Molecular_formula,SMILE,Common_name,IUPAC_name,CAS,Parent_IUPAC,Parent_SMILE,Parent_CAS,Common_source_compound,Source_compound_CAS,Chemical_formula,Number_of_occurances,Source
0,1,Cs,,Cs+,[Cs+],Cesium ion,Cesium(1+),18459-37-5,Cesium,[Cs],7440-46-2,CsI,7789-17-5,,7985.0,Perovskite database
1,2,K,,K+,[K+],Patassium ion,Potassium(+1),24203-36-9,Potassium,[K],2023695,KI,7681-11-0,,36.0,Perovskite database
2,3,La,,La+,[La+],Lanthanum ion,Lanthanum ion,,Lanthanum,[La],7439-91-0,La,7439-91-0,,6.0,Perovskite database
3,4,Li,,Li+,[Li+],Lithium ion,Lithium(+1),17341-24-1,Lithium,[Li],7439-93-2,LiI,10377-51-2,,4.0,Perovskite database
4,5,Na,,Na+,[Na+],Sodium ion,Sodium(+1),17341-25-2,Sodium,[Na],7440-23-5,NaI,7681-82-5,,6.0,Perovskite database
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,291,ClCA,MBA,C6H13ClN+,C1CC(CCC1[NH3+])Cl,R-4-Cl-MBA,(4-chlorocyclohexyl)azanium,,4-chlorocyclohexan-1-amine,C1CC(CCC1N)Cl,,,,,,2D database
291,292,MNBA,,C5H13O2N2+,ON(O)C1CCC([NH3+])CC1,(S)-α-methyl-4-nitrobenzylamonium,[4-(dihydroxyamino)cyclohexyl]azanium,,"4-N,4-N-dihydroxycyclohexane-1,4-diamine",ON(O)C1CCC(N)CC1,,,,,,2D database
292,293,4T,,C18H16S4N+,C1=C(SC=C1)C2=CC=C(S2)C3=CC=C(S3)C4=CC=C(S4)CC...,"2-(3''',4'-dimethyl- [2,2':5',2'':5'',2'''- qu...",2-[5-[5-(5-thiophen-2-ylthiophen-2-yl)thiophen...,,2-[5-[5-(5-thiophen-2-ylthiophen-2-yl)thiophen...,C1=C(SC=C1)C2=CC=C(S2)C3=CC=C(S3)C4=CC=C(S4)CCN,,,,,,2D database
293,294,NAA,1-NA,C10H10N+,C1=CC=C2C(=C1)C=CC=C2[NH3+],1-naphthylamine,naphthalen-1-ylazanium,70450-98-5,naphthalen-1-amine,C1=CC=C2C(=C1)C=CC=C2N,134-32-7,,,,,Paper


### Default values for categorical parameters

In [11]:
# Print each heading followed by the allowed values returned for that categorical parameter
for heading, get_values in (
    ("Default values for composition estimates", default_values.composition_estimates),
    ("Default values for dimensionality", default_values.dimensionality),
    ("Default values for sample_type", default_values.sample_type),
):
    print(heading)
    print(get_values())

Default values for composition estimates
['Estimated from precursor solutions', 'Literature value', 'Estimated from XRD data', 'Estimated from spectroscopic data', 'Theoretical simulation', 'Hypothetical compound', 'Other']
Default values for dimensionality
['0D', '1D', '2D', '3D', '2D/3D', 'Unknown']
Default values for sample_type
['Polycrystalline film', 'Single crystal', 'Quantum dots', 'Nano rods', 'Colloidal solution', 'Other']
