# Citrine data retrieval example: get experimental band gaps of PbTe from Citrine

This notebook demonstrates how to use matminer's **'retrieve_Citrine.py'** module to retrieve data from the various datasets collected by Citrine Informatics at http://citrination.com/ and output it as a pandas DataFrame. In this example, we query the Citrine API to retrieve all experimental band gaps of PbTe available in Citrine's database.

## Preamble

### Set pandas options to display all rows and columns

In [1]:
import pandas as pd

# Lift pandas' display limits so the retrieved frames are rendered in full:
# a wide console width and no cap on the number of columns or rows shown.
for option_name, option_value in (
    ('display.width', 1000),
    ('display.max_columns', None),
    ('display.max_rows', None),
):
    pd.set_option(option_name, option_value)

## Step 1:

### Import matminer's Citrine data retrieval tool, and retrieve all experimental band gaps of "PbTe" from Citrine's database in a Pandas dataframe. 

In [2]:
from matminer.data_retrieval.retrieve_Citrine import CitrineDataRetrieval

# Citrine API key. Leave this as None when the key is already exported in the
# 'CITRINE_KEY' environment variable; otherwise replace None with your key.
api_key = None

# Adapter used to talk to the Citrine database.
c = CitrineDataRetrieval(api_key)

# Ask for every experimental band gap record of PbTe; the result is the
# dataframe displayed as this cell's output.
pbte_query = {
    'formula': 'PbTe',
    'property': 'band gap',
    'data_type': 'experimental',
}
df = c.get_dataframe(**pbte_query)
df

100%|██████████| 14/14 [00:00<00:00, 67.91it/s]


Unnamed: 0_level_0,chemicalFormula,references,Band gap,Color,Crystallinity,Electroluminescence,Lasing,Morphology,Photoluminescence,Temperature derivative of band gap,Thermoluminescence,conditions,dataType,method,units
system,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",,,Single crystalline,,,,,,,,,,
1,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.29,,,,,,,,,"[{u'scalars': [{'value': u'Indirect'}], u'name...",EXPERIMENTAL,{u'name': u'Reflection'},eV
1,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",,,,True,,,,,,"[{u'units': u'K', u'scalars': [{'value': u'300...",EXPERIMENTAL,,
1,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",,,,,,,True,,,"[{u'units': u'K', u'scalars': [{'value': u'300...",EXPERIMENTAL,,
1,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",,,,,,,,,True,"[{u'units': u'K', u'scalars': [{'value': u'300...",EXPERIMENTAL,,
2,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",,,Single crystalline,,,,,,,,,,
2,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",,,,,,Thin film,,,,,,,
2,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.217,,,,,,,,,"[{u'scalars': [{'value': u'Direct'}], u'name':...",EXPERIMENTAL,{u'name': u'Magnetoabsorption'},eV
3,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",,,Single crystalline,,,,,,,,,,
3,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.21,,,,,,,,,"[{u'scalars': [{'value': u'Indirect'}], u'name...",EXPERIMENTAL,{u'name': u'Reflection'},eV


## Step 2:

### Filter out rows with null values of band gap

In [3]:
import numpy as np

# Boolean mask that is True only where 'Band gap' holds a finite number,
# so rows with a missing (NaN) band gap are discarded.
has_band_gap = np.isfinite(df['Band gap'])
df = df[has_band_gap]
df

Unnamed: 0_level_0,chemicalFormula,references,Band gap,Color,Crystallinity,Electroluminescence,Lasing,Morphology,Photoluminescence,Temperature derivative of band gap,Thermoluminescence,conditions,dataType,method,units
system,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.29,,,,,,,,,"[{u'scalars': [{'value': u'Indirect'}], u'name...",EXPERIMENTAL,{u'name': u'Reflection'},eV
2,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.217,,,,,,,,,"[{u'scalars': [{'value': u'Direct'}], u'name':...",EXPERIMENTAL,{u'name': u'Magnetoabsorption'},eV
3,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.21,,,,,,,,,"[{u'scalars': [{'value': u'Indirect'}], u'name...",EXPERIMENTAL,{u'name': u'Reflection'},eV
4,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.29,,,,,,,,,"[{u'scalars': [{'value': u'Indirect'}], u'name...",EXPERIMENTAL,{u'name': u'Absorption'},eV
5,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.19,,,,,,,,,"[{u'scalars': [{'value': u'Direct'}], u'name':...",EXPERIMENTAL,{u'name': u'Magnetoabsorption'},eV
6,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.34,,,,,,,,,"[{u'scalars': [{'value': u'Direct'}], u'name':...",EXPERIMENTAL,{u'name': u'Reflection'},eV
7,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.32,,,,,,,,,"[{u'scalars': [{'value': u'Direct'}], u'name':...",EXPERIMENTAL,{u'name': u'Absorption'},eV
8,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.21,,,,,,,,,"[{u'scalars': [{'value': u'Direct'}], u'name':...",EXPERIMENTAL,{u'name': u'Photoconduction'},eV
9,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.19,,,,,,,,,"[{u'scalars': [{'value': u'Indirect'}], u'name...",EXPERIMENTAL,{u'name': u'Absorption'},eV
10,Pb1Te1,"[{u'doi': u'10.1063/1.3253115', u'citation': u...",0.185,,,,,,,,,"[{u'scalars': [{'value': u'Direct'}], u'name':...",EXPERIMENTAL,{u'name': u'Photoconduction'},eV


## Step 3:

### Get basic statistics

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the band gaps.
band_gaps = df['Band gap']
band_gaps.describe()

count    14.000000
mean      0.245929
std       0.059461
min       0.185000
25%       0.190250
50%       0.213500
75%       0.305000
max       0.340000
Name: Band gap, dtype: float64