In [35]:
from matscholar.rest import Rester
from os import environ

In [36]:
# A Rester falls back to the MATERIALS_SCHOLAR_API_KEY and MATERIALS_SCHOLAR_ENDPOINT
# environment variables when nothing is passed; here both values are supplied explicitly.
rest_connection = Rester(
    api_key=environ["MATERIALS_SCHOLAR_API_KEY"],
    endpoint=environ["MATERIALS_SCHOLAR_ENDPOINT"],
)

In [37]:
# Fetch the 20 materials that appear most often together with the entity "thermoelectric".
# No element constraint is applied (empty `elements` list).
thermoelectric_materials = rest_connection.materials_search_ents(
    entities=["thermoelectric"],
    elements=[],
    cutoff=20,
)

print("Materials most commonly associated with thermoelectric: ")
# Every result unpacks into (material, abstract count, list of DOIs); the DOIs are not shown.
for formula, n_abstracts, _dois in thermoelectric_materials[:20]:
    print("%s, Abstract Count: %d" % (formula, n_abstracts))


http://api.matscholar.com/search/material_search
<Response [200]>
{'valid_response': True, 'response': [['In', 148, ['10.1039/C5EE01309G', '10.1039/C3EE43240H', '10.1039/C5CP00972C', '10.1039/C5TC01977J', '10.1039/C5TC01573A', '10.1039/C7TC00009J', '10.1039/C7TC02162C', '10.1039/C3TA01599H', '10.1039/C4TA00487F', '10.1039/C4TA01333F', '10.1039/C5TA01967B', '10.1039/C6TA08788D', '10.1039/C7TA03359A', '10.1039/C5MH00021A', '10.1039/C4DT01909A', '10.1016/j.jallcom.2013.11.189', '10.1016/0022-3093(93)90060-B', '10.1039/C7CP03749J', '10.1016/j.solmat.2003.12.003', '10.1016/j.ceramint.2016.01.023', '10.1016/j.jallcom.2010.10.004', '10.1016/j.matdes.2016.03.001', '10.1016/j.matdes.2015.12.172', '10.1016/S0022-3697(03)00214-2', '10.1016/S0022-3697(02)00327-X', '10.1016/j.jpcs.2010.03.028', '10.1016/j.jpcs.2016.12.008', '10.1016/j.jallcom.2012.01.093', '10.1016/j.matchemphys.2014.03.055', '10.1016/j.matchemphys.2007.10.021', '10.1016/S0022-3093(99)00260-4', '10.1016/0022-3093(84)90736-1', '10.1

In [38]:
# Same query, but restricted to materials that do not contain Te: the "-Te" element
# filter carries the exclusion.
thermoelectric_materials = rest_connection.materials_search_ents(
    entities=["thermoelectric"],
    elements=["-Te"],
    cutoff=20,
)

# NOTE(review): this header does not mention the Te exclusion applied above.
print("Materials most commonly associated with thermoelectric: ")
# Every result unpacks into (material, abstract count, list of DOIs); the DOIs are not shown.
for formula, n_abstracts, _dois in thermoelectric_materials[:20]:
    print("%s, Abstract Count: %d" % (formula, n_abstracts))

http://api.matscholar.com/search/material_search
<Response [200]>
{'valid_response': True, 'response': [['Ti', 124, ['10.1149/1.3493591', '10.1039/C3NJ00309D', '10.1039/C7CP00020K', '10.1039/C6CP06364K', '10.1039/C5TC02025E', '10.1039/C5TC01196E', '10.1039/C3TA13955G', '10.1039/C5TA01794G', '10.1039/C5TA00694E', '10.1039/C6TA04957E', '10.1039/C6TA04123J', '10.1016/j.jallcom.2014.05.087', '10.1016/0022-3093(93)90189-5', '10.1039/C4CP02561J', '10.1016/j.ceramint.2016.07.091', '10.1016/S0167-2738(01)00987-0', '10.1016/j.matlet.2004.07.041', '10.1016/j.ceramint.2015.02.083', '10.1016/j.ceramint.2015.01.157', '10.1016/j.jallcom.2010.11.027', '10.1016/0038-1098(94)90568-1', '10.1016/j.jallcom.2011.12.130', '10.1016/j.apsusc.2014.02.173', '10.1016/j.jeurceramsoc.2006.04.012', '10.1016/S0925-8388(00)01285-8', '10.1016/j.compositesb.2011.07.015', '10.1016/0167-2738(94)90130-9', '10.1016/j.jallcom.2004.05.078', '10.1016/0038-1098(81)90321-5', '10.1016/j.intermet.2006.08.008', '10.1016/j.intermet

In [49]:
# Look up which other entities are associated with a material, here Sb2Te3.
# Valid filter keys are "material", "property", "application", "descriptor",
# "characterization", "synthesis" and "phase".
# A leading '-' on a value restricts the search to abstracts WITHOUT that entity.
material_summary = rest_connection.get_summary({'material': ['Sb2Te3']})

for entity_type, entities in material_summary.items():
    print(entity_type)
    # Only the first five entries of each entity type are shown.
    for entity in entities[:5]:
        # Each entry is indexed as [name, abstract count, abstract fraction].
        # "%.2f" prints the fraction with two decimals; the earlier "%f.2" printed six
        # decimals followed by a stray literal ".2".
        print("Entity: %s, Abstract Count: %d, Abstract Fraction: %.2f" % (entity[0], entity[1], entity[2]))


http://api.matscholar.com/ent_search/summary
<Response [200]>
{'valid_response': True, 'response': {'DSC': [['films', 143, 0.3620253164556962], ['thin films', 104, 0.26329113924050634], ['substrate', 80, 0.20253164556962025], ['layer', 76, 0.19240506329113924], ['alloy', 55, 0.13924050632911392], ['amorphous', 43, 0.10886075949367088], ['single crystal', 40, 0.10126582278481013], ['doped', 39, 0.09873417721518987], ['crystals', 35, 0.08860759493670886], ['crystalline', 31, 0.07848101265822785], ['nanostructures', 29, 0.07341772151898734], ['bulk', 27, 0.06835443037974684], ['polycrystalline', 23, 0.05822784810126582], ['surface', 23, 0.05822784810126582], ['interface', 23, 0.05822784810126582], ['stoichiometric', 21, 0.053164556962025315], ['solid solution', 18, 0.04556962025316456], ['as-prepared', 17, 0.043037974683544304], ['powder', 15, 0.0379746835443038], ['glass', 15, 0.0379746835443038], ['multilayer', 12, 0.030379746835443037], ['nanocrystalline', 12, 0.030379746835443037], ['

In [53]:
# Find materials similar to Sb2Te3. Similarity is derived from word embeddings,
# as described in 10.1038/s41586-019-1335-8.
similar_materials = rest_connection.get_similar_materials(
    material="Sb2Te3",
)

print("Similar materials to Sb2Te3:")
# One similar material per output line.
for neighbour in similar_materials:
    print(neighbour)

http://api.matscholar.com/materials/similar/Sb2Te3
<Response [200]>
{'valid_response': True, 'response': ['SbTe', 'GeSb4Te7', 'Sb2SeTe2', 'Sb2Te', 'Bi2Sb8Te15', 'Sb7Te3', 'Ge2Sb2Te5', 'GeSbTe2', 'Sb3Te', 'Sb4Te']}
Similar materials to Sb2Te3:
SbTe
GeSb4Te7
Sb2SeTe2
Sb2Te
Bi2Sb8Te15
Sb7Te3
Ge2Sb2Te5
GeSbTe2
Sb3Te
Sb4Te


In [18]:
# Abstracts can be searched directly instead of grouping the hits by material.
# The text query is left empty, so only the entity filter (property: thermoelectric) applies.
thermoelectric_abstracts = rest_connection.search_text_with_ents(
    text="",
    filters={'property': ['thermoelectric']},
    cutoff=100,
)

# Each hit holds abstract text plus metadata (e.g. doi, journal, authors).
# NOTE(review): reportedly only the first 200 characters of the abstract come back - confirm against the API.
# Only the DOI of each hit is printed.
for hit in thermoelectric_abstracts:
    print(hit['doi'])


http://api.matscholar.com/search
<Response [200]>
{'valid_response': True, 'response': [{'authors': ['Mi-Kyung Han', 'Byung-Gyu Yu', 'Yingshi Jin', 'Sung-Jin Kim'], 'link': 'http://pubs.rsc.org/en/content/articlehtml/2017/qi/c6qi00544f', 'abstract': 'In this study, a series of bismuth telluride (Bi2Te3) samples doped with a metal iodide (MI; M = Cu, Cs, K) were successfully prepared by the conventional solid-state reaction. The electron and thermal transport properties, i.e., electrical conductivity, carrier concentration, Hall mobility, Seebeck coefficient, and thermal conductivity, of MI-doped Bi2Te3 were measured in the temperature range of 300–650 K to understand the effect of metal iodide doping on the thermoelectric performance of Bi2Te3. The microstructure and morphologies of MI-doped Bi2Te3 were investigated by powder X-ray diffraction and high resolution transmission electron microscopy. The electrical conductivity increases with increasing MI contents due to the co-doping eff

In [25]:
# A free-text query can be combined with entity filters: here the text "doped" together
# with a property filter and a synthesis filter.
thermoelectric_abstracts = rest_connection.search_text_with_ents(
    text="doped",
    filters={'property': ['thermoelectric'], "synthesis": ['solvothermal']},
    cutoff=100,
)

for hit in thermoelectric_abstracts:
    print(hit['doi'])

# The text search also covers metadata such as the journal name; the escaped inner
# quotes are part of the query string that is sent.
thermoelectric_abstracts = rest_connection.search_text_with_ents(
    text="\"Solid State Communications\"",
    filters={'property': ['thermoelectric']},
    cutoff=100,
)

# Print journal and DOI side by side for every hit.
for hit in thermoelectric_abstracts:
    print(hit['journal'], hit['doi'])

http://api.matscholar.com/search
<Response [200]>
{'valid_response': True, 'response': [{'authors': ['Kim, Dain', 'Park, Juyun', 'Kim, Joo Hyun', 'Kang, Yong-Cheol', 'Kim, Hyun Sung'], 'link': 'https://www.sciencedirect.com/science/article/pii/S0040609017308817', 'abstract': 'Antimony (Sb)-doped tin oxide (ATO) nanoparticles with various doping amounts were prepared by solvothermal synthesis. ATO thin films were fabricated on glass substrates via spin coating using a prepared ATO nanoparticle suspension solution. The effect of doping amount and annealing temperature in production of thin film on Seebeck coefficient and electrical conductivity was investigated. The optimized doping conditions (2mol% Sb and annealing temperature=500°C) produced ATO thin films exhibiting power factors (S2σ), 64.4μWm−1K−2 which is encouraging for synthesis using inexpensive and facile solution-processing procedures. ', 'title': 'Thermoelectric properties of solution-processed antimony-doped tin oxide thin 

In [33]:
#We can even perform entity extraction on a new raw-text abstract
#The sample below is a plain, untagged abstract about thermoelectric Mg3Sb2 alloys; it is used as the document to tag
abstract = "Over the past years, thermoelectric Mg3Sb2 alloys particularly in n‐type conduction, have attracted increasing attentions for thermoelectric applications, due to the multivalley conduction band, abundance of constituents, and less toxicity. However, the high vapor pressure, causticity of Mg, and the high melting point of Mg3Sb2 tend to cause the inclusion in the materials of boundary phases and defects that affect the transport properties. In this work, a utilization of tantalum‐sealing for melting enables n‐type Mg3Sb2 alloys to show a substantially higher mobility than ever reported, which can be attributed to the purification of phases and to the coarse grains. Importantly, the inherently high mobility successfully enables the thermoelectric figure of merit in optimal compositions to be highly competitive to that of commercially available n‐type Bi2Te3 alloys and to be higher than that of other known n‐type thermoelectrics at 300–500 K. This work reveals Mg3Sb2 alloys as a top candidate for near‐room‐temperature thermoelectric applications."

# Send the abstract as a single-document batch and request the "normalized" return type.
tagged_abstract = rest_connection.get_ner_tags(
    docs=[abstract],
    return_type="normalized",
)

# NOTE(review): only tagged_abstract[0][0] is walked - presumably the first sentence of
# the first document; confirm whether the remaining sentences should be printed too.
first_sentence = tagged_abstract[0][0]
for tagged_word in first_sentence:
    word, tag = tagged_word[0], tagged_word[1]
    # 'O' marks a word that is not an entity; most words carry it, so skip them.
    if tag == 'O':
        continue
    print("Word: %s, tag: %s" % (word, tag))

http://api.matscholar.com/ner
<Response [200]>
{'valid_response': True, 'response': [[[['over', 'O'], ['the', 'O'], ['past', 'O'], ['years', 'O'], [',', 'O'], ['thermoelectric', 'PRO'], ['Mg3Sb2', 'MAT'], ['alloy', 'DSC'], ['particularly', 'O'], ['in', 'O'], ['n-type conduction', 'PRO'], [',', 'O'], ['have', 'O'], ['attracted', 'O'], ['increasing', 'O'], ['attentions', 'O'], ['for', 'O'], ['thermoelectric applications', 'APL'], [',', 'O'], ['due', 'O'], ['to', 'O'], ['the', 'O'], ['multivalley', 'O'], ['conduction band', 'PRO'], [',', 'O'], ['abundance', 'O'], ['of', 'O'], ['constituents', 'O'], [',', 'O'], ['and', 'O'], ['less', 'O'], ['toxicity', 'PRO'], ['.', 'O']], [['however', 'O'], [',', 'O'], ['the', 'O'], ['high', 'O'], ['vapor', 'O'], ['pressure', 'O'], [',', 'O'], ['causticity', 'PRO'], ['of', 'O'], ['Mg', 'MAT'], [',', 'O'], ['and', 'O'], ['the', 'O'], ['high', 'O'], ['melting point', 'PRO'], ['of', 'O'], ['Mg3Sb2', 'MAT'], ['tend', 'O'], ['to', 'O'], ['cause', 'O'], ['the',

In [57]:
# Fetch the Word2Vec embedding for a word or phrase (as per 10.1038/s41586-019-1335-8)
# and print the result as returned.
embedding = rest_connection.get_embedding(
    wordphrases=['thermoelectric'],
)
print(embedding)

# Look up the words closest to "thermoelectric"; cosine similarity between embeddings
# is the metric.
close_words = rest_connection.close_words(
    positive=['thermoelectric'],
)
print("Words most similar to thermoelectric:")
# The matching words sit under the 'close_words' key of the result.
for neighbour in close_words['close_words']:
    print(neighbour)

http://api.matscholar.com/embeddings
<Response [200]>
{'valid_response': True, 'response': {'embeddings': [[0.10964030027389526, 0.023685799911618233, 0.034630101174116135, 0.009291499853134155, -0.028289999812841415, 0.026606600731611252, 0.17078959941864014, -0.120243601500988, 0.013456299901008606, 0.20113469660282135, -0.03279370069503784, 0.014385499991476536, -0.02420170046389103, -0.011055800132453442, 0.046129800379276276, 0.05212010070681572, -0.02431109920144081, 0.021949900314211845, -0.01902030035853386, 0.07509759813547134, 0.07995100319385529, -0.05699539929628372, -0.016665799543261528, -0.0376909002661705, 0.00012029999925289303, -0.06318250298500061, 0.0376909002661705, 0.16659200191497803, 0.11668000370264053, 0.15297169983386993, -0.029623599722981453, -0.03334030136466026, 0.013624699786305428, 0.08406119793653488, -0.015666600316762924, -0.1861806958913803, -0.10686369985342026, -0.03675080090761185, -0.10279729962348938, 0.02977669984102249, 0.13194000720977783, -

<Response [200]>
{'valid_response': True, 'response': {'close_words': ['thermoelectrics', 'thermoelectric_properties', 'thermoelectric_power_generation', 'thermoelectric_figure_of_merit', 'seebeck_coefficient', 'thermoelectric_generators', 'figure_of_merit_ZT', 'thermoelectricity', 'Bi2Te3', 'thermoelectric_modules'], 'original_negative': '', 'scores': [0.8435713648796082, 0.8338961601257324, 0.7931461334228516, 0.7916650772094727, 0.7754083275794983, 0.7641181349754333, 0.7587718963623047, 0.7515588402748108, 0.7480264902114868, 0.7434927225112915], 'positive': ['thermoelectric'], 'original_positive': 'thermoelectric', 'negative': []}}
Words most similar to thermoelectric:
thermoelectrics
thermoelectric_properties
thermoelectric_power_generation
thermoelectric_figure_of_merit
seebeck_coefficient
thermoelectric_generators
figure_of_merit_ZT
thermoelectricity
Bi2Te3
thermoelectric_modules
