In [1]:
import requests
url = 'https://query.wikidata.org/sparql'
query = """
SELECT 
  ?countryLabel ?population ?area ?medianIncome ?age
WHERE {
  ?country wdt:P463 wd:Q458.
  OPTIONAL { ?country wdt:P1082 ?population }
  OPTIONAL { ?country wdt:P2046 ?area }
  OPTIONAL { ?country wdt:P3529 ?medianIncome }
  OPTIONAL { ?country wdt:P571 ?inception. 
    BIND(year(now()) - year(?inception) AS ?age)
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""
r = requests.get(url, params = {'format': 'json', 'query': query})
data = r.json()

import pandas as pd
from collections import OrderedDict
countries = []
for item in data['results']['bindings']:
    countries.append(OrderedDict({
        'country': item['countryLabel']['value'],
        'population': item['population']['value'],
        'area': item['area']['value'] 
            if 'area' in item else None,
        'medianIncome': item['medianIncome']['value'] 
            if 'medianIncome' in item else None,
        'age': item['age']['value'] 
            if 'age' in item else None}))
df = pd.DataFrame(countries)
df.set_index('country', inplace=True)
df = df.astype({'population': float, 'area': float, 'medianIncome': float, 'age': float})
df.head()

Unnamed: 0_level_0,population,area,medianIncome,age
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Kingdom of Denmark,5930987.0,2220930.0,,1220.0
Kingdom of the Netherlands,17100715.0,42201.0,,205.0
Ireland,4761865.0,70273.0,25969.0,83.0
Belgium,11431406.0,30528.0,31112.0,190.0
Hungary,9769526.0,93011.4,,1020.0


In [11]:
import requests
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import random
url = 'https://query.wikidata.org/sparql'
query = """
    SELECT ?item ?itemLabel 
    WHERE 
    {
      ?item wdt:P31 wd:Q11344.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
"""
r = requests.get(url, params = {'format': 'json', 'query': query})
data = r.json()
print(data)

# import pandas as pd
# from collections import OrderedDict
# countries = []
# for item in data['results']['bindings']:
#     countries.append(OrderedDict({
#         'country': item['countryLabel']['value'],
#         'population': item['population']['value'],
#         'area': item['area']['value'] 
#             if 'area' in item else None,
#         'medianIncome': item['medianIncome']['value'] 
#             if 'medianIncome' in item else None,
#         'age': item['age']['value'] 
#             if 'age' in item else None}))
# df = pd.DataFrame(countries)
# df.set_index('country', inplace=True)
# df = df.astype({'population': float, 'area': float, 'medianIncome': float, 'age': float})
# df.head()

{'head': {'vars': ['item', 'itemLabel']}, 'results': {'bindings': [{'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q556'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'hydrogen'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q560'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'helium'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q568'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'lithium'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q569'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'beryllium'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q618'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'boron'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q623'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'carbon'}}, {'item': {'type': 'uri', 'value': 'http://www.wikida

In [13]:
import json
print(json.dumps(data, indent=2))

{
  "head": {
    "vars": [
      "item",
      "itemLabel"
    ]
  },
  "results": {
    "bindings": [
      {
        "item": {
          "type": "uri",
          "value": "http://www.wikidata.org/entity/Q556"
        },
        "itemLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "hydrogen"
        }
      },
      {
        "item": {
          "type": "uri",
          "value": "http://www.wikidata.org/entity/Q560"
        },
        "itemLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "helium"
        }
      },
      {
        "item": {
          "type": "uri",
          "value": "http://www.wikidata.org/entity/Q568"
        },
        "itemLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "lithium"
        }
      },
      {
        "item": {
          "type": "uri",
          "value": "http://www.wikidata.org/entity/Q569"
        },
        "itemLabel": {
          "xm

In [16]:
query = """SELECT ?wdLabel {
	  VALUES (?element) {(wd:"""+str("Q560")+""")}

	  ?element ?p ?statement .
	  

	  ?wd wikibase:claim ?p.
	  

	  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
	} ORDER BY ?wd ?statement ?ps_"""


r2 = requests.get(url, params = {'format': 'json', 'query': query})
data2 = r2.json()
print(json.dumps(data2, indent=2))

{
  "head": {
    "vars": [
      "wdLabel"
    ]
  },
  "results": {
    "bindings": [
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "named after"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "image"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "GND ID"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "CAS Registry Number"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "EC number"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "canonical SMILES"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal"

In [18]:
for result in data2["results"]["bindings"]:
		if result['wdLabel']['value'][0] != 'P':
			print(str(result['wdLabel']['value'])+'\n')

named after

image

GND ID

CAS Registry Number

EC number

canonical SMILES

InChI

InChIKey

Library of Congress authority ID

element symbol

instance of

discoverer or inventor

discoverer or inventor

atomic number

electronegativity

refractive index

U.S. National Archives Identifier

OmegaWiki Defined Meaning

described by source

described by source

Encyclopædia Britannica Online ID

has quality

has quality

has quality

density

mass

speed of sound

speed of sound

melting point

melting point

melting point

melting point

boiling point

boiling point

NDF-RT ID

YSO ID

ECHA InfoCard ID

Bibliothèque nationale de France ID

chemical formula

subclass of

subclass of

subclass of

by-product of

UMLS CUI

Great Russian Encyclopedia Online ID

standard molar entropy

DSSTox substance ID

Encyclopædia Universalis ID

NE.se ID

RxNorm ID

Treccani ID

Quora topic ID

National Diet Library ID

part of

part of

use

use

use

use

Commons category

JSTOR topic ID

conjugate a

In [20]:
query = """SELECT ?wdLabel {
	  VALUES (?element) {(wd:"""+str("Q560")+""")}

	  ?element ?p ?statement .
	  

	  ?wd wikibase:claim ?p.
	  

	  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
	}"""


r2 = requests.get(url, params = {'format': 'json', 'query': query})
data2 = r2.json()
print(json.dumps(data2, indent=2))

{
  "head": {
    "vars": [
      "wdLabel"
    ]
  },
  "results": {
    "bindings": [
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "MeSH tree code"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "MeSH tree code"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "ZVG number"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "ChEBI ID"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "NKCR AUT ID"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "DrugBank ID"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "litera

In [21]:
for result in data2["results"]["bindings"]:
		if result['wdLabel']['value'][0] != 'P':
			print(str(result['wdLabel']['value'])+'\n')

MeSH tree code

MeSH tree code

ZVG number

ChEBI ID

NKCR AUT ID

DrugBank ID

topic's main category

Commons gallery

spoken text audio

atomic number

electronegativity

refractive index

U.S. National Archives Identifier

OmegaWiki Defined Meaning

described by source

described by source

Encyclopædia Britannica Online ID

has quality

has quality

has quality

density

mass

speed of sound

speed of sound

melting point

melting point

melting point

melting point

boiling point

boiling point

NDF-RT ID

YSO ID

ECHA InfoCard ID

by-product of

UMLS CUI

Great Russian Encyclopedia Online ID

standard molar entropy

DSSTox substance ID

Encyclopædia Universalis ID

NE.se ID

RxNorm ID

Treccani ID

Quora topic ID

JSTOR topic ID

conjugate acid

Unicode hex codepoint

Store norske leksikon ID

ASHRAE refrigerant number

Brockhaus Enzyklopädie online ID

Römpp online ID

Common Procurement Vocabulary

Microsoft Academic ID

Klexikon article ID

Australian Educational Vocabulary ID

In [25]:
query = """SELECT ?wdLabel {
	  VALUES (?element) {(wd:"""+str("Q560")+""")}

	  ?element ?p ?statement .
	  

	  ?wd wikibase:claim ?p.

	  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
	}"""


r2 = requests.get(url, params = {'format': 'json', 'query': query})
data2 = r2.json()
print(json.dumps(data2, indent=2))

{
  "head": {
    "vars": [
      "wdLabel"
    ]
  },
  "results": {
    "bindings": [
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "MeSH tree code"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "MeSH tree code"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "ZVG number"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "ChEBI ID"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "NKCR AUT ID"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "literal",
          "value": "DrugBank ID"
        }
      },
      {
        "wdLabel": {
          "xml:lang": "en",
          "type": "litera

In [27]:

import os 
import sys
import random

files=os.listdir('chemical_elements')
first_file=random.choice(files)
second_file=first_file
while second_file==first_file:
	second_file=random.choice(files)

file=open('chemical_elements/'+first_file,'r')
temp=file.readlines()
first=set()
for i in temp:
	first.add(i.strip())

file=open('chemical_elements/'+second_file,'r')
temp=file.readlines()
second=set()
for i in temp:
	second.add(i.strip())

difference=list(first-second)
value=random.choice(difference)
print(first_file)

print('तत्व '+str(second_file)+ ' का '+str(value)+' क्या है?')

सोडियम
तत्व कोबाल्ट का इससे अलग क्या है?


In [36]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import random

endpoint_url = "https://query.wikidata.org/sparql"

query = """SELECT ?item ?itemLabel 
WHERE 
{
  ?item wdt:P31 wd:Q11344.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],hi". }
}"""

# def get_results(endpoint_url, query):
#     user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
#     # TODO adjust user agent; see https://w.wiki/CX6
#     sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
#     sparql.setQuery(query)
#     sparql.setReturnFormat(JSON)
#     return sparql.query().convert()

r = requests.get(url, params = {'format': 'json', 'query': query})
results = r.json()
# print(data)
# results = get_results(endpoint_url, query)
print("HERE0")
all_elements=[] # stores item name code eg: Q560
names=[] #stores name of item eg: helium
for result in results["results"]["bindings"]:
    all_elements.append(result['item']['value'].split('/')[-1])
    names.append(result['itemLabel']['value'])

properties_list=[]
count=0
print("HERE")
for i in range(0,1):  ## ?wd wikibase:claim ?p. isse saare properties aa ja re
    query = """SELECT ?wdLabel {
      VALUES (?element) {(wd:"""+str(all_elements[i])+""")}

      ?element ?p ?statement .


      ?wd wikibase:claim ?p. 


      SERVICE wikibase:label { bd:serviceParam wikibase:language "hi" }
    } ORDER BY ?wd ?statement ?ps_"""
    #ORDER BY ?wd ?statement ?ps_""" se alphabetical order me properties aare

    def get_results(endpoint_url, query):
        user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
        # TODO adjust user agent; see https://w.wiki/CX6
        sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        return sparql.query().convert()
    r = requests.get(url, params = {'format': 'json', 'query': query})
    results = r.json()


    #file=open(names[i],'w')
    # 	for i in properties_list:
    # 		for j in i:
    # 			print("hi")
    # 			print(j)
    # 	results = get_results(endpoint_url, query)

#     print(json.dumps(results, indent=2))
    for result in results["results"]["bindings"]:
        if result['wdLabel']['value'][0] != '':
            print(str(result['wdLabel']['value'])+'\n')
#     	file.close()

HERE0
HERE
द्वारा नामांकित

चित्र

ख़ोज की जगह

ख़ोज की जगह

ख़ोज की जगह

GND अभिज्ञापक

P231

LCCN अभिज्ञापक

P246

का उदहारण है

खोजकर्ता या आविष्कारक

खोजकर्ता या आविष्कारक

खोजकर्ता या आविष्कारक

P1014

P1036

P1036

P1051

P1086

P1108

P1109

P1121

P1121

P1121

P1245

स्रोत द्वारा वर्णित

P1417

इससे अलग

इससे अलग

इससे अलग

P2054

द्रव्यमान

उष्मा समीकरण

ध्वनि का वेग

P2101

P2102

P2114

P2116

P2152

P2260

P2347

P2581

P268

का उपवर्ग

का उपवर्ग

का उपवर्ग

P2924

P2959

P3219

P3222

P3365

P3417

NDL पहचानकर्ता

P3569

का भाग

का भाग

कॉमन्स श्रेणी

P3827

P3916

P4213

रंग

P4839

P486

P487

P5008

P5019

P5076

BNCF कोश

P575

P6366

फ्रीबेस पहचानकर्ता

P6573

P672

P672

P683

P6900

P7033

P7818

P7822

P7827

P7829

P8000

P8408

विषय की मुख्य श्रेणी

कॉमन्स गैलरी

P950



In [39]:
import requests
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import random
url = 'https://query.wikidata.org/sparql'
query = """
    SELECT ?item ?itemLabel 
    WHERE 
    {
      ?item wdt:P31 wd:Q21294996 .
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
"""
r = requests.get(url, params = {'format': 'json', 'query': query})
data = r.json()
print(data)

{'head': {'vars': ['item', 'itemLabel']}, 'results': {'bindings': [{'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P5040'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'GHS hazard pictogram'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P5041'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'GHS hazard statement'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P5042'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'GHS precautionary statement'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P5219'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'BMRB ID'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P5220'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'ICSC ID'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P5929'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'limiting ox

In [40]:
SELECT ?p1080 ?pLabel ?w ?wLabel WHERE {
   wd:Q p:P6/ps:P6 ?p .
   ?p wdt:P26 ?w .
   SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
   }
 }

SyntaxError: invalid syntax (<ipython-input-40-1ec873f2208a>, line 1)

In [57]:
x="P1086"
query = """
SELECT  *
WHERE {
        wd:x rdfs:label ?label .
        FILTER (langMatches( lang(?label), "EN" ) )
      } 
LIMIT 1

"""
r = requests.get(url, params = {'format': 'json', 'query': query})
data = r.json()
# print(data)
print(json.dumps(data, indent=2))

{
  "head": {
    "vars": [
      "label"
    ]
  },
  "results": {
    "bindings": []
  }
}


In [50]:
for result in data["results"]["bindings"]:
    print(str(result['label']['value']))

atomic number


In [1]:
import os 
import sys
import random
import sqlite3
from collections import defaultdict

# con=sqlite3.connect('questions.db')
# con.execute('CREATE TABLE IF NOT EXISTS theorems (SERIAL_NO Int NOT NULL, QUESTION TEXT NOT NULL) ;')
# conn=con.cursor()

files=os.listdir('chem_folder')
properties=defaultdict(int)
for i in files:
	# print(i)
	i=i.strip()
	file=open('chemical_elements/'+i,'r')
	temp=file.readlines()
	first=set()
	for j in temp:
		first.add(j.strip())
	for j in first:
		properties[j]+=1

print(properties.keys())
# final_properties=[]
# for i in properties:
# 	properties[i]/=len(files)
# 	if properties[i]>=0.75:
# 		final_properties.append(i)

# # print(final_properties)
# print(len(final_properties))


FileNotFoundError: [Errno 2] No such file or directory: 'chemical_elements/कैडमियम'

In [12]:
import os 
import sys
import random
import sqlite3
from collections import defaultdict

# con=sqlite3.connect('questions.db')
# con.execute('CREATE TABLE IF NOT EXISTS theorems (SERIAL_NO Int NOT NULL, QUESTION TEXT NOT NULL) ;')
# conn=con.cursor()

files=os.listdir('chem_folder')
properties=defaultdict(int)
for i in files:
	# print(i)
	i=i.strip()
	file=open('chem_folder/'+i,'r')
	temp=file.readlines()
	first=set()
	for j in temp:
		first.add(j.strip())
	for j in first:
		properties[j]+=1

final_properties=[]
for i in properties:
	properties[i]/=len(files)
	if properties[i]>=0.75:
		final_properties.append(i)

print(final_properties)
print(len(final_properties))


['खोजकर्ता या आविष्कारक', 'GND अभिज्ञापक', 'DSSTOX_compound_identifier', 'कॉमन्स गैलरी', 'विषय की मुख्य श्रेणी', 'time of discovery or invention', 'फ्रीबेस पहचानकर्ता', 'चित्र', 'द्रव्यमान', 'Unicode character', 'का भाग', 'UNII', 'कॉमन्स श्रेणी', 'LCCN अभिज्ञापक', 'chemical formula', 'Encyclopædia Britannica Online ID', 'का उदहारण है', 'स्रोत द्वारा वर्णित', 'Treccani ID']
19


In [5]:
import os 
import sys
import random
import sqlite3
from collections import defaultdict

# con=sqlite3.connect('questions.db')
# con.execute('CREATE TABLE IF NOT EXISTS theorems (SERIAL_NO Int NOT NULL, QUESTION TEXT NOT NULL) ;')
# conn=con.cursor()

files=os.listdir('chem_folder')
properties=defaultdict(int)
for i in files:
	# print(i)
	i=i.strip()
	file=open('chem_folder/'+i,'r')
	temp=file.readlines()
	first=set()
	for j in temp:
		first.add(j.strip())
	for j in first:
		properties[j]+=1

final_properties=[]
for i in properties:
	properties[i]/=len(files)
	if properties[i]>=0.50:
		final_properties.append(i)

print(final_properties)

['खोजकर्ता या आविष्कारक', 'NDL पहचानकर्ता', 'GND अभिज्ञापक', 'DSSTOX_compound_identifier', 'कॉमन्स गैलरी', 'विषय की मुख्य श्रेणी', 'BNCF कोश', 'time of discovery or invention', 'फ्रीबेस पहचानकर्ता', 'चित्र', 'द्वारा नामांकित', 'द्रव्यमान', 'Unicode character', 'का भाग', 'UNII', 'कॉमन्स श्रेणी', 'LCCN अभिज्ञापक', 'chemical formula', 'Encyclopædia Britannica Online ID', 'का उदहारण है', 'स्रोत द्वारा वर्णित', 'Treccani ID', 'का उपवर्ग']


In [22]:
import os 
import sys
import random
while(1):
    files=os.listdir('chem_folder')
    first_file=random.choice(files)
    second_file=first_file
    while second_file==first_file:
        second_file=random.choice(files)

    file=open('chem_folder/'+first_file,'r')
    temp=file.readlines()
    first=set()
    for i in temp:
        first.add(i.strip())

    file=open('chemical_elements/'+second_file,'r')
    temp=file.readlines()
    second=set()
    for i in temp:
        second.add(i.strip())

    difference=list(first-second)
    value=random.choice(difference)

    if(value in final_properties):
        print('तत्व '+str(second_file)+ ' का '+ '\"' +str(value)+'\"'+' क्या है?')
        break
        

तत्व ऑक्सीजन का "का भाग" क्या है?


In [41]:
## question from old properties
import sqlite3
import pandas as pd

# Create the connection
cnx = sqlite3.connect(r'../question_sch.db')
# create the dataframe from a query
df = pd.read_sql_query("SELECT * FROM ELEMENTS", cnx)
df.to_csv('ques.csv',index=False)

In [30]:
### ques after adding english properties
import sqlite3
import pandas as pd

# Create the connection
cnx = sqlite3.connect(r'questions_new.db')

# create the dataframe from a query
df = pd.read_sql_query("SELECT * FROM ELEMENTS", cnx)
df

Unnamed: 0,SERIAL_NO,QUESTION
0,1,"तत्व नाइट्रोजन का ""UNII"" क्या है?"
1,2,"तत्व नाइट्रोजन का ""chemical formula"" क्या है?"
2,3,"तत्व नाइट्रोजन का ""DSSTOX_compound_identifier""..."
3,4,"तत्व मेइट्नेरियम का ""Treccani ID"" क्या है?"
4,5,"तत्व मेइट्नेरियम का ""LCCN अभिज्ञापक"" क्या है?"
...,...,...
185,186,"तत्व थोरियम का ""chemical formula"" क्या है?"
186,187,"तत्व क्षुद्रातु का ""चित्र"" क्या है?"
187,188,"तत्व क्षुद्रातु का ""द्रव्यमान"" क्या है?"
188,189,"तत्व रेडॉन का ""चित्र"" क्या है?"


In [3]:
import sqlite3
import pandas as pd

# Create the connection
cnx = sqlite3.connect(r'../questions_new.db')

# create the dataframe from a query
df = pd.read_sql_query("SELECT * FROM ELEMENTS", cnx)
df

Unnamed: 0,SERIAL_NO,QUESTION
0,1,"तत्व नाइट्रोजन का ""DSSTOX_compound_identifier""..."
1,2,"तत्व नाइट्रोजन का ""UNII"" क्या है?"
2,3,"तत्व नाइट्रोजन का ""chemical formula"" क्या है?"
3,4,"तत्व मेइट्नेरियम का ""DSSTOX_compound_identifie..."
4,5,"तत्व मेइट्नेरियम का ""LCCN अभिज्ञापक"" क्या है?"
...,...,...
185,186,"तत्व थोरियम का ""chemical formula"" क्या है?"
186,187,"तत्व क्षुद्रातु का ""चित्र"" क्या है?"
187,188,"तत्व क्षुद्रातु का ""द्रव्यमान"" क्या है?"
188,189,"तत्व रेडॉन का ""चित्र"" क्या है?"


In [None]:
from inltk.inltk import get_similar_sentences

# get similar sentences to the one given in hindi
output = get_similar_sentences('मैं आज बहुत खुश हूं', 5, 'hi')

print(output)

In [1]:
import stanfordnlp

In [6]:
stanfordnlp.download('hi')

Using the default treebank "hi_hdtb" for language "hi".
Would you like to download the models for: hi_hdtb now? (Y/n)
y

Default download directory: /home/kumar/stanfordnlp_resources
Hit enter to continue or type an alternate directory.


Downloading models for: hi_hdtb
Download location: /home/kumar/stanfordnlp_resources/hi_hdtb_models.zip


100%|██████████| 208M/208M [04:41<00:00, 739kB/s] 



Download complete.  Models saved to: /home/kumar/stanfordnlp_resources/hi_hdtb_models.zip
Extracting models file for: hi_hdtb
Cleaning up...Done.


In [2]:
nlp = stanfordnlp.Pipeline(lang="hi", treebank="hi_hdtb")

Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/home/kumar/stanfordnlp_resources/hi_hdtb_models/hi_hdtb_tokenizer.pt', 'lang': 'hi', 'shorthand': 'hi_hdtb', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/home/kumar/stanfordnlp_resources/hi_hdtb_models/hi_hdtb_tagger.pt', 'pretrain_path': '/home/kumar/stanfordnlp_resources/hi_hdtb_models/hi_hdtb.pretrain.pt', 'lang': 'hi', 'shorthand': 'hi_hdtb', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/home/kumar/stanfordnlp_resources/hi_hdtb_models/hi_hdtb_lemmatizer.pt', 'lang': 'hi', 'shorthand': 'hi_hdtb', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit classifier]
---
Loading: depparse
With settings: 
{'model_path': '/home/kumar/stanfordnlp_resources/hi_hdtb_models/hi_hdtb_parser.pt', 'pretrain_path': '/home/kumar/stanfordnlp_resources/hi_hdtb_mo

In [3]:
hindi_doc = nlp("""केंद्र की मोदी सरकार ने शुक्रवार को अपना अंतरिम बजट पेश किया. कार्यवाहक वित्त मंत्री पीयूष गोयल ने अपने बजट में किसान, मजदूर, करदाता, महिला वर्ग समेत हर किसी के लिए बंपर ऐलान किए. हालांकि, बजट के बाद भी टैक्स को लेकर काफी कन्फ्यूजन बना रहा. केंद्र सरकार के इस अंतरिम बजट क्या खास रहा और किसको क्या मिला, आसान भाषा में यहां समझें""")



In [52]:
import pandas as pd
#extract parts of speech
def extract_pos(doc):
    parsed_text = {'word':[], 'pos':[], 'exp':[]}
    for sent in doc.sentences:
        for wrd in sent.words:
            if wrd.pos in pos_dict.keys():
                pos_exp = pos_dict[wrd.pos]
            else:
                pos_exp = 'NA'
            parsed_text['word'].append(wrd.text)
            parsed_text['pos'].append(wrd.pos)
            parsed_text['exp'].append(pos_exp)
    #return a dataframe of pos and text
    return pd.DataFrame(parsed_text)
words=['द्वारा नामांकित', 'कॉमन्स श्रेणी', 'का उपवर्ग', 'विषय की मुख्य श्रेणी', 'BNCF कोश', 'द्रव्यमान', 'GND अभिज्ञापक', 'फ्रीबेस पहचानकर्ता', 'कॉमन्स गैलरी', 'LCCN अभिज्ञापक', 'NDL पहचानकर्ता', 'स्रोत द्वारा वर्णित', 'खोजकर्ता या आविष्कारक', 'उदहारण है', 'चित्र', 'ख़ोज की जगह', 'इससे अलग', 'का भाग', 'राष्ट्रीय पुस्तकालय चेक गणराज्य पहचानकर्ता', 'उष्मा समीकरण', 'रंग', 'ध्वनि का वेग', 'नाम', 'पिछला है', 'अगला है', 'देशी लेबल', 'मानचित्र छवि', 'लोगो चित्र']
for w in words:
    hindi_doc = nlp(w)
    pos_dict = {
    'CC': 'coordinating conjunction','CD': 'cardinal digit','DT': 'determiner',
    'EX': 'existential there (like: \"there is\" ... think of it like \"there exists\")',
    'FW': 'foreign word','IN':  'preposition/subordinating conjunction','JJ': 'adjective \'big\'',
    'JJR': 'adjective, comparative \'bigger\'','JJS': 'adjective, superlative \'biggest\'',
    'LS': 'list marker 1)','MD': 'modal could, will','NN': 'noun, singular \'desk\'',
    'NNS': 'noun plural \'desks\'','NNP': 'proper noun, singular \'Harrison\'',
    'NNPS': 'proper noun, plural \'Americans\'','PDT': 'predeterminer \'all the kids\'',
    'POS': 'possessive ending parent\'s','PRP': 'personal pronoun I, he, she',
    'PRP$': 'possessive pronoun my, his, hers','RB': 'adverb very, silently,',
    'RBR': 'adverb, comparative better','RBS': 'adverb, superlative best',
    'RP': 'particle give up','TO': 'to go \'to\' the store.','UH': 'interjection errrrrrrrm',
    'VB': 'verb, base form take','VBD': 'verb, past tense took',
    'VBG': 'verb, gerund/present participle taking','VBN': 'verb, past participle taken',
    'VBP': 'verb, sing. present, non-3d take','VBZ': 'verb, 3rd person sing. present takes',
    'WDT': 'wh-determiner which','WP': 'wh-pronoun who, what','WP$': 'possessive wh-pronoun whose',
    'WRB': 'wh-abverb where, when','QF' : 'quantifier, bahut, thoda, kam (Hindi)','VM' : 'main verb',
    'PSP' : 'postposition, common in indian langs','DEM' : 'demonstrative, common in indian langs'
    }
    print(extract_pos(hindi_doc))



       word  pos                                   exp
0    द्वारा  PSP  postposition, common in indian langs
1  नामांकित   JJ                       adjective 'big'
     word pos                    exp
0  कॉमन्स  VM              main verb
1  श्रेणी  NN  noun, singular 'desk'
     word  pos                                   exp
0      का  PSP  postposition, common in indian langs
1  उपवर्ग   NN                 noun, singular 'desk'
     word  pos                                   exp
0    विषय   NN                 noun, singular 'desk'
1      की  PSP  postposition, common in indian langs
2   मुख्य   JJ                       adjective 'big'
3  श्रेणी   NN                 noun, singular 'desk'




   word  pos                               exp
0  BNCF  NNP  proper noun, singular 'Harrison'
1   कोश   NN             noun, singular 'desk'
        word pos                    exp
0  द्रव्यमान  NN  noun, singular 'desk'
        word   pos                    exp
0        GND  NNPC                     NA
1  अभिज्ञापक    NN  noun, singular 'desk'
         word  pos                               exp
0     फ्रीबेस  NNP  proper noun, singular 'Harrison'
1  पहचानकर्ता   VM                         main verb




     word pos                    exp
0  कॉमन्स  VM              main verb
1   गैलरी  NN  noun, singular 'desk'
        word   pos                               exp
0       LCCN  NNPC                                NA
1  अभिज्ञापक   NNP  proper noun, singular 'Harrison'
         word   pos                    exp
0         NDL  NNPC                     NA
1  पहचानकर्ता    NN  noun, singular 'desk'




     word  pos                                   exp
0   स्रोत   NN                 noun, singular 'desk'
1  द्वारा  PSP  postposition, common in indian langs
2  वर्णित   JJ                       adjective 'big'
        word pos                       exp
0   खोजकर्ता  NN     noun, singular 'desk'
1         या  CC  coordinating conjunction
2  आविष्कारक  NN     noun, singular 'desk'
     word pos                    exp
0  उदहारण  NN  noun, singular 'desk'
1      है  VM              main verb
    word pos                    exp
0  चित्र  NN  noun, singular 'desk'
   word  pos                                   exp
0  ख़ोज   NN                 noun, singular 'desk'
1    की  PSP  postposition, common in indian langs
2   जगह   NN                 noun, singular 'desk'




   word  pos                                   exp
0  इससे  PRP           personal pronoun I, he, she
1   अलग  PSP  postposition, common in indian langs
  word  pos                                   exp
0   का  PSP  postposition, common in indian langs
1  भाग   NN                 noun, singular 'desk'
         word   pos                               exp
0   राष्ट्रीय  NNPC                                NA
1   पुस्तकालय  NNPC                                NA
2         चेक  NNPC                                NA
3     गणराज्य   NNP  proper noun, singular 'Harrison'
4  पहचानकर्ता   NNP  proper noun, singular 'Harrison'




     word  pos                    exp
0   उष्मा  NNC                     NA
1  समीकरण   NN  noun, singular 'desk'
  word pos                    exp
0  रंग  NN  noun, singular 'desk'
    word  pos                                   exp
0  ध्वनि   NN                 noun, singular 'desk'
1     का  PSP  postposition, common in indian langs
2    वेग   NN                 noun, singular 'desk'




  word pos                    exp
0  नाम  NN  noun, singular 'desk'
    word pos              exp
0  पिछला  JJ  adjective 'big'
1     है  VM        main verb
   word pos              exp
0  अगला  JJ  adjective 'big'
1    है  VM        main verb
   word pos                    exp
0  देशी  JJ        adjective 'big'
1  लेबल  NN  noun, singular 'desk'




       word pos                    exp
0  मानचित्र  JJ        adjective 'big'
1       छवि  NN  noun, singular 'desk'
    word pos                    exp
0   लोगो  NN  noun, singular 'desk'
1  चित्र  NN  noun, singular 'desk'




In [25]:
hindi_doc = nlp("""तत्व रेडॉन का द्रव्यमान कौन है?""")



In [43]:
import pandas as pd
pos_dict = {
'CC': 'coordinating conjunction','CD': 'cardinal digit','DT': 'determiner',
'EX': 'existential there (like: \"there is\" ... think of it like \"there exists\")',
'FW': 'foreign word','IN':  'preposition/subordinating conjunction','JJ': 'adjective \'big\'',
'JJR': 'adjective, comparative \'bigger\'','JJS': 'adjective, superlative \'biggest\'',
'LS': 'list marker 1)','MD': 'modal could, will','NN': 'noun, singular \'desk\'',
'NNS': 'noun plural \'desks\'','NNP': 'proper noun, singular \'Harrison\'',
'NNPS': 'proper noun, plural \'Americans\'','PDT': 'predeterminer \'all the kids\'',
'POS': 'possessive ending parent\'s','PRP': 'personal pronoun I, he, she',
'PRP$': 'possessive pronoun my, his, hers','RB': 'adverb very, silently,',
'RBR': 'adverb, comparative better','RBS': 'adverb, superlative best',
'RP': 'particle give up','TO': 'to go \'to\' the store.','UH': 'interjection errrrrrrrm',
'VB': 'verb, base form take','VBD': 'verb, past tense took',
'VBG': 'verb, gerund/present participle taking','VBN': 'verb, past participle taken',
'VBP': 'verb, sing. present, non-3d take','VBZ': 'verb, 3rd person sing. present takes',
'WDT': 'wh-determiner which','WP': 'wh-pronoun who, what','WP$': 'possessive wh-pronoun whose',
'WRB': 'wh-abverb where, when','QF' : 'quantifier, bahut, thoda, kam (Hindi)','VM' : 'main verb',
'PSP' : 'postposition, common in indian langs','DEM' : 'demonstrative, common in indian langs'
}

#extract parts of speech
def extract_pos(doc):
    parsed_text = {'word':[], 'pos':[], 'exp':[]}
    for sent in doc.sentences:
        for wrd in sent.words:
            if wrd.pos in pos_dict.keys():
                pos_exp = pos_dict[wrd.pos]
            else:
                pos_exp = 'NA'
            parsed_text['word'].append(wrd.text)
            parsed_text['pos'].append(wrd.pos)
            parsed_text['exp'].append(pos_exp)
    #return a dataframe of pos and text
    return pd.DataFrame(parsed_text)

In [44]:
extract_pos(hindi_doc)

Unnamed: 0,word,pos,exp
0,खोजकर्ता,NN,"noun, singular 'desk'"
1,या,CC,coordinating conjunction
2,आविष्कारक,NN,"noun, singular 'desk'"
3,अभिज्ञापक,JJ,adjective 'big'
4,DSSTOX_compound_identifier,VM,main verb
5,कॉमन्स,VM,main verb
6,गैलरी,NNC,
7,विषय,NN,"noun, singular 'desk'"
8,की,PSP,"postposition, common in indian langs"
9,मुख्य,JJ,adjective 'big'


In [30]:
#ip = nlp("""तत्व द्रव्यमान क्षुद्रातु का "चित्र" dicoverer खोजकर्ता या आविष्कारक क्या  chemical formula है""")
ip = nlp("""चित्र""")



In [31]:
extract_pos(ip)

Unnamed: 0,word,pos,exp
0,चित्र,NN,"noun, singular 'desk'"


In [23]:
import sqlite3
import pandas as pd

# Create the connection
cnx = sqlite3.connect(r'../questions_new.db')

# create the dataframe from a query
df = pd.read_sql_query("SELECT * FROM ELEMENTS", cnx)
df.head(60)

Unnamed: 0,SERIAL_NO,QUESTION
0,1,"तत्व नाइट्रोजन का ""DSSTOX_compound_identifier""..."
1,2,"तत्व नाइट्रोजन का ""UNII"" क्या है?"
2,3,"तत्व नाइट्रोजन का ""chemical formula"" क्या है?"
3,4,"तत्व मेइट्नेरियम का ""DSSTOX_compound_identifie..."
4,5,"तत्व मेइट्नेरियम का ""LCCN अभिज्ञापक"" क्या है?"
5,6,"तत्व मेइट्नेरियम का ""स्रोत द्वारा वर्णित"" क्या..."
6,7,"तत्व मेइट्नेरियम का ""Treccani ID"" क्या है?"
7,8,"तत्व मेइट्नेरियम का ""UNII"" क्या है?"
8,9,"तत्व मेइट्नेरियम का ""चित्र"" क्या है?"
9,10,"तत्व मेइट्नेरियम का ""द्रव्यमान"" क्या है?"


In [48]:
import pandas as pd
#extract parts of speech
def extract_pos(doc):
    parsed_text = {'word':[], 'pos':[], 'exp':[]}
    for sent in doc.sentences:
        for wrd in sent.words:
            if wrd.pos in pos_dict.keys():
                pos_exp = pos_dict[wrd.pos]
            else:
                pos_exp = 'NA'
            parsed_text['word'].append(wrd.text)
            parsed_text['pos'].append(wrd.pos)
            parsed_text['exp'].append(pos_exp)
    #return a dataframe of pos and text
    return pd.DataFrame(parsed_text)

hindi_doc = nlp("""तत्व ऑक्सीजन का "खोजकर्ता या आविष्कारक" क्या है?""")
pos_dict = {
'CC': 'coordinating conjunction','CD': 'cardinal digit','DT': 'determiner',
'EX': 'existential there (like: \"there is\" ... think of it like \"there exists\")',
'FW': 'foreign word','IN':  'preposition/subordinating conjunction','JJ': 'adjective \'big\'',
'JJR': 'adjective, comparative \'bigger\'','JJS': 'adjective, superlative \'biggest\'',
'LS': 'list marker 1)','MD': 'modal could, will','NN': 'noun, singular \'desk\'',
'NNS': 'noun plural \'desks\'','NNP': 'proper noun, singular \'Harrison\'',
'NNPS': 'proper noun, plural \'Americans\'','PDT': 'predeterminer \'all the kids\'',
'POS': 'possessive ending parent\'s','PRP': 'personal pronoun I, he, she',
'PRP$': 'possessive pronoun my, his, hers','RB': 'adverb very, silently,',
'RBR': 'adverb, comparative better','RBS': 'adverb, superlative best',
'RP': 'particle give up','TO': 'to go \'to\' the store.','UH': 'interjection errrrrrrrm',
'VB': 'verb, base form take','VBD': 'verb, past tense took',
'VBG': 'verb, gerund/present participle taking','VBN': 'verb, past participle taken',
'VBP': 'verb, sing. present, non-3d take','VBZ': 'verb, 3rd person sing. present takes',
'WDT': 'wh-determiner which','WP': 'wh-pronoun who, what','WP$': 'possessive wh-pronoun whose',
'WRB': 'wh-abverb where, when','QF' : 'quantifier, bahut, thoda, kam (Hindi)','VM' : 'main verb',
'PSP' : 'postposition, common in indian langs','DEM' : 'demonstrative, common in indian langs'
}
print(extract_pos(hindi_doc))

         word  pos                                   exp
0        तत्व   NN                 noun, singular 'desk'
1     ऑक्सीजन   NN                 noun, singular 'desk'
2          का  PSP  postposition, common in indian langs
3           "  SYM                                    NA
4    खोजकर्ता   NN                 noun, singular 'desk'
5          या   CC              coordinating conjunction
6   आविष्कारक   NN                 noun, singular 'desk'
7           "  SYM                                    NA
8        क्या   WQ                                    NA
9          है   VM                             main verb
10          ?  SYM                                    NA


