In [1]:
from transformers import pipeline
from neo4j import GraphDatabase
import logging
from neo4j.exceptions import ServiceUnavailable
import pandas as pd
import numpy as np
from neo4j.exceptions import Neo4jError

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import spacy
# !python -m spacy download en_core_web_sm
nlp = spacy.load('en_core_web_sm')

def extract_entities(question):
    doc = nlp(question)
    gpe_entities=[]
    date_entities=[]
    for ent in doc.ents:
        if ent.label_ =="GPE":
            gpe_entities.append(ent.text)
        if ent.label_ =="DATE":
            date_entities.append(ent.text)
        # print(ent.text, ent.start_char, ent.end_char, ent.label_)
    # print("")
    nsubj_enitities=[]
    dobj_enitities=[]
    for token in doc:
        if token.dep_ == "nsubj": 
            nsubj_enitities.append(token.text)
        if token.dep_ == "dobj": 
            dobj_enitities.append(token.text)
        # print(token.text, token.dep_, token.head.text, token.head.pos_,[child for child in token.children])
    return gpe_entities,date_entities,nsubj_enitities,dobj_enitities

In [3]:
usa_sim=["us"
,"US"
,"usa"
,"U.S."
,"USA"
,"U. S."
,"U.S.A."
,"U. S. A."
,"US of A"
,"U.S. of A"
,"U. S. of A"
,"United States"
,"United States of America"]


In [4]:
import rltk

def country_similarity(r1, r2):
    ''' Example dummy similiary function '''
    s1 = r1.lower()
    s2 = r2.lower()
    
    sim1=rltk.jaro_winkler_similarity(s1, s2)
    sim2=rltk.dice_similarity(set(s1), set(s2))
    return 0.7*sim1+0.3*sim2


def product_similarity(r1, r2):
    ''' Example dummy similiary function '''
    s1 = r1.lower()
    s2 = r2.lower()
    
    sim1=rltk.jaro_winkler_similarity(s1, s2)
    sim2=rltk.dice_similarity(set(s1), set(s2))
    return 0.7*sim1+0.3*sim2



In [5]:
import pandas as pd
cc=pd.read_csv('country_codes.csv')

In [6]:
df2 = pd.DataFrame({
"iso_3":["USA_1","USA_2","USA_3","USA_4","USA_5","USA_6","USA_7","USA_8","USA_9","USA_10","USA_11","USA_12","USA_13"],"name":
["us",
"US",
"usa",
"U.S.",
"USA",
"U. S.",
"U.S.A.",
"U. S. A.",
"US of A",
"U.S. of A",
"U. S. of A",
"United States",
"United States of America"]})

In [7]:
cc[cc['iso_3']=="USA"]

Unnamed: 0,iso_3,name
235,USA,united-states


In [8]:
cc=pd.concat([cc,df2]).reset_index().drop(columns=['index'])
cc

Unnamed: 0,iso_3,name
0,AFG,afghanistan
1,ALA,aland-islands
2,ALB,albania
3,DZA,algeria
4,ASM,american-samoa
...,...,...
257,USA_9,US of A
258,USA_10,U.S. of A
259,USA_11,U. S. of A
260,USA_12,United States


In [9]:
dict_country=dict()

for i,c in cc.iterrows():
    dict_country[c['iso_3'].lower()]=c['name']
# print(dict_country)

def find_the_matching_country(dict_country,match_country):
    track_score=dict()
    for eachCountry in dict_country.values():
        track_score[(match_country,eachCountry)]=country_similarity(eachCountry, match_country)

    return dict(sorted(track_score.items(), key=lambda item: item[1],reverse=True))

In [10]:
products=pd.read_csv('products.csv')

In [11]:
truth_product_list=list(products['section_name'].unique())

In [12]:
def find_the_matching_product(truth_product_list,match_product):
    track_score=dict()
    for eachCountry in truth_product_list:
        track_score[(match_product,eachCountry)]=country_similarity(eachCountry, match_product)
    return dict(sorted(track_score.items(), key=lambda item: item[1], reverse=True))

In [13]:
class App:

    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
          self.driver.close()

    def c_trade(self, p1):
        with self.driver.session(database="neo4j") as session:
            return session.execute_write(
                self.fetch_c_trade, p1)

    @staticmethod
    def fetch_c_trade(tx, p1):

        query1=(
            " MATCH (t1)-[r3:exportedFrom]->(c1) MATCH (t1)-[r4:exportedTo]->(c2) MATCH (t1)-[r1:tradedYear]->(y1) WHERE c1.countryID=$p1 return c1,c2,y1,sum(t1.tradedValue) as traded_val"
            )

        result=tx.run(query1, p1=p1)
        try:
            return [[record["c1"]["name"], record["c2"]["name"],record["traded_val"],record["y1"]["year"]] for record in result]
        except Neo4jError as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query1, exception=exception))
            raise

    def c2c_trade(self, p1, p2):
        with self.driver.session(database="neo4j") as session:
            return session.execute_write(
                self.fetch_c2c_trade, p1, p2)

    @staticmethod
    def fetch_c2c_trade(tx, p1, p2):

        query1=(
            " MATCH (t1)-[r3:exportedFrom]->(c1) MATCH (t1)-[r4:exportedTo]->(c2) MATCH (t1)-[r1:tradedYear]->(y1) WHERE c1.countryID=$p1 AND c2.countryID=$p2 return c1,c2,y1,sum(t1.tradedValue) as traded_val"
            )

        result=tx.run(query1, p1=p1, p2=p2,)
        try:
            return [[record["c1"]["name"], record["c2"]["name"],record["traded_val"],record["y1"]["year"]] for record in result]
        except Neo4jError as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query1, exception=exception))
            raise

#########################################

    def c2c_trade_product(self, p1, p2):
        with self.driver.session(database="neo4j") as session:
            return session.execute_write(
                self.fetch_c2c_trade_product, p1, p2 )

    @staticmethod
    def fetch_c2c_trade_product(tx, p1, p2):

        query1=(
            " MATCH (t1)-[r3:exportedFrom]->(c1) MATCH (t1)-[r4:exportedTo]->(c2) MATCH (t1)-[r1:tradedYear]->(y1) MATCH (t1)-[r2:tradedProduct]->(p1) WHERE c1.countryID=$p1 AND c2.countryID=$p2 return c1,c2,p1,sum(t1.tradedValue) as traded_val,y1.year as year"
            )

        result=tx.run(query1, p1=p1, p2=p2)
        try:
            return [[record["c1"]["name"], record["c2"]["name"],record["traded_val"],record["p1"]["section"],record["year"]] for record in result]
        except Neo4jError as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query1, exception=exception))
            raise





    def c2c_products(self, p1, p2):
        with self.driver.session(database="neo4j") as session:
            return session.execute_write(
                self.fetch_c2c_products, p1, p2)

    @staticmethod
    def fetch_c2c_products(tx, p1, p2):

        query1=("MATCH (t1)-[r3:exportedFrom]->(c1) MATCH (t1)-[r4:exportedTo]->(c2) MATCH (t1)-[r2:tradedProduct]->(p1) WHERE c1.countryID=$p1 AND c2.countryID=$p2 return p1")

        result=tx.run(query1, p1=p1, p2=p2)
        try:
            return [[record["p1"]["section"]] for record in result]
        except Neo4jError as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query1, exception=exception))
            raise


    def c2c_products_year(self, p1, p2):
        with self.driver.session(database="neo4j") as session:
            return session.execute_write(
                self.fetch_c2c_products_year, p1, p2)

    @staticmethod
    def fetch_c2c_products_year(tx, p1, p2):

        query1=("MATCH (t1)-[r3:exportedFrom]->(c1) MATCH (t1)-[r4:exportedTo]->(c2) MATCH (t1)-[r1:tradedYear]->(y1) MATCH (t1)-[r2:tradedProduct]->(p1) WHERE c1.countryID=$p1 AND c2.countryID=$p2 return y1,p1")

        result=tx.run(query1, p1=p1, p2=p2)
        try:
            return [[record["y1"]["year"],record["p1"]["section"]] for record in result]
        except Neo4jError as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query1, exception=exception))
            raise

    def country(self, p1):
        with self.driver.session(database="neo4j") as session:
            return session.execute_write(
                self.fetch_country, p1)

    @staticmethod
    def fetch_country(tx, p1):

        query1=("MATCH (c1:Country) where c1.countryID=$p1 return c1")
        result=tx.run(query1, p1=p1)
        try:
            return [[record["c1"]["name"],record["c1"]["population"],record["c1"]["gdp_2020"],record["c1"]["gdp_2019"],record["c1"]["gdp_2018"],record["c1"]["gdp_2017"],record["c1"]["gdp_2016"],record["c1"]["gdp_2015"],record["c1"]["gdp_2014"],record["c1"]["gdp_2013"],record["c1"]["gdp_2012"],record["c1"]["gdp_2011"],record["c1"]["gdp_2010"]] for record in result]
        except Neo4jError as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query1, exception=exception))
            raise


    def product_hierarchy(self):
        with self.driver.session(database="neo4j") as session:
            return session.execute_write(
                self.fetch_product_hierarchy)

    @staticmethod
    def fetch_product_hierarchy(tx):

        query1=("match (s1)<-[r1:hasSection]-(c1) return c1,s1")
        result=tx.run(query1)
        try:
            return [[record["s1"]["section"],record["c1"]["Category"]] for record in result]
        except Neo4jError as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query1, exception=exception))
            raise


    def FTA(self):
        with self.driver.session(database="neo4j") as session:
            return session.execute_write(
                self.fetch_FTA)

    @staticmethod
    def fetch_FTA(tx):

        query1=("match (f1)-[r1:hasFTA]->(c1) where f1.has_fta=true return c1")
        result=tx.run(query1)
        try:
            return [[record["c1"]["name"]] for record in result]
        except Neo4jError as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query1, exception=exception))
            raise

In [28]:
if __name__ == "__main__":
    # Aura queries use an encrypted connection using the "neo4j+s" URI scheme
    uri = "neo4j+s://2a05e02b.databases.neo4j.io"
    user = "neo4j"
    password = "tjWEIE9P86QZZqfpN-nKF7z-rfx1KD11OfNFQmoiFr0"
    app = App(uri, user, password)
    
def get_gdp_details(buyer):
    generated_gdp=""
    output_5=pd.DataFrame(app.country(buyer),columns=["Name","Population","gdp_2020","gdp_2019","gdp_2018","gdp_2017","gdp_2016","gdp_2015","gdp_2014","gdp_2013","gdp_2012","gdp_2011","gdp_2010"])
    for i,text in output_5.iterrows():
        generated_gdp+=text['Name']+" has population of "+str(text['Population'])+ ". "+text['Name']+" had GDP of "+str(text['gdp_2020'])+" in 2020 "+text['Name']+" had GDP of "+str(text['gdp_2019'])+" in 2019. "+text['Name']+" had GDP of "+str(text['gdp_2018'])+" in 2018. "+text['Name']+" had GDP of "+str(text['gdp_2017'])+" in 2017. "+text['Name']+" had GDP of "+str(text['gdp_2016'])+" in 2016. "+text['Name']+" had GDP of "+str(text['gdp_2015'])+" in 2015. "+text['Name']+" had GDP of "+str(text['gdp_2014'])+" in 2014. "+text['Name']+" had GDP of "+str(text['gdp_2013'])+" in 2013. "+text['Name']+" had GDP of "+str(text['gdp_2012'])+" in 2012. "+text['Name']+" had GDP of "+str(text['gdp_2011'])+" in 2011. "+text['Name']+" had GDP of "+str(text['gdp_2010'])+" in 2010. "
    return generated_gdp
    
def generate_c2c_trades(buyer, seller):
    generated_c2c_trades=""
    output_3=pd.DataFrame(app.c2c_products_year(buyer,seller),columns=["Year","Section"])

    for year in list(output_3['Year'].unique()):
        line=[]
        for text in output_3[output_3['Year']==year]['Section']:
            line.append(text)
        generated_c2c_trades+= " In "+str(year)+", "+dict_country[buyer]+" exported the following products from "+dict_country[seller]+":  "+','.join(str(item) for item in line)+"."


    output_1=pd.DataFrame(app.c2c_trade(buyer,seller),columns=["Buyer","Seller","Worth","Year"])
    for i,text in output_1.iterrows():
        generated_c2c_trades+= " "+text['Buyer']+ " exported from "+text['Seller']+ " worth "+str(text['Worth'])+" in "+str(text["Year"]) +"."

    
    output_4=pd.DataFrame(app.c2c_trade_product(buyer,seller),columns=["Buyer","Seller","Worth","Product","Year"])
    for i,text in output_4.iterrows():
        generated_c2c_trades+= " "+text['Buyer']+ " exported " + text["Product"]+ " from "+text['Seller']+ " worth "+str(text['Worth'])+" in "+str(text["Year"]) +"."
    # print(output_4) 
    return generated_c2c_trades
    
def generate_products():
    output_6=pd.DataFrame(app.product_hierarchy(),columns=["Section","Category"])
    generated_products=""
    for section in list(output_6['Section'].unique()):
        line=[]
        for text in output_6[output_6['Section']==section]['Category']:
            line.append(text)
        generated_products+= " "+section+" contains "+', '.join(str(item) for item in line)+"."
    return generated_products

def generate_FTA():
    output_7=pd.DataFrame(app.FTA(),columns=["Country"])
    generated_FTAs=""
    output_7=output_7[output_7['Country']!="United States"]
    print(output_7['Country'].unique())

    line=[]
    for c in list(output_7['Country'].unique()):
        line.append(c)
    generated_FTAs+= " "+"USA has FTA with "+', '.join(str(item) for item in line)+"."
    return generated_FTAs

def generate_country_trades(buyer):
    generated_country_trades=""
    output_7=pd.DataFrame(app.c_trade(buyer),columns=["Buyer","Seller","Worth","Year"])
    country_sellers=set()
    for i,text in output_7.iterrows():
        country_sellers.add(text['Seller'])
        generated_country_trades+= " "+text['Buyer']+ " exported from "+text['Seller']+ " worth "+str(text['Worth'])+" in "+str(text["Year"]) +"."
    generated_country_trades+= " "+dict_country[buyer]+" trades with "+', '.join(str(item) for item in list(country_sellers))+"."
    return generated_country_trades
    # print(generated_text)

In [29]:
dict_country_rev=dict()
for key, value in dict_country.items():
    # do something with value
    dict_country_rev[value] = key
# dict_country_rev

In [80]:
def question_answer(question):
    gpe,date,nsubj,dobj=extract_entities(question)
    # print("\n",gpe,date,nsubj,dobj)

    question_dict=dict()
    for country in gpe:
        list_c=find_the_matching_country(dict_country,country)
        question_dict[list(list_c)[0][0]]=dict_country_rev[list(list_c)[0][1]].split("_")[0]

    if date!=[]:
        processed_year=pd.DataFrame(date,columns=['date'])
        processed_year['date'] =  pd.to_datetime(processed_year['date'])
        processed_year=processed_year['date'].dt.year
        question_dict[date[0]]=processed_year[0]

    for subj in nsubj:
        if subj not in question_dict.keys():
            if "GDP" not in subj and "product" not in subj:  
                list_p=find_the_matching_product(truth_product_list,subj)
                question_dict[list(list_p)[0][0]]=list(list_p)[0][1]
            else:
                question_dict[subj]= subj.lower()

    for word, initial in question_dict.items():
        question = question.replace(word, str(initial))
    print("Updated_question: ",question)

    qa_model = pipeline("question-answering")
    if len(gpe)>1:
        # print(question_dict[gpe[0]],question_dict[gpe[1]])
        context=generate_c2c_trades(question_dict[gpe[0]],question_dict[gpe[1]])
        print(1)
        # print(context)
        if "product" in question.lower():
            return qa_model(question = question, context = context,top_k=5)
        else:
            return qa_model(question = question, context = context)

    elif "gdp" in question.lower():
        context=get_gdp_details(question_dict[gpe[0]])
        print(2)
        return qa_model(question = question, context = context)

    elif "fta" in question.lower():
        context=generate_FTA()
        # return (context)
        print(3)
        return qa_model(question = question, context = context, top_k = 10)

    elif "contain" in question.lower() or "include" in question.lower() or "compromise" in question.lower():
        context=generate_products()
        print(4)
        return qa_model(question = question, context = context, top_k = 17)

    else:
        print(5)
        context=generate_country_trades(question_dict[gpe[0]])
        return qa_model(question = question, context = context, top_k=3)
    

In [81]:
# question = "How much metals did USA export from China in 1st Jan 2010?"
# question = "What was the GDP of United States in 2018?"
# question = "What products did US export from China by 1st jan 2020?"
# question = "What does Machine contain?"
# question = "With whom does U.S trade?"
question = "With whom does U.S has FTA with?"

print("Orginal Question: ",question)
ans=question_answer(question)
try:
    df = pd.DataFrame(ans, index=[0])
    print(df)
except:
    df = pd.DataFrame(ans)
    print(df)

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


Orginal Question:  With whom does U.S has FTA with?
Updated_question:  With whom does usa has FTA with?
['Australia' 'Bahrain' 'Canada' 'Chile' 'Colombia' 'Costa Rica'
 'Dominican Republic' 'Guatemala' 'Honduras' 'Israel' 'Jordan'
 'South Korea' 'Morocco' 'Mexico' 'Nicaragua' 'Oman' 'Panama' 'Peru'
 'Singapore' 'El Salvador']
3
      score  start  end                                             answer
0  0.667012     18   27                                          Australia
1  0.021032     18   36                                 Australia, Bahrain
2  0.016542     18   44                         Australia, Bahrain, Canada
3  0.014562     18   93  Australia, Bahrain, Canada, Chile, Colombia, C...
4  0.013742     18   73  Australia, Bahrain, Canada, Chile, Colombia, C...
5  0.010333     18   51                  Australia, Bahrain, Canada, Chile
6  0.009154     18   61        Australia, Bahrain, Canada, Chile, Colombia
7  0.006049      1   27                         USA has FTA with Austr