# Hugging Face API inference

### Setting up

In [1]:
# libraries
import pandas as pd
import json
import os
#import re

from getpass import getpass

from langchain import HuggingFaceHub
from langchain import PromptTemplate, LLMChain
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chains import SequentialChain

#from Levenshtein import distance 

#from langchain.llms import OpenAI

In [2]:
def load_jsonl(path):
    """Load a JSON Lines file into a DataFrame with one row per record.

    Args:
        path: Path of a UTF-8 text file holding one JSON document per line.
            Blank lines are ignored.

    Returns:
        pandas.DataFrame built from the parsed records, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Iterate over the file object instead of calling str.splitlines(): the
    # latter also breaks on characters such as U+2028, which may appear
    # unescaped inside a JSON string and would cut a record in half.
    with open(path, encoding="utf-8") as file:
        records = [json.loads(line) for line in file if line.strip()]

    return pd.DataFrame(records)

In [3]:
# Directory with the aNLI files: <split>.jsonl holds the stories and
# <split>-labels.lst holds one label per line.
aNLIPath = '/home/navitas/Documents/irsko/C3_project_CT5108/data/anli/'

# Test split: stories joined with their labels on the row index.
aNLI_test = load_jsonl(f'{aNLIPath}test.jsonl')
label_test = pd.read_csv(f'{aNLIPath}test-labels.lst', names=['label'], header=None)
aNLI_test = aNLI_test.join(label_test)
print(f'test data length: {len(aNLI_test)}')

# Train split, assembled the same way.
aNLI_train = load_jsonl(f'{aNLIPath}train.jsonl')
label_train = pd.read_csv(f'{aNLIPath}train-labels.lst', names=['label'], header=None)
aNLI_train = aNLI_train.join(label_train)

# Show complete sentences instead of truncated cells when previewing rows.
pd.set_option('display.max_colwidth', None)
aNLI_test.head(2)

test data length: 3059


Unnamed: 0,story_id,obs1,obs2,hyp1,hyp2,label
0,87aa0983-9b84-48b1-86ff-160b1567487c-1,Jane was a professor teaching piano to students.,Jane spent the morning sipping coffee and reading a book.,Two of Jane's students were early for their lessons.,None of Jane's students had a lesson that day.,2
1,dfc8584e-13fe-4e26-bdf6-2485e90ef29d-1,Nate had the summer off before college.,Nate's last summer before college was a total blast!,Nate spent the summer traveling and partying.,Nate decided to spend the entire summer working in the Mines.,1


### Logging in to Hugging Face

In [4]:
# Logging in: read the API token from a non-echoing prompt and publish it in the
# HUGGINGFACEHUB_API_TOKEN environment variable (presumably picked up by the
# HuggingFaceHub wrappers created later in this notebook).
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN = getpass()


 ········


### Models

In [5]:
# Generation settings. Only `tokens` is forwarded to the models; `temp` and
# `length` are defined but not passed on (the "max_length" entry is disabled).
temp = 0.1
length = 2000
tokens = 64


def _hub_llm(repo_id):
    """Create a HuggingFaceHub wrapper for `repo_id` capped at `tokens` new tokens."""
    return HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={"max_new_tokens": tokens},  # , "max_length": length}
    )


# FLAN-T5 model - 11.3B parameters; xl - 3B
llm_t5 = _hub_llm("google/flan-t5-xxl")

# Falcon model
llm_falcon = _hub_llm("tiiuae/falcon-7b")
llm_falcon_inst = _hub_llm("tiiuae/falcon-7b-instruct")


In [None]:
# NOTE: this call relies on the four-argument give_prediction and on overall_chain,
# both defined in the "Legacy" section further down; those cells must run first.
prediction = give_prediction(
    model=llm_falcon_inst,
    chain=overall_chain,
    samples=aNLI_test,
    save='falcon_chain2_pokus_likely_3.csv',
)

## Chain

In [26]:
# Model bound into every LLMChain created in the cells below; switch it here.
llm = llm_falcon_inst # llm_t5 llm_falcon_inst

In [36]:
# Idea: a further chain could explain why something is incoherent or less likely.

# Few-shot prompt that picks the hypothesis (A or B) which better explains the
# ending sentence O2 on its own. The prompt text lives under its own name so that
# `ending_sentence_few` is only ever bound to the chain.
# Fixed in the prompt: typo "elese" -> "else", and the first example now uses the
# same "Sentence B is:" wording as the other examples and the query line.
ending_sentence_few_prompt = """Instruction: The task is to select one and only one of two provided sentences, nothing else, 
to logically and coherently continue the given context. I will give you a few examples.

Context is: Alice does not have the ball. Sentence A is: Alice kept the ball. Sentence B is: Alice gave the ball to Bill.
Response: Alice does not have the ball. That is because Alice gave the ball to Bill. Sentence B explains the context better.
Context is: They all loved the cool treat! Sentence A is: Everyone was given ice cream. Sentence B is: Everyone was given soup. 
Response: They all loved the cool treat! That is because Everyone was given ice cream. Sentence A explains the context better.
Context is: She bought them all. Sentence A is: The bakery had no cupcakes left. Sentence B is: The bakery only had three cupcakes left.
Response: She bought them all. That is because The bakery only had three cupcakes left. Sentence B explains the context better.

Context is: {O2} Sentence A is: {H1} Sentence B is: {H2} 
Response:
"""
ending_sentence_template_few = PromptTemplate(input_variables=["O2", "H1", "H2"], template=ending_sentence_few_prompt)
# The answer is published as "answerEF".
ending_sentence_few = LLMChain(llm=llm, prompt=ending_sentence_template_few, output_key="answerEF")

# Prompt that checks whether O1 followed by the text in {answerEF} is free of
# contradictions.
# Fixed in the prompt: the instruction read "contain a logical with each other";
# the missing noun "contradiction" has been restored.
consistency = """Instruction: The task is to decide if the sentences in the given text are consistent (answer "yes") 
or if they contain a logical contradiction with each other (answer "no"). Then provide an explanation.
Text: {O1} {answerEF}
Response:
"""
consistency_template = PromptTemplate(input_variables=["O1", "answerEF"], template=consistency)
consistency_end_chain = LLMChain(llm=llm, prompt=consistency_template, output_key="consistency_O1")


###############


# Prompt that shows both candidate stories (O1 + hypothesis + O2) side by side and
# asks which of them contains a contradiction.
# Few-shot example that was taken out: "I was walking to the mall. On the way, I realized I forgot my phone. I almost cracked my phone."
consistencyBoth = """Instruction: There are two independent texts, B and A. The task is to decide which one of the texts 
is not logically consistent and contains a contradiction. Consider only the information in the text. 
Now decide which of those two texts contains contradictions.
Text A: {O1} {H1} {O2} Text B: {O1} {H2} {O2} 
Response:
"""
consistency_templateBoth = PromptTemplate(
    template=consistencyBoth,
    input_variables=["O1", "O2", "H1", "H2"],
)
consistency_both = LLMChain(
    prompt=consistency_templateBoth,
    llm=llm,
    output_key="consistency_both",
)



###############

# Prompt asking the model to justify why one story (O1, a hypothesis, O2) does not
# fit together. The hypothesis slot is named "H1" regardless of which hypothesis
# the caller feeds in.
inconsistency = """Instruction: The task is to explain the inconsistency and logical fallacy between the sentences of given text.
Answer with phrase "It is because ___ ", do not use the words "inconsistency" and "logical fallacy" in the explanation.
Text: {O1} {H1} {O2}
Response:
"""
inconsistency_template = PromptTemplate(
    template=inconsistency,
    input_variables=["O1", "H1", "O2"],
)
inconsistency_chain = LLMChain(
    prompt=inconsistency_template,
    llm=llm,
    output_key="inconsistency",
)


# Prompt that has the model judge an explanation (the text placed in the
# {inconsistency} slot) for coherence and correctness.
correctness = """Instruction: The task is to decide if given explanation is coherent and correct.
Explanation: {inconsistency}
Response:
"""
correctness_template = PromptTemplate(
    template=correctness,
    input_variables=["inconsistency"],
)
correctness_chain = LLMChain(
    prompt=correctness_template,
    llm=llm,
    output_key="correctness",
)


# Prompt that compares two explanations against the context O1 + O2 and asks which
# one is more likely.
likely_expl = """Instruction: The task is to decide which one of the explanations is more correct and more likely. 
Answer with phrase "Explanation __ is more likely because ___".
The context is: {O1} {O2}
Explanation A is: {explanation1}
Explanation B is: {explanation2}
Response:
"""
likely_expl_template = PromptTemplate(
    template=likely_expl,
    input_variables=["explanation1", "explanation2", "O1", "O2"],
)
likely_expl_chain = LLMChain(
    prompt=likely_expl_template,
    llm=llm,
    output_key="likely_expl",
)


################
# NOT USED
# Direct comparison of the two complete stories; the only call to this chain in
# give_prediction is commented out.

likeliness = """Instruction: The task is to decide which story (B or A) is more logically coherent and consistent, 
considering only the information in the story. 
Story A: {O1} {H1} {O2} Story B: {O1} {H2} {O2}. Which Story (B or A) is more coherent?"""
likeliness_template = PromptTemplate(
    template=likeliness,
    input_variables=["O1", "O2", "H1", "H2"],
)
likeliness_chain = LLMChain(
    prompt=likeliness_template,
    llm=llm,
    output_key="likeliness",
)



#  or the people or the things in the text do not match
#consistency_one = """Instruction: The task is to decide if the sentences in the given text are consistent with each other.
#Now decide if there is a logical or commonsense contradiction in the following text and explain it.
#Text: {O1} {H1} {O2}
#Response:
#"""
#consistency_one_template = PromptTemplate(input_variables=["O1", "O2", "H1"], template=consistency_one)
#consistency__one_chain = LLMChain(llm=llm, prompt=consistency_one_template, output_key="consistencyOne")


# Unused few-shot preamble kept for reference. It is a bare string expression that
# is not assigned to any name, so its only effect is to be echoed as the value of
# this notebook cell.
"""Example of contradiction: Jenny wanted cupcakes. The bakery had no cupcakes left. She bought them all.
Example of contradiction: It was a swelteringly hot day. Everyone was given soup. They all loved the cool treat!
Example of contradiction: Jake was at the park. Jake saw a tree. He chased after it but never caught it.
Example of contradiction: Alice has a ball. Alice kept the ball. Alice does not have the ball.
Now decide if there is a logical or commonsense contradiction in the following text and explain it."""



'Example of contradiction: Jenny wanted cupcakes. The bakery had no cupcakes left. She bought them all.\nExample of contradiction: It was a swelteringly hot day. Everyone was given soup. They all loved the cool treat!\nExample of contradiction: Jake was at the park. Jake saw a tree. He chased after it but never caught it.\nExample of contradiction: Alice has a ball. Alice kept the ball. Alice does not have the ball.\nNow decide if there is a logical or commonsense contradiction in the following text and explain it.'

In [37]:
#overall_chain = SequentialChain(chains=[ending_sentence_few, ending_sentence_few2, starting_sentence_few, consistency_chain, same_hyp_chain],
#                                input_variables=["O1","O2", "H1", "H2","answerEF", "answerSF", "answerEF2" ], verbose=True)

In [38]:
def give_prediction(model, samples, save):
    """Run the multi-prompt pipeline on every sample and save all intermediate outputs.

    For each row the module-level chains are queried in this order:
    ``ending_sentence_few``, ``consistency_end_chain`` (fed with the previous
    answer), ``consistency_both``, ``inconsistency_chain`` once per hypothesis,
    ``correctness_chain`` once per explanation, and finally
    ``likely_expl_chain``, which compares the two explanations.

    Args:
        model: Unused; the chains are already bound to the module-level ``llm``.
            Kept so that existing calls keep working.
        samples: DataFrame with the columns ``story_id``, ``obs1``, ``obs2``,
            ``hyp1`` and ``hyp2``.
        save: Path of the CSV file that receives the results. It is rewritten
            after every 10th sample and once more at the end.

    Returns:
        list: The ``likely_expl_chain`` output (the last stage of the pipeline)
        for every processed sample, in the order of ``samples.index``.
        Previously this list was never filled, so the function always
        reported 0 answered samples and returned an empty list.
    """
    columns = ("story_id", "end", "end_cons", "consist",
               "inconsH1", "inconsH2", "cor1", "cor2", "likEx")
    results = {name: [] for name in columns}

    def _checkpoint():
        # Rewrite the whole CSV so an interrupted run keeps what was answered so far.
        pd.DataFrame(results).to_csv(save, index=False)

    for count, n in enumerate(samples.index, start=1):
        obs1 = samples.obs1[n]
        obs2 = samples.obs2[n]
        hyp1 = samples.hyp1[n]
        hyp2 = samples.hyp2[n]

        # Stage 1: choose a hypothesis from the ending alone, then check that
        # choice against the first observation.
        ending = ending_sentence_few.run({"O2": obs2, "H1": hyp1, "H2": hyp2})
        ending_consistency = consistency_end_chain.run({"O1": obs1, "answerEF": ending})

        # Stage 2: ask which of the two complete stories is contradictory.
        both_stories = consistency_both.run({"O1": obs1, "O2": obs2, "H1": hyp1, "H2": hyp2})

        # Stage 3: explain the inconsistency of each story. The prompt only has
        # an "H1" slot, so the second hypothesis is passed through it as well.
        expl1 = inconsistency_chain.run({"O1": obs1, "O2": obs2, "H1": hyp1})
        expl2 = inconsistency_chain.run({"O1": obs1, "O2": obs2, "H1": hyp2})

        # Stage 4: rate each explanation, then compare the two.
        rating1 = correctness_chain.run({"inconsistency": expl1})
        rating2 = correctness_chain.run({"inconsistency": expl2})
        likelier = likely_expl_chain.run(
            {"explanation1": expl1, "explanation2": expl2, "O1": obs1, "O2": obs2}
        )

        row = (samples.story_id[n], ending, ending_consistency, both_stories,
               expl1, expl2, rating1, rating2, likelier)
        for name, value in zip(columns, row):
            results[name].append(value)

        if count % 10 == 0:
            _checkpoint()
            print(count)

    _checkpoint()

    answers = results["likEx"]
    print('samples answered: ', len(answers))

    return answers


In [None]:
# Run the multi-prompt pipeline on the whole test split. The numbers printed below
# are the progress counter emitted after every 10th sample.
prediction = give_prediction(
    model=llm,
    samples=aNLI_test,
    save='falcon_multiprompt_finaly_final.csv',
)

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330


## Legacy

In [None]:
# Idea: a further chain could explain why something is incoherent or less likely.

# Legacy yes/no prompts. Variant A asks whether O1 + hypothesis can explain O2,
# variant B whether O2 is a logical continuation of O1 + hypothesis. The suffix
# 1 or 2 selects hypothesis H1 or H2.

consistencyA1 = """E is: {O1} {H1} C is: {O2} Could E explain C in this context? Answer yes or no and explain why."""
consistencyA1_template = PromptTemplate(template=consistencyA1, input_variables=["O1", "O2", "H1"])
consistencyA1_chain = LLMChain(prompt=consistencyA1_template, llm=llm, output_key="answerA1")

consistencyB1 = """The task is to decide if the continuation of the story is coherent with the story and logical. The Story is: {O1} {H1}. Could the Story logically continue with the sentence "{O2}"? Answer yes or no and explain why or why not."""
consistencyB1_template = PromptTemplate(template=consistencyB1, input_variables=["O1", "O2", "H1"])
consistencyB1_chain = LLMChain(prompt=consistencyB1_template, llm=llm, output_key="answerB1")

consistencyA2 = """E is: {O1} {H2} C is: {O2} Could E explain C in this context? Answer yes or no and explain why."""
consistencyA2_template = PromptTemplate(template=consistencyA2, input_variables=["O1", "O2", "H2"])
consistencyA2_chain = LLMChain(prompt=consistencyA2_template, llm=llm, output_key="answerA2")

consistencyB2 = """The task is to decide if the continuation of the story is coherent with the story and logical. The Story is: {O1} {H2}. Could the Story logically continue with the sentence "{O2}"? Answer yes or no and explain why or why not."""
consistencyB2_template = PromptTemplate(template=consistencyB2, input_variables=["O1", "O2", "H2"])
consistencyB2_chain = LLMChain(prompt=consistencyB2_template, llm=llm, output_key="answerB2")


# Eight-shot prompt: each example gives a context, two candidate explanations and
# a conclusion, followed by the expected answer format.
# Fixed in the examples: typos "becaues" -> "because" and "alot" -> "a lot", and
# the missing space in "Explanation A:Neil".
likeliness1 = """ The task is to choose a better and more coherent Explanation (B or A) for the Conclusion.

Context: Dan was digging in his yard to put in an extension to his home. Explanation A: Dan hit a pipe. Explanation B: Dan's shovel hit something soft. Conclusion: When he sniffed the air he realized he'd struck a sewage pipe.
Answer: Explanation A explains the Conclusion better because the sewage pipe is not soft. Correct explanation is A.

Context: Sam got a cold one day. Explanation A: Sam went to the library. Explanation B: He went for a run and his cold got worse. Conclusion: Sam had to take medicine and rest for a week.
Answer: The Explanation B explains the Conclusion better because the run worsened the cold so Sam had to rest. Correct explanation is B.

Context: Jenny wanted cupcakes. Explanation A: The bakery only had three cupcakes left. Explanation B: The bakery had no cupcakes left. Conclusion: She bought them all.
Answer: Explanation A explains the Conclusion better because the bakery had cupcakes left so Jenny bought them. Explanation A is better. Correct explanation is A.

Context: Jake was at the park. Explanation A: Jake saw a tree. Explanation B: Jake saw a squirrel. Conclusion: He chased after it but never caught it.
Answer: Explanation B explains the Conclusion better because when Jake saw a squirrel he chased after it. Correct explanation is B.

Context:  When I was younger, I went to summer camp. Explanation A: We ate soup every night. Explanation B: We played cards a lot. Conclusion: My favorite card game was poker.
Answer: Explanation B explains the Conclusion better because we played cards a lot and my favorite card game was poker are related. Correct explanation is B.

Context: It was a swelteringly hot day. Explanation A: Everyone was given ice cream. Explanation B: Everyone was given soup. Conclusion: They all loved the cool treat!
Answer: Explanation A explains the Conclusion better because a cool treat could be the ice cream. Correct explanation is A.

Context: I was walking to the mall. Explanation A: I tripped on a stick. Explanation B: On the way, I realized I forgot my phone. Conclusion: I almost cracked my phone.
Answer: Explanation A explains the Conclusion better because cracking phone better explains that I tripped on a stick. Correct explanation is A.

Context: Neil was traveling in Vietnam. Explanation A: Neil decided to teach new things to local people. Explanation B: It was a college class trip. Conclusion: He had a great time on his educational trip.
Answer: Explanation B explains the Conclusion better because the educational trip was more likely a college class trip. Correct explanation is B.

The task is to choose a better and more coherent Explanation (B or A) for the Conclusion.
Context: {O1} Explanation A: {H1} Explanation B: {H2} Conclusion: {O2}
Answer: """

likeliness_template1 = PromptTemplate(input_variables=["O1", "O2", "H1", "H2"], template=likeliness1)
likeliness1_chain = LLMChain(llm=llm, prompt=likeliness_template1, output_key="likeliness1")

# Zero-shot variant: which of the two complete stories is more coherent?
likeliness2 = """The task is to decide which story is more coherent, considering only the information in the story. Story A: {O1} {H1} {O2} Story B: {O1} {H2} {O2}. Which Story (B or A) is more coherent?"""
likeliness_template2 = PromptTemplate(
    template=likeliness2,
    input_variables=["O1", "O2", "H1", "H2"],
)
likeliness2_chain = LLMChain(
    prompt=likeliness_template2,
    llm=llm,
    output_key="likeliness2",
)

# Inverted variant: which text is the worse explanation for the conclusion O2?
likeliness3 = """The task is to decide which Text is worst explanation for Conclusion ({O2}).
Text A is: {O1} {H1} Text B is: {O1} {H2} Which Text (B or A) is the worst explanation for Conclusion and why?"""
likeliness_template3 = PromptTemplate(
    template=likeliness3,
    input_variables=["O1", "O2", "H1", "H2"],
)
likeliness3_chain = LLMChain(
    prompt=likeliness_template3,
    llm=llm,
    output_key="likeliness3",
)


# Four-shot chain-of-thought prompt. The final query is pre-filled with earlier
# answers ({answerA}, {answerB}, {likeliness}) and left open for the model to
# complete.
# Fixed in the prompt: the first example lacked the "Answer:" prefix that the other
# three examples and the query line use, so the demonstrations did not share one
# format.
ChainOfThought = """Select the more probable explanation of given context from a pair of hypothesis choices and give an explanation for the choice.
Next, I will give you four examples for test.

Context: Jenny wanted cupcakes. She bought them all.
Hypothesis choices: A. The bakery only had three cupcakes left. B. The bakery had no cupcakes left.
Answer: Is the hypothesis A consistent? Yes. Is the hypothesis B consistent? No, Jenny can not buy cupcakes as there are no cupcakes left. 
The more likely is hypothesis A. The correct hypothesis is A.

Context: Jake was at the park. He chased after it but never caught it.
Hypothesis choices: A. Jake saw a tree. B. Jake saw a squirrel.
Answer: Is the hypothesis A consistent? No. Is the hypothesis B consistent? Yes. 
The more likely is hypothesis B. The correct hypothesis is B.

Context: When I was younger, I went to summer camp. My favorite card game was poker.
Hypothesis choices: A. We ate soup every night. B. We played cards a lot.
Answer: Is the hypothesis A consistent? Yes. Is the hypothesis B consistent? Yes. 
The more likely is hypothesis B. The correct hypothesis is B.

Context: It was a swelteringly hot day. They all loved the cool treat!
Hypothesis choices: A. Everyone was given ice cream. B. Everyone was given soup.
Answer: Is the hypothesis A consistent? Yes. Is the hypothesis B consistent? Yes. 
The more likely is hypothesis A. The correct hypothesis is A.

Next, I will give you an example for test.
Context: {O1} {O2} 
Hypothesis choices: A. {H1} B. {H2}
Answer: Is the hypothesis A consistent? {answerA}. Is the hypothesis B consistent? {answerB}.
The more likely is hypothesis {likeliness}.
"""

ChainOfThought_template = PromptTemplate(input_variables=["O1", "O2", "H1", "H2","answerA", "answerB", "likeliness"], template=ChainOfThought)
# No output_key is given here, unlike the other chains in this notebook.
ChainOfThought_chain = LLMChain(llm=llm, prompt=ChainOfThought_template)


In [None]:
# Legacy pipeline: runs the three likeliness prompts one after another. Each of
# them is filled from the same four raw inputs.
overall_chain = SequentialChain(
    input_variables=["O1", "O2", "H1", "H2"],
    chains=[likeliness1_chain, likeliness2_chain, likeliness3_chain],  # likeliness_chain, ChainOfThought_chain
    verbose=True,
)

In [None]:
def give_prediction(model, chain, samples, save):
    """Legacy runner: query the three likeliness chains per sample and save the outputs.

    Every row of ``samples`` is sent to the module-level ``likeliness1_chain``,
    ``likeliness2_chain`` and ``likeliness3_chain``; the raw answers are written
    to ``save`` together with the story id.

    Args:
        model: Unused; the chains are already bound to the module-level ``llm``.
            Kept so that existing calls keep working.
        chain: Unused. Running the combined chain was disabled because its
            partial answers could not be exported to the CSV.
        samples: DataFrame with the columns ``story_id``, ``obs1``, ``obs2``,
            ``hyp1`` and ``hyp2``.
        save: Path of the CSV file that receives the results. It is rewritten
            after every 10th sample and once more at the end.

    Returns:
        list: Always empty. It was meant to hold the output of ``chain``,
        which is no longer called; the return value is kept for interface
        compatibility. The per-sample answers are in the CSV.
    """
    answers = []
    story_id, likely1, likely2, likely3 = [], [], [], []

    def _checkpoint():
        # Rewrite the whole CSV so an interrupted run keeps what was answered so far.
        pd.DataFrame(
            {'story_id': story_id, "likely1": likely1, "likely2": likely2, "likely3": likely3}
        ).to_csv(save, index=False)

    for count, n in enumerate(samples.index, start=1):
        inputs = {
            "O1": samples.obs1[n],
            "O2": samples.obs2[n],
            "H1": samples.hyp1[n],
            "H2": samples.hyp2[n],
        }

        # Each chain gets its own copy so that no call can affect the next one.
        likely1.append(likeliness1_chain.run(dict(inputs)))
        likely2.append(likeliness2_chain.run(dict(inputs)))
        likely3.append(likeliness3_chain.run(dict(inputs)))
        story_id.append(samples.story_id[n])

        if count % 10 == 0:
            _checkpoint()
            print(count)

    _checkpoint()

    # Report the rows actually processed; `answers` stays empty (see Returns),
    # which is why the old message always said 0.
    print('samples answered: ', len(story_id))

    return answers
