## Install libraries

In [39]:
!pip install sentencepiece
!pip install transformers



## Load model

In [40]:
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
os.environ["HF_ENDPOINT"] = "https://huggingface.co"
from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")

### Set the prompt

In [41]:
template = """You are expert in indian law.
Example 1:
Query: the appellant on february 9, 1961 was appointed as an officer in grade iii in the respondent bank ( for short'the bank'). he was promoted on april 1, 1968 to the grade officer in the foreign exchange department in the head office of the bank. sometime in 1964, mch society ( for short'the society') was formed of which the appellant was one of the chief promoters and thereafter its secretary. the object of the society was to construct residential premises for the employees of the bank and its other members. it appears that the complaint was received in respect of the affairs of the society relating to misappropriation of the funds of the society and consequently, in exercise of the powers under section s of act a1, the registrar on april 23, 1969 instituted an inquiry thereof. p1 was appointed the registrar's nominee who on october 4, 1969 ; submitted the report holding the appellant and two other office bearers of the society negligent in dealing with the funds of the society causing a loss to the tune of rs. 3, 59, 000 / -. the registrar on october 21, 1969, passed an order appointing an officer under section s of a1 to assess the loss caused to the society. however,
Docuemnt: whoever, being in any manner entrusted with property, or with any dominion over property in his capacity of a public servant or in the way of his business as a banker, merchant, factor, broker, attorney or agent, commits criminal breach of trust in respect of that property, shall be punished with 1 [ imprisonment for life ], or with imprisonment of either description for a term which may extend to ten years, and shall also be liable to fine. substituted by act 26 of 1955, section 117 and schedule, for " transportation for life " ( w. e. f. 1 - 1 - 1956 ).
Given the content of the Document, does it relevant to the Query? Yes

Example 2:
Query: the appellant on february 9, 1961 was appointed as an officer in grade iii in the respondent bank ( for short'the bank'). he was promoted on april 1, 1968 to the grade officer in the foreign exchange department in the head office of the bank. sometime in 1964, mch society ( for short'the society') was formed of which the appellant was one of the chief promoters and thereafter its secretary. the object of the society was to construct residential premises for the employees of the bank and its other members. it appears that the complaint was received in respect of the affairs of the society relating to misappropriation of the funds of the society and consequently, in exercise of the powers under section s of act a1, the registrar on april 23, 1969 instituted an inquiry thereof. p1 was appointed the registrar's nominee who on october 4, 1969 ; submitted the report holding the appellant and two other office bearers of the society negligent in dealing with the funds of the society causing a loss to the tune of rs. 3, 59, 000 / -. the registrar on october 21, 1969, passed an order appointing an officer under section s of a1 to assess the loss caused to the society. however,
Docuemnt: in this act, " dowry " means any property or valuable security given or agreed to be given either directly or indirectly - ( a ) by one party to a marriage to the other party to the marriage ; or ( b ) by the parents of either party to a marriage or by any other person, to either party to the marriage or to any other person ; at or before 1 [ or at any time after the marriage ] 2 [ in connection with the marriage of the said parties, but does not include ] dower or mahr in the case of persons to whom the muslim personal law ( shariat ) applies 3 [... ] explanation ii. - the expression " valuable security " has the same meaning as in section 30 of the indian penal code ( 45 of 1860 ). substituted by act 43 of 1986, section 2, for " or after the marriage " ( w. e. f. 19 - 11 - 1986 ). for the words " as consideration for the marriage of the said parties, but does not include ", substituted by dowry prohibition ( amendment ) act, 1984, section 2 ( a ) ( w. e. f. 2 - 10 - 1995 ). explanation i omitted by dowry prohibition ( amendment ) act, 1984
Given the content of the Document, does it relevant to the Query? No

Example 3:
{}
Given the content of the statute, does it relevant to the mentioned Legal Situation?
"""

### probability of token

In [42]:
def get_score(tokens):
  token_true_id  = tokenizer.get_vocab()['▁true']
  token_false_id = tokenizer.get_vocab()['▁false']
  import torch
  output = model.generate(
      tokens,
      max_new_tokens=1,
      return_dict_in_generate=True,
      output_scores=True,
  )
  batch_scores = output.scores[0]
  batch_scores = batch_scores[:, [token_false_id, token_true_id]]
  batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
  score = batch_scores[:, 1].exp().tolist()[0]
  return score

### Q_1, D1 candidate doc is relevant

In [43]:
new_input_content = """Query: having been selected by the public service commission, the respondent herein was appointed as law officer - cum - draftsman in the directorate of cooperation. there was only one post in the same cadre and it had no promotional avenues. he filed a representation that his post be upgraded or two promotional avenues be provided to him. several representations made by him having not received consideration at the hands of the appellants, the respondent herein filed a writ petition seeking for a specific direction upon the appellant herein to provide at least two promotional avenues. the said contention of the respondent was accepted by the high court and by reason of its impugned judgment the appellant was directed to provide'the graded scale'to the appellant by providing three grades, the initial being grade iii which is the post of law officer cum draftsman and thereafter grade ii and grade i. officer of judicial service. it was further directed : " " the scale of pay of grade ii law officer - cum - draftsman shall be same as grade - ii officer of the judicial service. the scale of pay of grade - i law officer - cum - draftsman shall be equal to the scale of pay of grade - i officer of judicial service. " " questioning
Document: ( 1 ) notwithstanding anything in article 32 every high court shall have powers, throughout the territories in relation to which it exercise jurisdiction, to issue to any person or authority, including in appropriate cases, any government, within those territories directions, orders or writs, including writs in the nature of habeas corpus, mandamus, prohibitions, quo warranto and certiorari, or any of them, for the enforcement of any of the rights conferred by part iii and for any other purpose ( 2 ) the power conferred by clause ( 1 ) to issue directions, orders or writs to any government, authority or person may also be exercised by any high court exercising jurisdiction in relation to the territories within which the cause of action, wholly or in part, arises for the exercise of such power, notwithstanding that the seat of such government or authority or the residence of such person is not within those territories ( 3 ) where any party against whom an interim order, whether by way of injunction or stay or in any other manner, is made on, or in any proceedings relating to, a petition under clause ( 1 ), without ( a ) furnishing to such party copies of such petition and all documents in support of the plea for such interim
"""
tokens = tokenizer(template.format(new_input_content), return_tensors="pt").input_ids
outputs = model.generate(tokens)
str_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(str_output)
score = get_score(tokens)
print(score)


Token indices sequence length is longer than the specified maximum sequence length for this model (1688 > 512). Running this sequence through the model will result in indexing errors


No
0.8129456639289856


### Q_1, D2 candidate doc is non-relevant and should achieve lower score compared to the above one!

In [44]:
new_input_content = """Query: having been selected by the public service commission, the respondent herein was appointed as law officer - cum - draftsman in the directorate of cooperation. there was only one post in the same cadre and it had no promotional avenues. he filed a representation that his post be upgraded or two promotional avenues be provided to him. several representations made by him having not received consideration at the hands of the appellants, the respondent herein filed a writ petition seeking for a specific direction upon the appellant herein to provide at least two promotional avenues. the said contention of the respondent was accepted by the high court and by reason of its impugned judgment the appellant was directed to provide'the graded scale'to the appellant by providing three grades, the initial being grade iii which is the post of law officer cum draftsman and thereafter grade ii and grade i. officer of judicial service. it was further directed : " " the scale of pay of grade ii law officer - cum - draftsman shall be same as grade - ii officer of the judicial service. the scale of pay of grade - i law officer - cum - draftsman shall be equal to the scale of pay of grade - i officer of judicial service. " " questioning
Document: ( 1 ) whenever it appears to the appropriate government that land in any locality is needed or is likely to be needed for any public purpose 1 [ or for a company ], a notification to that effect shall be published in the official gazette 2 [ and in two daily newspapers circulating in that locality of which at least one shall be in the regional language ] and the collector shall cause public notice of the substance of such notification to be given at convenient places in the said locality 3 [ the last of the dates of such publication and the giving of such public notice, being hereinafter referred to as the date of publication of the notification ]. ( 2 ) thereupon it shall be lawful for any officer, either, generally or specially authorised by such government in this behalf, and for his servants and workmen, to enter upon and survey and take levels of any land in such locality ; to dig or bore in the sub - soil ; to do all other acts necessary to ascertain whether the land is adapted for such purpose ; to set out the boundaries of the land proposed to be taken and the intended line of the work ( if any ) proposed to be made thereon ; to mark such levels, boundaries and line by placing marks and cutting trenches,
"""
tokens = tokenizer(template.format(new_input_content), return_tensors="pt").input_ids
outputs = model.generate(tokens)
str_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(str_output)
score = get_score(tokens)
print(score)


No
0.8138723373413086


D1 score > D2 score. The model did a good relevance ordering at least for these two examples


# Try to assess the effectiveness on the validation set (queries_val.tsv)


## downloading files

In [45]:
!wget https://www.dropbox.com/scl/fi/md1qj0dz07wi66lan1aah/afternoon_session_files.zip?rlkey=ficet9hbs55wxs7e11u6yi9w0&dl=0
!unzip afternoon_session_files.zip?rlkey=ficet9hbs55wxs7e11u6yi9w0&dl=0

--2023-08-30 14:18:40--  https://www.dropbox.com/scl/fi/md1qj0dz07wi66lan1aah/afternoon_session_files.zip?rlkey=ficet9hbs55wxs7e11u6yi9w0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.65.18, 2620:100:6021:18::a27d:4112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.65.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.dropbox.com/e/scl/fi/md1qj0dz07wi66lan1aah/afternoon_session_files.zip?rlkey=ficet9hbs55wxs7e11u6yi9w0 [following]
--2023-08-30 14:18:41--  https://www.dropbox.com/e/scl/fi/md1qj0dz07wi66lan1aah/afternoon_session_files.zip?rlkey=ficet9hbs55wxs7e11u6yi9w0
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://ucca3c1b15ce7f24f2d6b3365243.dl.dropboxusercontent.com/cd/0/inline/CCwcS5eTqowGgJFg6xlIUaS6QVpkwfAccFxzPkn5dl8MekED0ABJmUGSysGIJ9owWt5oafuYq5dnL4gNeZintHW9iQ67AR6LqcQywdw71i8Nqbt3rdPnKmGEvbXcCtkhHO9f62ZdHJnaLGw1GIdLBR3e/file# [following]
--2023

## Steps
1. Iterate on each query
2. Per candidate document for that query, compute the score
3. Create ranking run file in TREC format
4. Evalute the effectiveness
5. Each one can present their results and how they designed and selected LLM at the end and try to improve it in future.
Note:

- Using larger LLM takes more time but probably lead to higher effectiveness so you can take this into the account.

- For loading larger model with less gpu you can take a look on https://huggingface.co/blog/4bit-transformers-bitsandbytes for maybe experimenting with larger models for friday.

- Feel free to select another LLM based on the leaderboard: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard

- Feel free to change the prompt

- Feel free to use the utils code and implementations from previous notebook




### read collections

In [46]:
import tqdm

def read_collection(f_path):
  corpus = {}
  with open(f_path, "r") as fp:
    for line in tqdm.tqdm(fp, desc="reading {}".format(f_path)):
      did, dtext = line.strip().split("\t")
      corpus[did] = dtext
  return corpus
from glob import glob
def read_aila_documents(f_path):
  files = glob(corpus_path+"*.txt")
  corpus = {}
  for file_ in tqdm.tqdm(files, desc="reading {}".format(f_path)):
    content = open(file_, "r").read().split("\n")[1].split(":")[1]
    doc_id = file_.split("/")[-1].replace(".txt", "")
    corpus[doc_id] = content
  return corpus

from glob import glob
def read_aila_documents(f_path):
  files = glob(corpus_path+"*.txt")
  corpus = {}
  for file_ in tqdm.tqdm(files, desc="reading {}".format(f_path)):
    content = open(file_, "r").read().split("\n")[1].split(":")[1]
    doc_id = file_.split("/")[-1].replace(".txt", "")
    corpus[doc_id] = content
  return corpus
def read_top1000_run(f_path, corpus, queries, separator = " "):
  samples = {}
  with open(f_path, "r") as fp:
    for line in tqdm.tqdm(fp, desc="reading {}".format(f_path)):
      qid, _, did, rank, score, __ = line.strip().split(separator)
      if qid not in queries: continue
      query = queries[qid]
      if qid not in samples:
        samples[qid] = {'qid': qid , 'query': query, 'docs': list(), 'docs_ids': list()}
      samples[qid]['docs'].append(corpus[did])
      samples[qid]['docs_ids'].append(did)
  return samples

In [47]:
queries_path = "queries_val.tsv"
corpus_path = "./Object_statutes/*"
queries = read_collection(queries_path)
corpus = read_aila_documents(corpus_path)

reading queries_val.tsv: 10it [00:00, 49402.87it/s]
reading ./Object_statutes/*: 100%|██████████| 197/197 [00:00<00:00, 33784.92it/s]


In [48]:
test_samples = read_top1000_run("base_run.txt", corpus, queries, separator = " ")

reading base_run.txt: 5000it [00:00, 1099136.27it/s]


## get_score_given_q_and_d

In [69]:
def get_score_given_q_and_d(document, query):
  # template = """You are expert in indian law.
  # Example 1:
  # Query: the appellant on february 9, 1961 was appointed as an officer in grade iii in the respondent bank ( for short'the bank'). he was promoted on april 1, 1968 to the grade officer in the foreign exchange department in the head office of the bank. sometime in 1964, mch society ( for short'the society') was formed of which the appellant was one of the chief promoters and thereafter its secretary. the object of the society was to construct residential premises for the employees of the bank and its other members. it appears that the complaint was received in respect of the affairs of the society relating to misappropriation of the funds of the society and consequently, in exercise of the powers under section s of act a1, the registrar on april 23, 1969 instituted an inquiry thereof. p1 was appointed the registrar's nominee who on october 4, 1969 ; submitted the report holding the appellant and two other office bearers of the society negligent in dealing with the funds of the society causing a loss to the tune of rs. 3, 59, 000 / -. the registrar on october 21, 1969, passed an order appointing an officer under section s of a1 to assess the loss caused to the society. however,
  # Docuemnt: whoever, being in any manner entrusted with property, or with any dominion over property in his capacity of a public servant or in the way of his business as a banker, merchant, factor, broker, attorney or agent, commits criminal breach of trust in respect of that property, shall be punished with 1 [ imprisonment for life ], or with imprisonment of either description for a term which may extend to ten years, and shall also be liable to fine. substituted by act 26 of 1955, section 117 and schedule, for " transportation for life " ( w. e. f. 1 - 1 - 1956 ).
  # Given the content of the Document, does it relevant to the Query? Yes

  # Example 2:
  # Query: the appellant on february 9, 1961 was appointed as an officer in grade iii in the respondent bank ( for short'the bank'). he was promoted on april 1, 1968 to the grade officer in the foreign exchange department in the head office of the bank. sometime in 1964, mch society ( for short'the society') was formed of which the appellant was one of the chief promoters and thereafter its secretary. the object of the society was to construct residential premises for the employees of the bank and its other members. it appears that the complaint was received in respect of the affairs of the society relating to misappropriation of the funds of the society and consequently, in exercise of the powers under section s of act a1, the registrar on april 23, 1969 instituted an inquiry thereof. p1 was appointed the registrar's nominee who on october 4, 1969 ; submitted the report holding the appellant and two other office bearers of the society negligent in dealing with the funds of the society causing a loss to the tune of rs. 3, 59, 000 / -. the registrar on october 21, 1969, passed an order appointing an officer under section s of a1 to assess the loss caused to the society. however,
  # Docuemnt: in this act, " dowry " means any property or valuable security given or agreed to be given either directly or indirectly - ( a ) by one party to a marriage to the other party to the marriage ; or ( b ) by the parents of either party to a marriage or by any other person, to either party to the marriage or to any other person ; at or before 1 [ or at any time after the marriage ] 2 [ in connection with the marriage of the said parties, but does not include ] dower or mahr in the case of persons to whom the muslim personal law ( shariat ) applies 3 [... ] explanation ii. - the expression " valuable security " has the same meaning as in section 30 of the indian penal code ( 45 of 1860 ). substituted by act 43 of 1986, section 2, for " or after the marriage " ( w. e. f. 19 - 11 - 1986 ). for the words " as consideration for the marriage of the said parties, but does not include ", substituted by dowry prohibition ( amendment ) act, 1984, section 2 ( a ) ( w. e. f. 2 - 10 - 1995 ). explanation i omitted by dowry prohibition ( amendment ) act, 1984
  # Given the content of the Document, does it relevant to the Query? No

  # Example 3:
  # {}
  # Given the content of the statute, does it relevant to the mentioned Legal Situation?
  # """

  # temlate 2 by max
  # template = """You are expert in indian law.
  # Example 1:
  # Query: the appellant on february 9, 1961 was appointed as an officer in grade iii in the respondent bank ( for short'the bank'). he was promoted on april 1, 1968 to the grade officer in the foreign exchange department in the head office of the bank. sometime in 1964, mch society ( for short'the society') was formed of which the appellant was one of the chief promoters and thereafter its secretary. the object of the society was to construct residential premises for the employees of the bank and its other members. it appears that the complaint was received in respect of the affairs of the society relating to misappropriation of the funds of the society and consequently, in exercise of the powers under section s of act a1, the registrar on april 23, 1969 instituted an inquiry thereof. p1 was appointed the registrar's nominee who on october 4, 1969 ; submitted the report holding the appellant and two other office bearers of the society negligent in dealing with the funds of the society causing a loss to the tune of rs. 3, 59, 000 / -. the registrar on october 21, 1969, passed an order appointing an officer under section s of a1 to assess the loss caused to the society. however,
  # Docuemnt: whoever, being in any manner entrusted with property, or with any dominion over property in his capacity of a public servant or in the way of his business as a banker, merchant, factor, broker, attorney or agent, commits criminal breach of trust in respect of that property, shall be punished with 1 [ imprisonment for life ], or with imprisonment of either description for a term which may extend to ten years, and shall also be liable to fine. substituted by act 26 of 1955, section 117 and schedule, for " transportation for life " ( w. e. f. 1 - 1 - 1956 ).
  # Given the content of the Document, does it relevant to the Query? Yes

  # Example 2:
  # Query: the appellant on february 9, 1961 was appointed as an officer in grade iii in the respondent bank ( for short'the bank'). he was promoted on april 1, 1968 to the grade officer in the foreign exchange department in the head office of the bank. sometime in 1964, mch society ( for short'the society') was formed of which the appellant was one of the chief promoters and thereafter its secretary. the object of the society was to construct residential premises for the employees of the bank and its other members. it appears that the complaint was received in respect of the affairs of the society relating to misappropriation of the funds of the society and consequently, in exercise of the powers under section s of act a1, the registrar on april 23, 1969 instituted an inquiry thereof. p1 was appointed the registrar's nominee who on october 4, 1969 ; submitted the report holding the appellant and two other office bearers of the society negligent in dealing with the funds of the society causing a loss to the tune of rs. 3, 59, 000 / -. the registrar on october 21, 1969, passed an order appointing an officer under section s of a1 to assess the loss caused to the society. however,
  # Docuemnt: in this act, " dowry " means any property or valuable security given or agreed to be given either directly or indirectly - ( a ) by one party to a marriage to the other party to the marriage ; or ( b ) by the parents of either party to a marriage or by any other person, to either party to the marriage or to any other person ; at or before 1 [ or at any time after the marriage ] 2 [ in connection with the marriage of the said parties, but does not include ] dower or mahr in the case of persons to whom the muslim personal law ( shariat ) applies 3 [... ] explanation ii. - the expression " valuable security " has the same meaning as in section 30 of the indian penal code ( 45 of 1860 ). substituted by act 43 of 1986, section 2, for " or after the marriage " ( w. e. f. 19 - 11 - 1986 ). for the words " as consideration for the marriage of the said parties, but does not include ", substituted by dowry prohibition ( amendment ) act, 1984, section 2 ( a ) ( w. e. f. 2 - 10 - 1995 ). explanation i omitted by dowry prohibition ( amendment ) act, 1984
  # Given the content of the Document, does it relevant to the Query? No


  # Given the content of the statute, does it relate to the mentioned Legal Situation?
  # {}
  # """

  # tempalte 3 by Jihwan
  template = """You are expert in indian law.
Example 1:
Query: the appellant on february 9, 1961 was appointed as an officer in grade iii in the respondent bank ( for short'the bank'). he was promoted on april 1, 1968 to the grade officer in the foreign exchange department in the head office of the bank. sometime in 1964, mch society ( for short'the society') was formed of which the appellant was one of the chief promoters and thereafter its secretary. the object of the society was to construct residential premises for the employees of the bank and its other members. it appears that the complaint was received in respect of the affairs of the society relating to misappropriation of the funds of the society and consequently, in exercise of the powers under section s of act a1, the registrar on april 23, 1969 instituted an inquiry thereof. p1 was appointed the registrar's nominee who on october 4, 1969 ; submitted the report holding the appellant and two other office bearers of the society negligent in dealing with the funds of the society causing a loss to the tune of rs. 3, 59, 000 / -. the registrar on october 21, 1969, passed an order appointing an officer under section s of a1 to assess the loss caused to the society. however,
Docuemnt: whoever, being in any manner entrusted with property, or with any dominion over property in his capacity of a public servant or in the way of his business as a banker, merchant, factor, broker, attorney or agent, commits criminal breach of trust in respect of that property, shall be punished with 1 [ imprisonment for life ], or with imprisonment of either description for a term which may extend to ten years, and shall also be liable to fine. substituted by act 26 of 1955, section 117 and schedule, for " transportation for life " ( w. e. f. 1 - 1 - 1956 ).
Given the content of the Document, does it relate to the Query? Yes

Example 2:
Query: the appellant on february 9, 1961 was appointed as an officer in grade iii in the respondent bank ( for short'the bank'). he was promoted on april 1, 1968 to the grade officer in the foreign exchange department in the head office of the bank. sometime in 1964, mch society ( for short'the society') was formed of which the appellant was one of the chief promoters and thereafter its secretary. the object of the society was to construct residential premises for the employees of the bank and its other members. it appears that the complaint was received in respect of the affairs of the society relating to misappropriation of the funds of the society and consequently, in exercise of the powers under section s of act a1, the registrar on april 23, 1969 instituted an inquiry thereof. p1 was appointed the registrar's nominee who on october 4, 1969 ; submitted the report holding the appellant and two other office bearers of the society negligent in dealing with the funds of the society causing a loss to the tune of rs. 3, 59, 000 / -. the registrar on october 21, 1969, passed an order appointing an officer under section s of a1 to assess the loss caused to the society. however,
Docuemnt: in this act, " dowry " means any property or valuable security given or agreed to be given either directly or indirectly - ( a ) by one party to a marriage to the other party to the marriage ; or ( b ) by the parents of either party to a marriage or by any other person, to either party to the marriage or to any other person ; at or before 1 [ or at any time after the marriage ] 2 [ in connection with the marriage of the said parties, but does not include ] dower or mahr in the case of persons to whom the muslim personal law ( shariat ) applies 3 [... ] explanation ii. - the expression " valuable security " has the same meaning as in section 30 of the indian penal code ( 45 of 1860 ). substituted by act 43 of 1986, section 2, for " or after the marriage " ( w. e. f. 19 - 11 - 1986 ). for the words " as consideration for the marriage of the said parties, but does not include ", substituted by dowry prohibition ( amendment ) act, 1984, section 2 ( a ) ( w. e. f. 2 - 10 - 1995 ). explanation i omitted by dowry prohibition ( amendment ) act, 1984
Given the content of the Document, does it relate to the Query? No

Example 3:
Query: Company A, a multinational tech corporation, entered into a licensing agreement with Company B on January 15, 2022. The agreement granted Company B the right to use certain patented technologies owned by Company A in their products. In return, Company B agreed to pay a licensing fee to Company A based on the sales of their products incorporating the patented technologies. However, by the end of the year, Company B had failed to make the full payment as stipulated in the agreement. As a result, Company A sent a notice of breach of contract to Company B on March 2, 2023, demanding the outstanding payment.
Document: Any party that enters into a contract with another party wherein certain considerations, such as payment or services, are agreed upon, is legally bound to fulfill those obligations as per the terms outlined in the contract. Failure to fulfill these obligations constitutes a breach of contract. The non-breaching party has the right to send a notice of breach of contract to the breaching party, specifying the nature of the breach and the actions required to remedy the breach. Such notices are a common initial step in resolving contractual disputes and ensuring that the parties uphold their agreed-upon obligations.
Given the content of the Document, does it relate to the Query? Yes

Example 4:
Query: Dr. Smith conducted a research study to investigate the effects of a new drug on patients with hypertension. The study involved administering the drug to a group of 200 participants over a period of six months. The results showed a significant reduction in blood pressure among the participants who received the drug compared to those who received a placebo. Dr. Smith concluded that the new drug could be an effective treatment for hypertension and submitted the findings to a medical journal for publication.
Document: When conducting a research study involving human participants, it is essential to adhere to ethical guidelines and obtain informed consent from the participants. Researchers should ensure that the study's design and methods are sound, the data collected is accurate, and the statistical analyses used are appropriate. The findings of the study should be presented objectively, along with any limitations or potential biases. Submission of research findings to a peer-reviewed journal allows for independent evaluation and verification of the study's methodology and conclusions before publication.
Given the content of the Document, does it relate to the Query? Yes

Example 5:
Query: The novel "Pinebrook Secrets" follows the journey of a young detective as she uncovers a series of mysterious events in a small town. As she delves deeper into the case, she realizes that the townspeople are hiding dark secrets about a decades-old unsolved murder.
Document: The migration patterns of birds in North America have been extensively studied by ornithologists over the past century. These studies have revealed fascinating insights into the timing, routes, and behaviors of various bird species during their migratory journeys.
Given the content of the Document, does it relate to the Query? No

Example 6:
Query: In the context of climate change, renewable energy sources like solar, wind, and hydropower have gained significant attention due to their low environmental impact and potential to reduce greenhouse gas emissions.
Document: The history of ancient civilizations in Mesopotamia showcases the development of early writing systems and the transition from hunter-gatherer societies to settled agricultural communities along the Tigris and Euphrates rivers.
Given the content of the Document, does it relate to the Query? No

Example 7:
Query: The process of photosynthesis involves the conversion of light energy into chemical energy by plants, which is used to synthesize glucose from carbon dioxide and water.
Document: The principles of supply and demand play a crucial role in determining market prices for goods and services. When demand exceeds supply, prices tend to rise, whereas oversupply can lead to price decreases.
Given the content of the Document, does it relate to the Query? No

Example 8:
Query: In George Orwell's novel "1984," the concept of "Newspeak" is introduced. It's a controlled language created by the totalitarian regime to limit freedom of thought and concepts that pose a threat to the regime.
Document: The process of mitosis is a fundamental aspect of cell division in eukaryotic cells. During mitosis, a single cell divides into two identical daughter cells, each with the same number of chromosomes as the parent cell.
Given the content of the Document, does it relate to the Query? No

Example 9:
Query: The theory of relativity, proposed by Albert Einstein, consists of two major formulations: the special theory of relativity and the general theory of relativity. These theories revolutionized our understanding of space, time, and gravity.
Document: The traditional art of origami originated in Japan and involves the intricate folding of paper to create various forms, including animals, plants, and geometric shapes.
Given the content of the Document, does it relate to the Query? No

Example 10:
Query: The Lewis and Clark Expedition, also known as the Corps of Discovery Expedition, was the first American overland expedition to the Pacific Coast and back. It was commissioned by President Thomas Jefferson to explore the newly acquired western territory after the Louisiana Purchase.
Document: The process of volcanic eruption is driven by the movement of magma from the Earth's mantle to the surface. As magma rises, it can lead to the formation of various volcanic landforms, such as shield volcanoes, stratovolcanoes, and calderas.
Given the content of the Document, does it relate to the Query? No

Example 11:
Query: The concept of cognitive dissonance, introduced by psychologist Leon Festinger in 1957, refers to the mental discomfort or tension that arises when an individual holds conflicting beliefs, attitudes, or values.
Document: Cognitive dissonance theory explains how individuals strive for consistency in their thoughts and actions. When faced with conflicting beliefs, they may adjust their attitudes or behaviors to alleviate the discomfort and achieve a sense of internal harmony.
Given the content of the Document, does it relate to the Query? Yes

Example 12:
Query: The Industrial Revolution, which began in the late 18th century, marked a significant turning point in history with the transition from agrarian economies to industrialized societies. This period saw advancements in manufacturing, transportation, and technology.
Document: During the Industrial Revolution, urbanization and the growth of factory-based industries led to profound social and economic changes. The shift from hand production to mechanized manufacturing also had a profound impact on labor patterns and living conditions.
Given the content of the Document, does it relate to the Query? Yes

Example 13:
Query: The novel "To Kill a Mockingbird" by Harper Lee addresses themes of racial injustice and moral growth through the character of Atticus Finch, a lawyer who defends an innocent Black man accused of raping a white woman.
Document: "To Kill a Mockingbird" is set in the fictional town of Maycomb, Alabama, during the 1930s. The narrative explores the impact of deeply ingrained racial prejudices on the legal system and society, as seen through the trial of Tom Robinson.
Given the content of the Document, does it relate to the Query? Yes

Example 14:
Query: Plate tectonics is the scientific theory that explains the movement of Earth's lithospheric plates, which float on the semi-fluid asthenosphere beneath them, leading to phenomena like earthquakes, volcanic activity, and the formation of mountain ranges.
Document: The Earth's lithosphere is divided into several major tectonic plates, including the Pacific Plate, North American Plate, and Eurasian Plate. The movement of these plates is driven by convection currents in the mantle, resulting in various geological features and events.
Given the content of the Document, does it relate to the Query? Yes

Example 15:
Query: In William Shakespeare's play "Romeo and Juliet," the two titular characters belong to feuding families in Verona. Despite the enmity between their households, Romeo and Juliet fall deeply in love and ultimately meet a tragic end.
Document: "Romeo and Juliet" is a timeless tragedy that explores themes of love, fate, and conflict. The play's narrative unfolds against the backdrop of a longstanding feud between the Montague and Capulet families, highlighting the destructive power of unresolved animosity.
Given the content of the Document, does it relate to the Query? Yes

Given the content of the statute, does it relate to the mentioned Legal Situation?
{}
  """
  example_3 = """Query: {}
  Document: {}""".format(query, document)
  tokens = tokenizer(template.format(example_3), return_tensors="pt").input_ids
  score = get_score(tokens)
  return score

In [70]:
line_format = ""
lines=[]
for qid in list(test_samples.keys())[0:1]:
  query_content = queries[qid]
  for did in test_samples[qid]['docs_ids']: #limit it to one query
    print('did: ', did)
    document_content = corpus[did]
    score = get_score_given_q_and_d(document_content, query_content)
    # print("predicted score  given\nquery:{}\ncandidate document:{}\nscore:{}\n------\n".format(query_content, document_content, score))
    # print("{}: {}". format(qid, query_content))
    # print("{}: {}". format(did, document_content))
    line = "{query_id} Q0 {document_id} {rank} {score} STANDARD\n".format(query_id=qid,
                                                                          document_id=did,
                                                                          rank="-10",#rank
                                                                          score=str(score))
    lines.append(line)

did:  S104
did:  S146
did:  S43
did:  S115
did:  S172
did:  S169
did:  S29
did:  S98
did:  S156
did:  S56
did:  S161
did:  S167
did:  S57
did:  S67
did:  S42
did:  S144
did:  S134
did:  S149
did:  S170
did:  S28
did:  S179
did:  S109
did:  S139
did:  S22
did:  S143
did:  S164
did:  S27
did:  S31
did:  S69
did:  S35
did:  S128
did:  S82
did:  S163
did:  S87
did:  S147
did:  S159
did:  S186
did:  S157
did:  S94
did:  S102
did:  S65
did:  S83
did:  S193
did:  S198
did:  S93
did:  S116
did:  S81
did:  S137
did:  S150
did:  S13
did:  S97
did:  S145
did:  S55
did:  S33
did:  S140
did:  S89
did:  S14
did:  S177
did:  S191
did:  S71
did:  S196
did:  S126
did:  S199
did:  S44
did:  S39
did:  S1
did:  S189
did:  S165
did:  S120
did:  S158
did:  S38
did:  S17
did:  S135
did:  S48
did:  S99
did:  S62
did:  S111
did:  S77
did:  S151
did:  S90
did:  S192
did:  S26
did:  S168
did:  S95
did:  S166
did:  S47
did:  S46
did:  S12
did:  S113
did:  S54
did:  S152
did:  S181
did:  S194
did:  S176
did:  S10


In [51]:
!pip install pytrec_eval
import pytrec_eval

Collecting pytrec_eval
  Downloading pytrec_eval-0.5.tar.gz (15 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pytrec_eval
  Building wheel for pytrec_eval (setup.py) ... [?25l[?25hdone
  Created wheel for pytrec_eval: filename=pytrec_eval-0.5-cp310-cp310-linux_x86_64.whl size=308205 sha256=38e5d4538348a38763012276a6b9628779c7f0fbe87a469e85f5d618e496dbe2
  Stored in directory: /root/.cache/pip/wheels/51/3a/cd/dcc1ddfc763987d5cb237165d8ac249aa98a23ab90f67317a8
Successfully built pytrec_eval
Installing collected packages: pytrec_eval
Successfully installed pytrec_eval-0.5


In [52]:
with open("qrels_aila.tsv", 'r') as f_qrel:
    qrel = pytrec_eval.parse_qrel(f_qrel)

## evaluation on prompt1 (original prompt introduced before the tutorial

In [60]:
# import json

# output_file = "prompt_1.ranking"
# f_w = open(output_file, "w+")
# f_w.write("".join(lines))
# f_w.close()

# with open(output_file, 'r') as f_run:
#   run = pytrec_eval.parse_run(f_run)

# all_metrics = {"ndcg_cut.10", "map_cut.1000", "recall.10"}
# evaluator = pytrec_eval.RelevanceEvaluator(qrel,all_metrics)
# scores = evaluator.evaluate(run)
# print(json.dumps(scores, indent=4))

# print("scores: ", scores)
# mean_metrics = {}
# metrics_string = ""
# import numpy as np
# for metric in all_metrics:
#     mean_metrics[metric] = np.mean([ele[metric.replace(".","_")] for ele in scores.values()])
#     metrics_string = metrics_string +  "{}: {} | ".format(metric, mean_metrics[metric])
# print("metrics eval: ", metrics_string)

{
    "AILA_Q41": {
        "recall_10": 0.0,
        "ndcg_cut_10": 0.0,
        "map_cut_1000": 0.01832512315270936
    },
    "AILA_Q42": {
        "recall_10": 0.0,
        "ndcg_cut_10": 0.0,
        "map_cut_1000": 0.01818181818181818
    },
    "AILA_Q43": {
        "recall_10": 0.0,
        "ndcg_cut_10": 0.0,
        "map_cut_1000": 0.020122804025243052
    },
    "AILA_Q44": {
        "recall_10": 0.0,
        "ndcg_cut_10": 0.0,
        "map_cut_1000": 0.020525451559934318
    }
}
scores:  {'AILA_Q41': {'recall_10': 0.0, 'ndcg_cut_10': 0.0, 'map_cut_1000': 0.01832512315270936}, 'AILA_Q42': {'recall_10': 0.0, 'ndcg_cut_10': 0.0, 'map_cut_1000': 0.01818181818181818}, 'AILA_Q43': {'recall_10': 0.0, 'ndcg_cut_10': 0.0, 'map_cut_1000': 0.020122804025243052}, 'AILA_Q44': {'recall_10': 0.0, 'ndcg_cut_10': 0.0, 'map_cut_1000': 0.020525451559934318}}
metrics eval:  ndcg_cut.10: 0.0 | recall.10: 0.0 | map_cut.1000: 0.01928879922992623 | 


## evaluation on second prompt, suggested by audience

In [68]:
import json

output_file = "prompt_2.ranking"
f_w = open(output_file, "w+")
f_w.write("".join(lines))
f_w.close()

with open(output_file, 'r') as f_run:
  run = pytrec_eval.parse_run(f_run)

all_metrics = {"ndcg_cut.10", "map_cut.1000", "recall.10"}
evaluator = pytrec_eval.RelevanceEvaluator(qrel,all_metrics)
scores = evaluator.evaluate(run)
print(json.dumps(scores, indent=4))

{
    "AILA_Q41": {
        "recall_10": 0.2,
        "ndcg_cut_10": 0.11305340175787204,
        "map_cut_1000": 0.0369047619047619
    }
}


## evaluation on third prompt, suggested by audience

In [71]:
import json

output_file = "prompt_3.ranking"
f_w = open(output_file, "w+")
f_w.write("".join(lines))
f_w.close()

with open(output_file, 'r') as f_run:
  run = pytrec_eval.parse_run(f_run)

all_metrics = {"ndcg_cut.10", "map_cut.1000", "recall.10"}
evaluator = pytrec_eval.RelevanceEvaluator(qrel,all_metrics)
scores = evaluator.evaluate(run)
print(json.dumps(scores, indent=4))

{
    "AILA_Q41": {
        "recall_10": 0.2,
        "ndcg_cut_10": 0.13120507751234178,
        "map_cut_1000": 0.04784313725490196
    }
}
