In [1]:
import google.generativeai as genai
import os
import pandas as pd
from tqdm import tqdm
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
genai.configure(api_key=os.environ['API_KEY'])
model = genai.GenerativeModel('gemini-1.5-flash')

In [4]:
summaries = []
for i, file in enumerate(os.listdir('eval_data')):
    with open(f'eval_data/{file}', 'r', errors='ignore') as f:
        text = f.read()

        summaries.append(text)
        print(f'Processed {i+1}/{len(os.listdir("eval_data"))}', end='\r')

Processed 51/51

In [5]:
def generate_legal_queries(document, num=5):
    queries = []
    for _ in range(num):
        response = model.generate_content(
            f"Generate a realisitic one line legal research query based on the following summarized case file. Only generate a one line query nothing else. Also the query should not directly reference the case file, the query should only be something that could be answered using the below case file as a reference but the query should not directly refer the below case file, the query should be a general legal question only that can be answered by referring to this case file .\nCaseFile:\n\n{document}",
        )
        queries.append(response.text)
    return queries

In [6]:
case_query_pairs = {}
for summary in tqdm(summaries, desc='Generating queries for all cases'):
    queries = generate_legal_queries(summary)
    case_query_pairs[summary] = queries
    time.sleep(20)

Generating queries for all cases: 100%|██████████| 51/51 [20:57<00:00, 24.65s/it]


In [9]:
df = pd.DataFrame(case_query_pairs.items(), columns=['Case', 'Queries'])

In [10]:
df.head()

Unnamed: 0,Case,Queries
0,## 1. Case Title\n\n* **Case Name:** Arcelor M...,[Does a court retain jurisdiction to hear a Se...
1,## Rubinetterie Bresciane Bonomi SpA vs. M/S....,[Is a right of appeal a vested right that cann...
2,���Bud1����������������������������������...,[What are the legal requirements for establish...
3,## 1. Case Title\n\n* **Case Name:** M/s.Indus...,[What are the legal requirements for granting ...
4,## M/S Reva Electric Car Co. P. Ltd. vs. M/S ...,[Can an arbitration clause in a terminated Mem...


In [12]:
df.to_csv('eval_data.csv', index=False, escapechar='\\')


In [13]:
import pickle
with open('eval_data.pkl', 'wb') as f:
    pickle.dump(case_query_pairs, f)

In [None]:
# load the pickled data
with open('eval_data.pkl', 'rb') as f:
    data = pickle.load(f)