# Preprocessing
## Imports

In [42]:
import networkx as nx
from langchain_core.documents import Document
import pandas as pd
import json

## Functions

In [3]:
def load_data(path: str) -> list:
    """Read a JSON file and return its deserialized content.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's default encoding, so non-ASCII characters (for example the
    non-breaking spaces present in the article texts) are read the same way
    on every machine.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The value produced by ``json.load`` for the file's top-level JSON
        element (annotated as ``list``, which is what the callers in this
        notebook pass on to ``pd.DataFrame``).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as js:
        data = json.load(js)
    return data

In [63]:
def create_doc(row: pd.Series) -> Document:
    """Turn one row of the articles frame into a ``Document``.

    The row's "text" field becomes the page content and its "title" field
    becomes the document id. Every other field except "references" is
    carried along as metadata.

    Args:
        row: A row holding at least the "text", "title" and "references"
            labels.

    Returns:
        The ``Document`` built from the row.
    """
    content = row["text"]
    identifier = row["title"]
    # Whatever is neither content, id nor outgoing references is kept as metadata.
    extra_fields = row.drop(labels=["text", "title", "references"])
    return Document(page_content=content, id=identifier, metadata=extra_fields.to_dict())

## Main

### EPC

In [50]:
# Load the extracted EPC data from JSON and wrap it in a DataFrame.
path = "../../resources/extracted_data/EPC_data.json"
articles = load_data(path)
articles_df = pd.DataFrame(articles)
# Bare last expression: the frame is rendered as the cell output.
articles_df

Unnamed: 0,book,part,chapter,title,text,references
0,EPC,Part I - General and institutional provisions,Chapter I - General provisions,Article 1 - European law for the grant of patents,"A system of law, common to the Contracting Sta...",
1,EPC,Part I - General and institutional provisions,Chapter I - General provisions,Article 2 - European patent,(1) Patents granted under this Convention shal...,"[Article 63 Term of the European patent, Artic..."
2,EPC,Part I - General and institutional provisions,Chapter I - General provisions,Article 3 - Territorial effect,The grant of a European patent may be requeste...,"[Article 79 Designation of Contracting States,..."
3,EPC,Part I - General and institutional provisions,Chapter I - General provisions,Article 4 - 3 - European Patent Organisation,"(1) A European Patent Organisation, hereinafte...",[Article 4a Conference of ministers of the Con...
4,EPC,Part I - General and institutional provisions,Chapter I - General provisions,Article 4a - 4 - Conference of ministers of th...,A conference of ministers of the Contracting S...,[Article 4 European Patent Organisation]
...,...,...,...,...,...,...
163,EPC,Part XII - Final provisions,Art. 167 - (deleted),Article 174 - Denunciation,Any Contracting State may at any time denounce...,"[Article 175 Preservation of acquired rights, ..."
164,EPC,Part XII - Final provisions,Art. 167 - (deleted),Article 175 - Preservation of acquired rights,(1) In the event of a State ceasing to be part...,
165,EPC,Part XII - Final provisions,Art. 167 - (deleted),Article 176 - Financial rights and obligations...,(1) Any State which has ceased to be a party t...,
166,EPC,Part XII - Final provisions,Art. 167 - (deleted),Article 177 - Languages of the Convention,"(1) This Convention, drawn up in a single orig...",


In [15]:
# Select the rows whose "implementation" value compares equal to an empty dict.
# NOTE(review): the frame displayed by the EPC loading cell has no
# "implementation" column, and the recorded output here lists EPO_guidelines
# rows — presumably `articles_df` was bound to the guidelines data when this
# cell last ran; confirm before re-running on a fresh kernel.
articles_df.loc[articles_df["implementation"] == {}]
# Open question: do these guidelines become articles, since they do not implement any rule?

Unnamed: 0,book,part,chapter,title,implementation
0,EPO_guidelines,Part A – Guidelines for Formalities Examination,Chapter I – Introduction,Chapter I – Introduction,{}
1,EPO_guidelines,Part A – Guidelines for Formalities Examination,Chapter I – Introduction,1. Overview,{}
3,EPO_guidelines,Part A – Guidelines for Formalities Examination,Chapter I – Introduction,3. Purpose of - Part A -,{}
4,EPO_guidelines,Part A – Guidelines for Formalities Examination,Chapter I – Introduction,4. Other parts relating to formalities,{}
5,EPO_guidelines,Part A – Guidelines for Formalities Examination,Chapter II – Filing of applications and examin...,Chapter II – Filing of applications and examin...,{}
...,...,...,...,...,...
1779,EPO_guidelines,Part H – Amendments and Corrections,Chapter VI – Correction of errors,3.2 Allowability of the correction of bibliogr...,{}
1780,EPO_guidelines,Part H – Amendments and Corrections,Chapter VI – Correction of errors,3.3 Correction of the decision to grant while ...,{}
1781,EPO_guidelines,Part H – Amendments and Corrections,Chapter VI – Correction of errors,4. Correction of formatting/editing errors,{}
1782,EPO_guidelines,Part H – Amendments and Corrections,Chapter VI – Correction of errors,5. Correction of the translations of the claims,{}


In [25]:
# NOTE(review): these imports sit in the middle of the notebook; `Document`
# is already imported in the top import cell.
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_core.documents import Document

# Throwaway Document used to try out the constructor; nothing else in this cell reads it.
d = Document("lolilol", id="x", metadata={"type": "guideline"})

# Embedding backend: OllamaEmbeddings configured with the "all-minilm" model.
local_embeddings = OllamaEmbeddings(model="all-minilm")

# Index only the raw "text" column; no ids or metadata are passed along
# (the documents returned by the search cell show `metadata={}`).
vectorstore = Chroma.from_texts(articles_df["text"].to_list(), embedding=local_embeddings)

# The string literal below is disabled code kept as a bare expression; as the
# last expression of the cell it is echoed back as the cell output.
""" config_path = "../../config/retriever_config.yaml"
retriever = Retriever(config_path, articles_df) """

' config_path = "../../config/retriever_config.yaml"\nretriever = Retriever(config_path, articles_df) '

In [22]:
# Free-text question used to probe the vector store.
query = "How is decided whether an idea is novel or not?"
# Run a similarity search for the query with the store's default settings.
docs = vectorstore.similarity_search(query)

In [23]:
# Display the documents returned by the similarity search.
docs

[Document(id='03fbd4e8-dfe2-4ff0-acbf-e5b403318d27', metadata={}, page_content='An invention shall be considered as involving an inventive step if, having regard to the state of the art, it is not obvious to a person skilled in the art. If the state of the art also includes documents within the meaning of Article\xa054, paragraph\xa03, these documents shall not be considered in deciding whether there has been an inventive step.\n\n\n46See decisions/opinions of the Enlarged Board of Appeal G\xa02/98, G\xa03/98, G\xa02/99, G\xa01/03, G\xa02/03, G\xa01/16 (Annex\xa0I).\n'),
 Document(id='3277f3cb-7377-4006-b9ae-67990689efbd', metadata={}, page_content='(1) The decisions of the European Patent Office may only be based on grounds or evidence on which the parties concerned have had an opportunity to present their comments.\xa0\n(2) The European Patent Office shall examine, and decide upon, the European patent application or the European patent only in the text submitted to it, or agreed, by 

In [28]:
# Directed multigraph used to hold the documents as nodes.
g = nx.MultiDiGraph()
# Placeholder document used to try out node insertion.
d = Document("lolilol", id="x", metadata={"type": "guideline"})
# The node key is the document id; the Document itself is stored under the "data" attribute.
g.add_node(d.id, data=d)

In [40]:
# Print the nodes currently held by the graph.
# NOTE(review): the recorded output lists (id, attribute-dict) pairs, which
# looks like the result of `g.nodes(data=True)`; `g.nodes()` presumably yields
# only the node ids — confirm and re-run so code and output agree.
print(list(g.nodes()))

[('x', {'data': Document(id='x', metadata={'type': 'guideline'}, page_content='lolilol')})]


In [68]:
# Build one Document per row, register it as a graph node and keep it in
# `docs` so that the list displayed afterwards actually holds the documents
# (it was previously reset to [] and never filled).
docs = []
for _, row in articles_df.iterrows():
    doc = create_doc(row)
    # The node key is the document id; the Document is stored under the "data" attribute.
    g.add_node(doc.id, data=doc)
    docs.append(doc)

# List every outgoing reference as "<source title>  ->  <referenced entry>".
# Only printing for now: no edges are added to the graph here.
for _, row in articles_df.iterrows():
    node1_id = row["title"]
    references = row["references"]
    for ref in references:
        print(node1_id, " -> ", ref)

Article 2 - European patent  ->  Article 63 Term of the European patent
Article 2 - European patent  ->  Article 65 Translation of the European patent
Article 2 - European patent  ->  Article 68 Effect of revocation or limitation of the European patent
Article 2 - European patent  ->  Article 69 Extent of protection
Article 2 - European patent  ->  Article 70 Authentic text of a European patent application or European patent
Article 2 - European patent  ->  Article 99 Opposition
Article 2 - European patent  ->  Article 105c Publication of the amended specification of the European patent
Article 2 - European patent  ->  Article 142 Unitary patents
Article 2 - European patent  ->  Rule 75 Surrender or lapse of the patent
Article 2 - European patent  ->  Rule 85 Transfer of the European patent
Article 2 - European patent  ->  Rule 89 Intervention of the assumed infringer
Article 2 - European patent  ->  Rule 90 Subject of proceedings
Article 2 - European patent  ->  Rule 96 Content and fo

In [65]:
# Show the current contents of `docs`.
docs

[Document(id='Article\xa01 - European law for the grant of patents', metadata={'book': 'EPC', 'part': 'Part I - General and institutional provisions', 'chapter': 'Chapter I - General provisions'}, page_content='A system of law, common to the Contracting States, 2 for the grant of patents for invention is established by this Convention.\n\n\n2There are currently 3839 Contracting States: AL, AT, BE, BG, CH, CY, CZ, DE, DK, EE, ES, FI, FR, GB, GR, HR, HU, IE, IS, IT, LI, LT, LU, LV, MC, ME, MK, MT, NL, NO, PL, PT, RO, RS, SE, SI, SK, SM, TR.\n'),
 Document(id='Article\xa02 - European patent', metadata={'book': 'EPC', 'part': 'Part I - General and institutional provisions', 'chapter': 'Chapter I - General provisions'}, page_content='(1) Patents granted under this Convention shall be called European patents.\xa0\n(2) The European patent shall, in each of the Contracting States for which it is granted, have the effect of and be subject to the same conditions as a national patent granted by t