# Chains

> Chain-based functions for named entity recognition (NER).

In [None]:
# | default_exp ner.chains


In [None]:
# | export

from langchain_ray.imports import *
from langchain_ray.chains import *
from langchain_ray.utils import *
from langchain_ray.pdf.utils import *
from langchain_ray.pdf.chains import *
from langchain_ray.ner.utils import *


In [None]:
#| hide

# Development convenience: with mode 2, autoreload re-imports all modules
# before each cell executes, so edits to the package are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [None]:
# | export


def add_ners_to_docs_chain(
    e_ner,
    j_ner,
    input_variables=None,
    output_variables=None,
    verbose=False,
):
    """Chain that adds the NERs to a list of `Documents` using `e_ner` and `j_ner`.

    Wraps `add_ners_to_docs` in a `transform_chain`, with `e_ner` and `j_ner`
    bound as fixed keyword arguments of the transform.

    Args:
        e_ner: NER model forwarded to `add_ners_to_docs` as `e_ner`
            (presumably the education model -- confirm against
            `add_ners_to_docs`).
        j_ner: NER model forwarded to `add_ners_to_docs` as `j_ner`
            (presumably the job model -- confirm against `add_ners_to_docs`).
        input_variables: Names of the chain's input keys. The first entry is
            mapped onto the `docs` argument of `add_ners_to_docs`.
            Defaults to `["docs"]`.
        output_variables: Names of the chain's output keys.
            Defaults to `["ner_docs"]`.
        verbose: Forwarded unchanged to `transform_chain`.

    Returns:
        The chain object built by `transform_chain`.
    """
    # Build the default lists per call instead of in the signature, so the
    # objects handed to `transform_chain` are never shared between chains.
    if input_variables is None:
        input_variables = ["docs"]
    if output_variables is None:
        output_variables = ["ner_docs"]
    return transform_chain(
        add_ners_to_docs,
        transform_kwargs=dict(e_ner=e_ner, j_ner=j_ner),
        input_variables=input_variables,
        output_variables=output_variables,
        # Whatever the first input key is called, it feeds the `docs` kwarg.
        vars_kwargs_mapping={input_variables[0]: "docs"},
        verbose=verbose,
    )

In [None]:
# | eval: false

# Settings shared by the demo cells that follow.
verbose = True
device = default_device()

# Category classifier used by `add_cats_to_docs_chain`, loaded from the
# "HamzaFarhan/PDFSegs" checkpoint and then sent to `device`.
cats_model = SetFitModel.from_pretrained("HamzaFarhan/PDFSegs")
cats_model = cats_model.to(device)

# Output folder for the JSON stage; remove any previous run's folder first.
cats_folder = "/media/hamza/data2/faiss_data/saved_cats"
if os.path.exists(cats_folder):
    shutil.rmtree(cats_folder)


In [None]:
# | eval: false

# Key flow through the pipeline:
#   path -> docs -> cat_docs -> ner_docs -> json_docs

# Stage 1: `pdf_to_docs_chain` reads `path` and writes `docs`.
chain1 = pdf_to_docs_chain(
    verbose=verbose,
    input_variables=["path"],
    output_variables=["docs"],
)

# Stage 2: `add_cats_to_docs_chain` reads `docs` and writes `cat_docs`.
chain2 = add_cats_to_docs_chain(
    input_variables=["docs"],
    output_variables=["cat_docs"],
    cats_model=cats_model,
    verbose=verbose,
)

# Stage 3: `add_ners_to_docs_chain` reads `cat_docs` and writes `ner_docs`.
chain3 = add_ners_to_docs_chain(
    input_variables=["cat_docs"],
    output_variables=["ner_docs"],
    e_ner=load_edu_model(device=device),
    j_ner=load_job_model(device=device),
    verbose=verbose,
)

# Stage 4: `docs_to_json_chain` reads `ner_docs` and writes `json_docs`,
# using `cats_folder` as its JSON folder.
chain4 = docs_to_json_chain(
    input_variables=["ner_docs"],
    output_variables=["json_docs"],
    json_folder=cats_folder,
    indent=4,
    verbose=verbose,
)

# Only `path` goes in and only `json_docs` comes out of the combined chain.
chain = SequentialChain(
    input_variables=["path"],
    output_variables=["json_docs"],
    chains=[chain1, chain2, chain3, chain4],
    verbose=verbose,
)


In [None]:
# | eval: false

# Run the full pipeline, passing the resumes folder as the `path` input.
pdf = "../../resumes_5/"
res = chain({"path": pdf})




[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



[1m> Finished chain.[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [None]:
# | eval: false

# Show the last document of the first `json_docs` entry: metadata on one
# line, then the page content.
doc = res["json_docs"][0][-1]
print(doc.metadata, doc.page_content, sep="\n")


{'source': '../../resumes_5/0cf20170-8051-41ba-9060-1a82d43f4289.pdf', 'page': 0, 'start_index': 3474, 'category': 'Education', 'ner': {'institute': 'university of mumbai', 'date': '2008 - 2011'}}
bachelor of commerce (b. com) - university of mumbai 2008 - 2011


In [None]:
# | hide
import nbdev

# Write the cells marked `# | export` out to the Python module named by the
# notebook's `default_exp` directive.
nbdev.nbdev_export()
